diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/mongo/db/query/canonical_query_encoder.cpp | 29 | ||||
-rw-r--r-- | src/mongo/db/query/collation/SConscript | 3 | ||||
-rw-r--r-- | src/mongo/db/query/collation/collation_spec.cpp | 110 | ||||
-rw-r--r-- | src/mongo/db/query/collation/collation_spec.h | 169 | ||||
-rw-r--r-- | src/mongo/db/query/collation/collation_spec_test.cpp | 360 | ||||
-rw-r--r-- | src/mongo/db/query/collation/collator_factory_icu.cpp | 443 | ||||
-rw-r--r-- | src/mongo/db/query/collation/collator_factory_icu_test.cpp | 128 | ||||
-rw-r--r-- | src/mongo/db/query/collation/collator_interface.h | 8 | ||||
-rw-r--r-- | src/mongo/db/query/collation/collator_interface_icu.cpp | 3 | ||||
-rw-r--r-- | src/mongo/db/query/collation/collator_interface_icu.h | 2 | ||||
-rw-r--r-- | src/mongo/db/query/collation/collator_interface_icu_test.cpp | 90 | ||||
-rw-r--r-- | src/mongo/db/query/collation/collator_interface_mock.cpp | 10 | ||||
-rw-r--r-- | src/mongo/db/query/collation/collator_interface_mock_test.cpp | 6 | ||||
-rw-r--r-- | src/mongo/idl/basic_types.idl | 46 |
14 files changed, 1084 insertions, 323 deletions
diff --git a/src/mongo/db/query/canonical_query_encoder.cpp b/src/mongo/db/query/canonical_query_encoder.cpp index f5c610004d0..fb73aa3e5c2 100644 --- a/src/mongo/db/query/canonical_query_encoder.cpp +++ b/src/mongo/db/query/canonical_query_encoder.cpp @@ -343,8 +343,11 @@ void encodeGeoNearMatchExpression(const GeoNearMatchExpression* tree, StringBuil template <class T> char encodeEnum(T val) { - // Ensure val can be encoded as a digit between '0' and '9' inclusive. - invariant(static_cast<int>(val) < 10); + static_assert(static_cast<int>(T::kMax) <= 9, + "enum has too many values to encode as a value between '0' and '9'. You must " + "change the encoding scheme"); + invariant(val <= T::kMax); + return static_cast<char>(val) + '0'; } @@ -353,21 +356,21 @@ void encodeCollation(const CollatorInterface* collation, StringBuilder* keyBuild return; } - const Collation& spec = collation->getSpec(); + const CollationSpec& spec = collation->getSpec(); *keyBuilder << kEncodeCollationSection; - *keyBuilder << spec.getLocale(); - *keyBuilder << spec.getCaseLevel(); + *keyBuilder << spec.localeID; + *keyBuilder << spec.caseLevel; // Ensure that we can encode this value with a single ascii byte '0' through '9'. - *keyBuilder << encodeEnum(spec.getCaseFirst()); - *keyBuilder << encodeEnum(spec.getStrength()); - *keyBuilder << spec.getNumericOrdering(); - - *keyBuilder << encodeEnum(spec.getAlternate()); - *keyBuilder << encodeEnum(spec.getMaxVariable()); - *keyBuilder << spec.getNormalization(); - *keyBuilder << spec.getBackwards().value_or(false); + *keyBuilder << encodeEnum(spec.caseFirst); + *keyBuilder << encodeEnum(spec.strength); + *keyBuilder << spec.numericOrdering; + + *keyBuilder << encodeEnum(spec.alternate); + *keyBuilder << encodeEnum(spec.maxVariable); + *keyBuilder << spec.normalization; + *keyBuilder << spec.backwards; // We do not encode 'spec.version' because query shape strings are never persisted, and need // not be stable between versions. 
diff --git a/src/mongo/db/query/collation/SConscript b/src/mongo/db/query/collation/SConscript index 653878dc5be..4304071e676 100644 --- a/src/mongo/db/query/collation/SConscript +++ b/src/mongo/db/query/collation/SConscript @@ -9,11 +9,11 @@ env.Library( target="collator_interface", source=[ "collation_index_key.cpp", + "collation_spec.cpp", "collator_interface.cpp", ], LIBDEPS=[ "$BUILD_DIR/mongo/base", - "$BUILD_DIR/mongo/idl/basic_types", ], ) @@ -101,6 +101,7 @@ icuEnv.CppUnitTest( source=[ "collation_bson_comparison_test.cpp", "collation_index_key_test.cpp", + "collation_spec_test.cpp", "collator_factory_icu_locales_test.cpp", "collator_factory_icu_test.cpp", "collator_factory_mock_test.cpp", diff --git a/src/mongo/db/query/collation/collation_spec.cpp b/src/mongo/db/query/collation/collation_spec.cpp new file mode 100644 index 00000000000..ce0a99db9c9 --- /dev/null +++ b/src/mongo/db/query/collation/collation_spec.cpp @@ -0,0 +1,110 @@ +/** + * Copyright (C) 2018-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. 
You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/platform/basic.h" + +#include "mongo/db/query/collation/collation_spec.h" + +#include "mongo/bson/bsonobjbuilder.h" +#include "mongo/util/assert_util.h" + +namespace mongo { + +const char* CollationSpec::kLocaleField = "locale"; +const char* CollationSpec::kCaseLevelField = "caseLevel"; +const char* CollationSpec::kCaseFirstField = "caseFirst"; +const char* CollationSpec::kStrengthField = "strength"; +const char* CollationSpec::kNumericOrderingField = "numericOrdering"; +const char* CollationSpec::kAlternateField = "alternate"; +const char* CollationSpec::kMaxVariableField = "maxVariable"; +const char* CollationSpec::kNormalizationField = "normalization"; +const char* CollationSpec::kBackwardsField = "backwards"; +const char* CollationSpec::kVersionField = "version"; +const char* CollationSpec::kSimpleBinaryComparison = "simple"; +const char* CollationSpec::kCaseFirstUpper = "upper"; +const char* CollationSpec::kCaseFirstLower = "lower"; +const char* CollationSpec::kCaseFirstOff = "off"; +const char* CollationSpec::kAlternateNonIgnorable = "non-ignorable"; +const char* CollationSpec::kAlternateShifted = "shifted"; +const char* CollationSpec::kMaxVariablePunct = "punct"; +const char* CollationSpec::kMaxVariableSpace = "space"; +const BSONObj CollationSpec::kSimpleSpec = + BSON(CollationSpec::kLocaleField << CollationSpec::kSimpleBinaryComparison); + +BSONObj CollationSpec::toBSON() const { + BSONObjBuilder builder; + builder.append(CollationSpec::kLocaleField, 
localeID); + builder.append(CollationSpec::kCaseLevelField, caseLevel); + + switch (caseFirst) { + case CollationSpec::CaseFirstType::kUpper: + builder.append(CollationSpec::kCaseFirstField, CollationSpec::kCaseFirstUpper); + break; + case CollationSpec::CaseFirstType::kLower: + builder.append(CollationSpec::kCaseFirstField, CollationSpec::kCaseFirstLower); + break; + case CollationSpec::CaseFirstType::kOff: + builder.append(CollationSpec::kCaseFirstField, CollationSpec::kCaseFirstOff); + break; + default: + MONGO_UNREACHABLE; + } + + builder.append(CollationSpec::kStrengthField, static_cast<int>(strength)); + builder.append(CollationSpec::kNumericOrderingField, numericOrdering); + + switch (alternate) { + case CollationSpec::AlternateType::kNonIgnorable: + builder.append(CollationSpec::kAlternateField, CollationSpec::kAlternateNonIgnorable); + break; + case CollationSpec::AlternateType::kShifted: + builder.append(CollationSpec::kAlternateField, CollationSpec::kAlternateShifted); + break; + default: + MONGO_UNREACHABLE; + } + + switch (maxVariable) { + case CollationSpec::MaxVariableType::kPunct: + builder.append(CollationSpec::kMaxVariableField, CollationSpec::kMaxVariablePunct); + break; + case CollationSpec::MaxVariableType::kSpace: + builder.append(CollationSpec::kMaxVariableField, CollationSpec::kMaxVariableSpace); + break; + default: + MONGO_UNREACHABLE; + } + + builder.append(CollationSpec::kNormalizationField, normalization); + builder.append(CollationSpec::kBackwardsField, backwards); + builder.append(CollationSpec::kVersionField, version); + return builder.obj(); +} + +} // namespace mongo diff --git a/src/mongo/db/query/collation/collation_spec.h b/src/mongo/db/query/collation/collation_spec.h index f8369c2bd23..9edc22d729d 100644 --- a/src/mongo/db/query/collation/collation_spec.h +++ b/src/mongo/db/query/collation/collation_spec.h @@ -32,14 +32,169 @@ #include <string> #include "mongo/bson/bsonobj.h" -#include "mongo/idl/basic_types_gen.h" -namespace 
mongo::CollationSpec { +namespace mongo { -constexpr const char* kSimpleBinaryComparison = "simple"; +// TODO (SERVER-52538): Factor with IDL-generated Collation struct. +/** + * A CollationSpec is a parsed representation of a user-provided collation BSONObj. + */ +struct CollationSpec { + // Controls whether uppercase sorts before lowercase or vice versa. + enum class CaseFirstType { + // Sort uppercase before lowercase. + kUpper, + + // Sort lowercase before uppercase. + kLower, + + // Use default sorting behavior for the strength. + kOff, + + // Update this if you add another value. + kMax = kOff, + }; + + // Controls the set of characteristics used to compare strings. + enum class StrengthType { + // Only consider base character differences. + kPrimary = 1, + + // Additionally consider accent differences. + kSecondary = 2, + + // Additionally consider case differences. + kTertiary = 3, + + // Additionally consider punctuation and space differences. (If alternate=shifted, spaces + // and punctuation are not considered base characters, and are only considered at this + // strength.) + kQuaternary = 4, + + // Additionally consider differences in Unicode code point values. + // E.g. Hebrew cantillation marks are only distinguished at this level. + kIdentical = 5, + + // Update this if you add another value. + kMax = kIdentical, + }; + + // Controls whether spaces and punctuation are considered base characters. + enum class AlternateType { + // Spaces and punctuation are considered base characters. + kNonIgnorable, + + // Spaces and punctuation are not considered base characters, and are only distinguished at + // strength > 3. + kShifted, + + // Update this if you add another value. + kMax = kShifted, + }; + + // Controls which characters are affected by alternate=shifted. + enum class MaxVariableType { + // Punctuation and spaces are affected. + kPunct, + + // Only spaces are affected. + kSpace, + + // Update this if you add another value. + kMax = kSpace, + }; + + + // Field name constants. 
+ static const char* kLocaleField; + static const char* kCaseLevelField; + static const char* kCaseFirstField; + static const char* kStrengthField; + static const char* kNumericOrderingField; + static const char* kAlternateField; + static const char* kMaxVariableField; + static const char* kNormalizationField; + static const char* kBackwardsField; + static const char* kVersionField; + + // Field value constants. + static const char* kSimpleBinaryComparison; + static const char* kCaseFirstUpper; + static const char* kCaseFirstLower; + static const char* kCaseFirstOff; + static const char* kAlternateNonIgnorable; + static const char* kAlternateShifted; + static const char* kMaxVariablePunct; + static const char* kMaxVariableSpace; + + // Collation spec which the user can supply to represent the "simple" locale. + static const BSONObj kSimpleSpec; + + /** + * Constructs a CollationSpec with no locale, where all other fields have their default values. + */ + CollationSpec() = default; + + /** + * Constructs a CollationSpec for the given locale and version, where all other fields have + * their default values. + */ + CollationSpec(std::string locale, std::string version) + : localeID(std::move(locale)), version(std::move(version)) {} + + /** + * Serializes this CollationSpec to its BSON format. + */ + BSONObj toBSON() const; + + // A string such as "en_US", identifying the language, country, or other attributes of the + // locale for this collation. + // Required. + std::string localeID; + + // Turns case sensitivity on at strength 1 or 2. + bool caseLevel = false; + + CaseFirstType caseFirst = CaseFirstType::kOff; + + StrengthType strength = StrengthType::kTertiary; + + // Order numbers based on numerical order and not lexicographic order. 
+ bool numericOrdering = false; + + AlternateType alternate = AlternateType::kNonIgnorable; + + MaxVariableType maxVariable = MaxVariableType::kPunct; + + // Any language that uses multiple combining characters such as Arabic, ancient Greek, Hebrew, + // Hindi, Thai or Vietnamese either requires Normalization Checking to be on, or the text to go + // through a normalization process before collation. + bool normalization = false; + + // Causes accent differences to be considered in reverse order, as it is done in the French + // language. + bool backwards = false; + + // Indicates the version of the collator. It is used to ensure that we do not mix versions by, + // for example, constructing an index with one version of ICU and then attempting to use this + // index with a server that is built against a newer ICU version. + std::string version; +}; + +/** + * Returns whether 'left' and 'right' are logically equivalent collations. + */ +inline bool operator==(const CollationSpec& left, const CollationSpec& right) { + return ((left.localeID == right.localeID) && (left.caseLevel == right.caseLevel) && + (left.caseFirst == right.caseFirst) && (left.strength == right.strength) && + (left.numericOrdering == right.numericOrdering) && + (left.alternate == right.alternate) && (left.maxVariable == right.maxVariable) && + (left.normalization == right.normalization) && (left.backwards == right.backwards) && + (left.version == right.version)); +} -// Collation spec which the user can supply to represent the "simple" locale. 
-const static BSONObj kSimpleSpec = - BSON(Collation::kLocaleFieldName << CollationSpec::kSimpleBinaryComparison); +inline bool operator!=(const CollationSpec& left, const CollationSpec& right) { + return !(left == right); +} -} // namespace mongo::CollationSpec +} // namespace mongo diff --git a/src/mongo/db/query/collation/collation_spec_test.cpp b/src/mongo/db/query/collation/collation_spec_test.cpp new file mode 100644 index 00000000000..c255476292e --- /dev/null +++ b/src/mongo/db/query/collation/collation_spec_test.cpp @@ -0,0 +1,360 @@ +/** + * Copyright (C) 2018-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. 
If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/platform/basic.h" + +#include "mongo/db/query/collation/collation_spec.h" + +#include "mongo/bson/bsonobj.h" +#include "mongo/bson/bsonobjbuilder.h" +#include "mongo/unittest/unittest.h" + +namespace { + +using namespace mongo; + +TEST(CollationSpecTest, SpecsWithNonEqualLocaleStringsAreNotEqual) { + CollationSpec collationSpec1; + collationSpec1.localeID = "fr"; + + CollationSpec collationSpec2; + collationSpec2.localeID = "de"; + + ASSERT_FALSE(collationSpec1 == collationSpec2); + ASSERT_TRUE(collationSpec1 != collationSpec2); +} + +TEST(CollationSpecTest, SpecsWithNonEqualCaseLevelValuesAreNotEqual) { + CollationSpec collationSpec1; + collationSpec1.localeID = "fr"; + collationSpec1.caseLevel = true; + + CollationSpec collationSpec2; + collationSpec2.localeID = "fr"; + collationSpec2.caseLevel = false; + + ASSERT_FALSE(collationSpec1 == collationSpec2); + ASSERT_TRUE(collationSpec1 != collationSpec2); +} + +TEST(CollationSpecTest, SpecsWithNonEqualCaseFirstValuesAreNotEqual) { + CollationSpec collationSpec1; + collationSpec1.localeID = "fr"; + collationSpec1.caseFirst = CollationSpec::CaseFirstType::kUpper; + + CollationSpec collationSpec2; + collationSpec2.localeID = "fr"; + collationSpec2.caseFirst = CollationSpec::CaseFirstType::kOff; + + ASSERT_FALSE(collationSpec1 == collationSpec2); + ASSERT_TRUE(collationSpec1 != collationSpec2); +} + +TEST(CollationSpecTest, SpecsWithNonEqualStrengthsAreNotEqual) { + CollationSpec collationSpec1; + collationSpec1.localeID = "fr"; + collationSpec1.strength = CollationSpec::StrengthType::kPrimary; + + CollationSpec collationSpec2; + collationSpec2.localeID = "fr"; + collationSpec2.strength = CollationSpec::StrengthType::kSecondary; + + ASSERT_FALSE(collationSpec1 == collationSpec2); + ASSERT_TRUE(collationSpec1 != collationSpec2); +} + +TEST(CollationSpecTest, 
SpecsWithNonEqualNumericOrderingValuesAreNotEqual) { + CollationSpec collationSpec1; + collationSpec1.localeID = "fr"; + collationSpec1.numericOrdering = false; + + CollationSpec collationSpec2; + collationSpec2.localeID = "fr"; + collationSpec2.numericOrdering = true; + + ASSERT_FALSE(collationSpec1 == collationSpec2); + ASSERT_TRUE(collationSpec1 != collationSpec2); +} + +TEST(CollationSpecTest, SpecsWithNonEqualAlternateValuesAreNotEqual) { + CollationSpec collationSpec1; + collationSpec1.localeID = "fr"; + collationSpec1.alternate = CollationSpec::AlternateType::kNonIgnorable; + + CollationSpec collationSpec2; + collationSpec2.localeID = "fr"; + collationSpec2.alternate = CollationSpec::AlternateType::kShifted; + + ASSERT_FALSE(collationSpec1 == collationSpec2); + ASSERT_TRUE(collationSpec1 != collationSpec2); +} + +TEST(CollationSpecTest, SpecsWithNonEqualMaxVariableValuesAreNotEqual) { + CollationSpec collationSpec1; + collationSpec1.localeID = "fr"; + collationSpec1.maxVariable = CollationSpec::MaxVariableType::kPunct; + + CollationSpec collationSpec2; + collationSpec2.localeID = "fr"; + collationSpec2.maxVariable = CollationSpec::MaxVariableType::kSpace; + + ASSERT_FALSE(collationSpec1 == collationSpec2); + ASSERT_TRUE(collationSpec1 != collationSpec2); +} + +TEST(CollationSpecTest, SpecsWithNonEqualNormalizationValuesAreNotEqual) { + CollationSpec collationSpec1; + collationSpec1.localeID = "fr"; + collationSpec1.normalization = false; + + CollationSpec collationSpec2; + collationSpec2.localeID = "fr"; + collationSpec2.normalization = true; + + ASSERT_FALSE(collationSpec1 == collationSpec2); + ASSERT_TRUE(collationSpec1 != collationSpec2); +} + +TEST(CollationSpecTest, SpecsWithNonEqualBackwardsValuesAreNotEqual) { + CollationSpec collationSpec1; + collationSpec1.localeID = "fr"; + collationSpec1.backwards = false; + + CollationSpec collationSpec2; + collationSpec2.localeID = "fr"; + collationSpec2.backwards = true; + + ASSERT_FALSE(collationSpec1 == 
collationSpec2); + ASSERT_TRUE(collationSpec1 != collationSpec2); +} + +TEST(CollationSpecTest, SpecsWithNonEqualVersionValuesAreNotEqual) { + CollationSpec collationSpec1; + collationSpec1.localeID = "fr"; + collationSpec1.version = "version1"; + + CollationSpec collationSpec2; + collationSpec2.localeID = "fr"; + collationSpec2.version = "version2"; + + ASSERT_FALSE(collationSpec1 == collationSpec2); + ASSERT_TRUE(collationSpec1 != collationSpec2); +} + +TEST(CollationSpecTest, EqualSpecs) { + CollationSpec collationSpec1; + collationSpec1.localeID = "fr"; + + CollationSpec collationSpec2; + collationSpec2.localeID = "fr"; + + ASSERT_TRUE(collationSpec1 == collationSpec2); + ASSERT_FALSE(collationSpec1 != collationSpec2); +} + +TEST(CollationSpecTest, ToBSONCorrectlySerializesDefaults) { + CollationSpec collationSpec; + collationSpec.localeID = "myLocale"; + collationSpec.version = "myVersion"; + + BSONObj expectedObj = BSON("locale" + << "myLocale" + << "caseLevel" << false << "caseFirst" + << "off" + << "strength" << 3 << "numericOrdering" << false << "alternate" + << "non-ignorable" + << "maxVariable" + << "punct" + << "normalization" << false << "backwards" << false << "version" + << "myVersion"); + + ASSERT_BSONOBJ_EQ(expectedObj, collationSpec.toBSON()); +} + +TEST(CollationSpecTest, ToBSONCorrectlySerializesCaseFirstUpper) { + CollationSpec collationSpec; + collationSpec.localeID = "myLocale"; + collationSpec.version = "myVersion"; + collationSpec.caseFirst = CollationSpec::CaseFirstType::kUpper; + + BSONObj expectedObj = BSON("locale" + << "myLocale" + << "caseLevel" << false << "caseFirst" + << "upper" + << "strength" << 3 << "numericOrdering" << false << "alternate" + << "non-ignorable" + << "maxVariable" + << "punct" + << "normalization" << false << "backwards" << false << "version" + << "myVersion"); + + ASSERT_BSONOBJ_EQ(expectedObj, collationSpec.toBSON()); +} + +TEST(CollationSpecTest, ToBSONCorrectlySerializesCaseFirstLower) { + CollationSpec 
collationSpec; + collationSpec.localeID = "myLocale"; + collationSpec.version = "myVersion"; + collationSpec.caseFirst = CollationSpec::CaseFirstType::kLower; + + BSONObj expectedObj = BSON("locale" + << "myLocale" + << "caseLevel" << false << "caseFirst" + << "lower" + << "strength" << 3 << "numericOrdering" << false << "alternate" + << "non-ignorable" + << "maxVariable" + << "punct" + << "normalization" << false << "backwards" << false << "version" + << "myVersion"); + + ASSERT_BSONOBJ_EQ(expectedObj, collationSpec.toBSON()); +} + +TEST(CollationSpecTest, ToBSONCorrectlySerializesPrimaryStrength) { + CollationSpec collationSpec; + collationSpec.localeID = "myLocale"; + collationSpec.version = "myVersion"; + collationSpec.strength = CollationSpec::StrengthType::kPrimary; + + BSONObj expectedObj = BSON("locale" + << "myLocale" + << "caseLevel" << false << "caseFirst" + << "off" + << "strength" << 1 << "numericOrdering" << false << "alternate" + << "non-ignorable" + << "maxVariable" + << "punct" + << "normalization" << false << "backwards" << false << "version" + << "myVersion"); + + ASSERT_BSONOBJ_EQ(expectedObj, collationSpec.toBSON()); +} + +TEST(CollationSpecTest, ToBSONCorrectlySerializesSecondaryStrength) { + CollationSpec collationSpec; + collationSpec.localeID = "myLocale"; + collationSpec.version = "myVersion"; + collationSpec.strength = CollationSpec::StrengthType::kSecondary; + + BSONObj expectedObj = BSON("locale" + << "myLocale" + << "caseLevel" << false << "caseFirst" + << "off" + << "strength" << 2 << "numericOrdering" << false << "alternate" + << "non-ignorable" + << "maxVariable" + << "punct" + << "normalization" << false << "backwards" << false << "version" + << "myVersion"); + + ASSERT_BSONOBJ_EQ(expectedObj, collationSpec.toBSON()); +} + +TEST(CollationSpecTest, ToBSONCorrectlySerializesQuaternaryStrength) { + CollationSpec collationSpec; + collationSpec.localeID = "myLocale"; + collationSpec.version = "myVersion"; + collationSpec.strength = 
CollationSpec::StrengthType::kQuaternary; + + BSONObj expectedObj = BSON("locale" + << "myLocale" + << "caseLevel" << false << "caseFirst" + << "off" + << "strength" << 4 << "numericOrdering" << false << "alternate" + << "non-ignorable" + << "maxVariable" + << "punct" + << "normalization" << false << "backwards" << false << "version" + << "myVersion"); + + ASSERT_BSONOBJ_EQ(expectedObj, collationSpec.toBSON()); +} + +TEST(CollationSpecTest, ToBSONCorrectlySerializesIdenticalStrength) { + CollationSpec collationSpec; + collationSpec.localeID = "myLocale"; + collationSpec.version = "myVersion"; + collationSpec.strength = CollationSpec::StrengthType::kIdentical; + + BSONObj expectedObj = BSON("locale" + << "myLocale" + << "caseLevel" << false << "caseFirst" + << "off" + << "strength" << 5 << "numericOrdering" << false << "alternate" + << "non-ignorable" + << "maxVariable" + << "punct" + << "normalization" << false << "backwards" << false << "version" + << "myVersion"); + + ASSERT_BSONOBJ_EQ(expectedObj, collationSpec.toBSON()); +} + +TEST(CollationSpecTest, ToBSONCorrectlySerializesAlternateShifted) { + CollationSpec collationSpec; + collationSpec.localeID = "myLocale"; + collationSpec.version = "myVersion"; + collationSpec.alternate = CollationSpec::AlternateType::kShifted; + + BSONObj expectedObj = BSON("locale" + << "myLocale" + << "caseLevel" << false << "caseFirst" + << "off" + << "strength" << 3 << "numericOrdering" << false << "alternate" + << "shifted" + << "maxVariable" + << "punct" + << "normalization" << false << "backwards" << false << "version" + << "myVersion"); + + ASSERT_BSONOBJ_EQ(expectedObj, collationSpec.toBSON()); +} + +TEST(CollationSpecTest, ToBSONCorrectlySerializesMaxVariableSpace) { + CollationSpec collationSpec; + collationSpec.localeID = "myLocale"; + collationSpec.version = "myVersion"; + collationSpec.maxVariable = CollationSpec::MaxVariableType::kSpace; + + BSONObj expectedObj = BSON("locale" + << "myLocale" + << "caseLevel" << false << 
"caseFirst" + << "off" + << "strength" << 3 << "numericOrdering" << false << "alternate" + << "non-ignorable" + << "maxVariable" + << "space" + << "normalization" << false << "backwards" << false << "version" + << "myVersion"); + + ASSERT_BSONOBJ_EQ(expectedObj, collationSpec.toBSON()); +} + +} // namespace diff --git a/src/mongo/db/query/collation/collator_factory_icu.cpp b/src/mongo/db/query/collation/collator_factory_icu.cpp index b2d90d1b3d0..cb552780c91 100644 --- a/src/mongo/db/query/collation/collator_factory_icu.cpp +++ b/src/mongo/db/query/collation/collator_factory_icu.cpp @@ -49,7 +49,7 @@ namespace { constexpr StringData kFallbackLocaleName = "root"_sd; -// Helper methods for converting between ICU attributes and types used by Collation. +// Helper methods for converting between ICU attributes and types used by CollationSpec. UColAttributeValue boolToAttribute(bool value) { if (value) { @@ -69,218 +69,300 @@ bool attributeToBool(UColAttributeValue attribute) { } } -UColAttributeValue getCaseFirstAttribute(CollationCaseFirstEnum caseFirst) { +UColAttributeValue getCaseFirstAttribute(CollationSpec::CaseFirstType caseFirst) { switch (caseFirst) { - case CollationCaseFirstEnum::kUpper: + case CollationSpec::CaseFirstType::kUpper: return UCOL_UPPER_FIRST; - case CollationCaseFirstEnum::kLower: + case CollationSpec::CaseFirstType::kLower: return UCOL_LOWER_FIRST; - case CollationCaseFirstEnum::kOff: + case CollationSpec::CaseFirstType::kOff: return UCOL_OFF; - default: - MONGO_UNREACHABLE; } MONGO_UNREACHABLE; } -CollationCaseFirstEnum getCaseFirstFromAttribute(UColAttributeValue caseFirstAttribute) { +CollationSpec::CaseFirstType getCaseFirstFromAttribute(UColAttributeValue caseFirstAttribute) { switch (caseFirstAttribute) { case UCOL_UPPER_FIRST: - return CollationCaseFirstEnum::kUpper; + return CollationSpec::CaseFirstType::kUpper; case UCOL_LOWER_FIRST: - return CollationCaseFirstEnum::kLower; + return CollationSpec::CaseFirstType::kLower; case UCOL_OFF: 
- return CollationCaseFirstEnum::kOff; + return CollationSpec::CaseFirstType::kOff; default: MONGO_UNREACHABLE; } } -UColAttributeValue getStrengthAttribute(int strength) { - switch (static_cast<CollationStrength>(strength)) { - case CollationStrength::kPrimary: +UColAttributeValue getStrengthAttribute(CollationSpec::StrengthType strength) { + switch (strength) { + case CollationSpec::StrengthType::kPrimary: return UCOL_PRIMARY; - case CollationStrength::kSecondary: + case CollationSpec::StrengthType::kSecondary: return UCOL_SECONDARY; - case CollationStrength::kTertiary: + case CollationSpec::StrengthType::kTertiary: return UCOL_TERTIARY; - case CollationStrength::kQuaternary: + case CollationSpec::StrengthType::kQuaternary: return UCOL_QUATERNARY; - case CollationStrength::kIdentical: + case CollationSpec::StrengthType::kIdentical: return UCOL_IDENTICAL; - default: - MONGO_UNREACHABLE; } MONGO_UNREACHABLE; } -int getStrengthFromAttribute(UColAttributeValue strengthAttribute) { +CollationSpec::StrengthType getStrengthFromAttribute(UColAttributeValue strengthAttribute) { switch (strengthAttribute) { case UCOL_PRIMARY: - return static_cast<int>(CollationStrength::kPrimary); + return CollationSpec::StrengthType::kPrimary; case UCOL_SECONDARY: - return static_cast<int>(CollationStrength::kSecondary); + return CollationSpec::StrengthType::kSecondary; case UCOL_TERTIARY: - return static_cast<int>(CollationStrength::kTertiary); + return CollationSpec::StrengthType::kTertiary; case UCOL_QUATERNARY: - return static_cast<int>(CollationStrength::kQuaternary); + return CollationSpec::StrengthType::kQuaternary; case UCOL_IDENTICAL: - return static_cast<int>(CollationStrength::kIdentical); + return CollationSpec::StrengthType::kIdentical; default: MONGO_UNREACHABLE; } } -UColAttributeValue getAlternateAttribute(CollationAlternateEnum alternate) { +UColAttributeValue getAlternateAttribute(CollationSpec::AlternateType alternate) { switch (alternate) { - case 
CollationAlternateEnum::kNonIgnorable: + case CollationSpec::AlternateType::kNonIgnorable: return UCOL_NON_IGNORABLE; - case CollationAlternateEnum::kShifted: + case CollationSpec::AlternateType::kShifted: return UCOL_SHIFTED; - default: - MONGO_UNREACHABLE; } MONGO_UNREACHABLE; } -CollationAlternateEnum getAlternateFromAttribute(UColAttributeValue alternateAttribute) { +CollationSpec::AlternateType getAlternateFromAttribute(UColAttributeValue alternateAttribute) { switch (alternateAttribute) { case UCOL_NON_IGNORABLE: - return CollationAlternateEnum::kNonIgnorable; + return CollationSpec::AlternateType::kNonIgnorable; case UCOL_SHIFTED: - return CollationAlternateEnum::kShifted; + return CollationSpec::AlternateType::kShifted; default: MONGO_UNREACHABLE; } } -UColReorderCode getMaxVariableReorderCode(CollationMaxVariableEnum maxVariable) { +UColReorderCode getMaxVariableReorderCode(CollationSpec::MaxVariableType maxVariable) { switch (maxVariable) { - case CollationMaxVariableEnum::kPunct: + case CollationSpec::MaxVariableType::kPunct: return UCOL_REORDER_CODE_PUNCTUATION; - case CollationMaxVariableEnum::kSpace: + case CollationSpec::MaxVariableType::kSpace: return UCOL_REORDER_CODE_SPACE; - default: - MONGO_UNREACHABLE; } MONGO_UNREACHABLE; } -CollationMaxVariableEnum getMaxVariableFromReorderCode(UColReorderCode maxVariableReorderCode) { +CollationSpec::MaxVariableType getMaxVariableFromReorderCode( + UColReorderCode maxVariableReorderCode) { switch (maxVariableReorderCode) { case UCOL_REORDER_CODE_PUNCTUATION: - return CollationMaxVariableEnum::kPunct; + return CollationSpec::MaxVariableType::kPunct; case UCOL_REORDER_CODE_SPACE: - return CollationMaxVariableEnum::kSpace; + return CollationSpec::MaxVariableType::kSpace; default: MONGO_UNREACHABLE; } } -// Sets the Collation's localeID to 'localeID'. For each collation option, if the user specified the -// option then set it on icuCollation, otherwise copy icuCollation's default to the Collation. 
-Status updateCollationSpecFromICUCollator(const BSONObj& spec, - const std::string& localeID, - icu::Collator* icuCollator, - Collation* collation) { +// Helper methods for converting from constants to types used by CollationSpec. + +StatusWith<CollationSpec::CaseFirstType> stringToCaseFirstType(const std::string& caseFirst) { + if (caseFirst == CollationSpec::kCaseFirstUpper) { + return CollationSpec::CaseFirstType::kUpper; + } else if (caseFirst == CollationSpec::kCaseFirstLower) { + return CollationSpec::CaseFirstType::kLower; + } else if (caseFirst == CollationSpec::kCaseFirstOff) { + return CollationSpec::CaseFirstType::kOff; + } else { + return {ErrorCodes::FailedToParse, + str::stream() << "Field '" << CollationSpec::kCaseFirstField << "' must be '" + << CollationSpec::kCaseFirstUpper << "', '" + << CollationSpec::kCaseFirstLower << "', or '" + << CollationSpec::kCaseFirstOff << "'. Got: " << caseFirst}; + } +} + +StatusWith<CollationSpec::StrengthType> integerToStrengthType(long long strength) { + switch (strength) { + case static_cast<int>(CollationSpec::StrengthType::kPrimary): + return CollationSpec::StrengthType::kPrimary; + case static_cast<int>(CollationSpec::StrengthType::kSecondary): + return CollationSpec::StrengthType::kSecondary; + case static_cast<int>(CollationSpec::StrengthType::kTertiary): + return CollationSpec::StrengthType::kTertiary; + case static_cast<int>(CollationSpec::StrengthType::kQuaternary): + return CollationSpec::StrengthType::kQuaternary; + case static_cast<int>(CollationSpec::StrengthType::kIdentical): + return CollationSpec::StrengthType::kIdentical; + } + return {ErrorCodes::FailedToParse, + str::stream() << "Field '" << CollationSpec::kStrengthField + << "' must be an integer 1 through 5. 
Got: " << strength}; +} + +StatusWith<CollationSpec::AlternateType> stringToAlternateType(const std::string& alternate) { + if (alternate == CollationSpec::kAlternateNonIgnorable) { + return CollationSpec::AlternateType::kNonIgnorable; + } else if (alternate == CollationSpec::kAlternateShifted) { + return CollationSpec::AlternateType::kShifted; + } else { + return {ErrorCodes::FailedToParse, + str::stream() << "Field '" << CollationSpec::kAlternateField << "' must be '" + << CollationSpec::kAlternateNonIgnorable << "' or '" + << CollationSpec::kAlternateShifted << "'. Got: " << alternate}; + } +} + +StatusWith<CollationSpec::MaxVariableType> stringToMaxVariableType(const std::string& maxVariable) { + if (maxVariable == CollationSpec::kMaxVariablePunct) { + return CollationSpec::MaxVariableType::kPunct; + } else if (maxVariable == CollationSpec::kMaxVariableSpace) { + return CollationSpec::MaxVariableType::kSpace; + } else { + return {ErrorCodes::FailedToParse, + str::stream() << "Field '" << CollationSpec::kMaxVariableField << "' must be '" + << CollationSpec::kMaxVariablePunct << "' or '" + << CollationSpec::kMaxVariableSpace << "'. Got: " << maxVariable}; + } +} + +// Extracts the collation options from 'spec', performs validation, and sets the options in +// 'icuCollator' and the output CollationSpec. +// Sets the localeID in the CollationSpec to 'localeID'. +StatusWith<CollationSpec> parseToCollationSpec(const BSONObj& spec, + const std::string& localeID, + icu::Collator* icuCollator) { + CollationSpec parsedSpec; + // Set the localeID. - collation->setLocale(localeID); + parsedSpec.localeID = localeID; + + // Count the number of fields we have parsed from 'spec'. + // Begin this at 1 since the locale has already been parsed. + int parsedFields = 1; // Set caseLevel. 
- if (!spec.hasField(Collation::kCaseLevelFieldName)) { + Status parseStatus = + bsonExtractBooleanField(spec, CollationSpec::kCaseLevelField, &parsedSpec.caseLevel); + if (parseStatus == ErrorCodes::NoSuchKey) { UErrorCode status = U_ZERO_ERROR; UColAttributeValue caseLevelAttribute = icuCollator->getAttribute(UCOL_CASE_LEVEL, status); if (U_FAILURE(status)) { icu::ErrorCode icuError; icuError.set(status); return {ErrorCodes::OperationFailed, - str::stream() << "Failed to get '" << Collation::kCaseLevelFieldName + str::stream() << "Failed to get '" << CollationSpec::kCaseLevelField << "' attribute from icu::Collator: " << icuError.errorName() << ". Collation spec: " << spec}; } - collation->setCaseLevel(attributeToBool(caseLevelAttribute)); + parsedSpec.caseLevel = attributeToBool(caseLevelAttribute); + } else if (!parseStatus.isOK()) { + return parseStatus; } else { + ++parsedFields; UErrorCode status = U_ZERO_ERROR; - icuCollator->setAttribute( - UCOL_CASE_LEVEL, boolToAttribute(collation->getCaseLevel()), status); + icuCollator->setAttribute(UCOL_CASE_LEVEL, boolToAttribute(parsedSpec.caseLevel), status); if (U_FAILURE(status)) { icu::ErrorCode icuError; icuError.set(status); return {ErrorCodes::OperationFailed, - str::stream() << "Failed to set '" << Collation::kCaseLevelFieldName + str::stream() << "Failed to set '" << CollationSpec::kCaseLevelField << "' attribute: " << icuError.errorName() << ". Collation spec: " << spec}; } } // Set caseFirst. 
- if (!spec.hasField(Collation::kCaseFirstFieldName)) { + std::string caseFirst; + parseStatus = bsonExtractStringField(spec, CollationSpec::kCaseFirstField, &caseFirst); + if (parseStatus == ErrorCodes::NoSuchKey) { UErrorCode status = U_ZERO_ERROR; UColAttributeValue caseFirstAttribute = icuCollator->getAttribute(UCOL_CASE_FIRST, status); if (U_FAILURE(status)) { icu::ErrorCode icuError; icuError.set(status); return {ErrorCodes::OperationFailed, - str::stream() << "Failed to get '" << Collation::kCaseFirstFieldName + str::stream() << "Failed to get '" << CollationSpec::kCaseFirstField << "' attribute from icu::Collator: " << icuError.errorName() << ". Collation spec: " << spec}; } - collation->setCaseFirst(getCaseFirstFromAttribute(caseFirstAttribute)); + parsedSpec.caseFirst = getCaseFirstFromAttribute(caseFirstAttribute); + } else if (!parseStatus.isOK()) { + return parseStatus; } else { + ++parsedFields; + + auto caseFirstStatus = stringToCaseFirstType(caseFirst); + if (!caseFirstStatus.isOK()) { + return caseFirstStatus.getStatus(); + } + parsedSpec.caseFirst = caseFirstStatus.getValue(); + UErrorCode status = U_ZERO_ERROR; icuCollator->setAttribute( - UCOL_CASE_FIRST, getCaseFirstAttribute(collation->getCaseFirst()), status); + UCOL_CASE_FIRST, getCaseFirstAttribute(parsedSpec.caseFirst), status); if (U_FAILURE(status)) { icu::ErrorCode icuError; icuError.set(status); return {ErrorCodes::OperationFailed, - str::stream() << "Failed to set '" << Collation::kCaseFirstFieldName + str::stream() << "Failed to set '" << CollationSpec::kCaseFirstField << "' attribute: " << icuError.errorName() << ". Collation spec: " << spec}; } } // Set strength. 
- if (!spec.hasField(Collation::kStrengthFieldName)) { + long long strength; + parseStatus = bsonExtractIntegerField(spec, CollationSpec::kStrengthField, &strength); + if (parseStatus == ErrorCodes::NoSuchKey) { UErrorCode status = U_ZERO_ERROR; UColAttributeValue strengthAttribute = icuCollator->getAttribute(UCOL_STRENGTH, status); if (U_FAILURE(status)) { icu::ErrorCode icuError; icuError.set(status); return {ErrorCodes::OperationFailed, - str::stream() << "Failed to get '" << Collation::kStrengthFieldName + str::stream() << "Failed to get '" << CollationSpec::kStrengthField << "' attribute from icu::Collator: " << icuError.errorName() << ". Collation spec: " << spec}; } - collation->setStrength(getStrengthFromAttribute(strengthAttribute)); + parsedSpec.strength = getStrengthFromAttribute(strengthAttribute); + } else if (!parseStatus.isOK()) { + return parseStatus; } else { - try { - // For backwards compatibility, "strength" is parsed from any int, long, or double. - // Check it matches an enum value. - CollationStrength_parse({"collation.strength"}, collation->getStrength()); - } catch (const DBException& exc) { - return exc.toStatus(); + ++parsedFields; + + auto strengthStatus = integerToStrengthType(strength); + if (!strengthStatus.isOK()) { + return strengthStatus.getStatus(); } + parsedSpec.strength = strengthStatus.getValue(); UErrorCode status = U_ZERO_ERROR; - icuCollator->setAttribute( - UCOL_STRENGTH, getStrengthAttribute(collation->getStrength()), status); + icuCollator->setAttribute(UCOL_STRENGTH, getStrengthAttribute(parsedSpec.strength), status); if (U_FAILURE(status)) { icu::ErrorCode icuError; icuError.set(status); return {ErrorCodes::OperationFailed, - str::stream() << "Failed to set '" << Collation::kStrengthFieldName + str::stream() << "Failed to set '" << CollationSpec::kStrengthField << "' attribute: " << icuError.errorName() << ". Collation spec: " << spec}; } } // Set numericOrdering. 
- if (!spec.hasField(Collation::kNumericOrderingFieldName)) { + parseStatus = bsonExtractBooleanField( + spec, CollationSpec::kNumericOrderingField, &parsedSpec.numericOrdering); + if (parseStatus == ErrorCodes::NoSuchKey) { UErrorCode status = U_ZERO_ERROR; UColAttributeValue numericOrderingAttribute = icuCollator->getAttribute(UCOL_NUMERIC_COLLATION, status); @@ -288,27 +370,32 @@ Status updateCollationSpecFromICUCollator(const BSONObj& spec, icu::ErrorCode icuError; icuError.set(status); return {ErrorCodes::OperationFailed, - str::stream() << "Failed to get '" << Collation::kNumericOrderingFieldName + str::stream() << "Failed to get '" << CollationSpec::kNumericOrderingField << "' attribute from icu::Collator: " << icuError.errorName() << ". Collation spec: " << spec}; } - collation->setNumericOrdering(attributeToBool(numericOrderingAttribute)); + parsedSpec.numericOrdering = attributeToBool(numericOrderingAttribute); + } else if (!parseStatus.isOK()) { + return parseStatus; } else { + ++parsedFields; UErrorCode status = U_ZERO_ERROR; icuCollator->setAttribute( - UCOL_NUMERIC_COLLATION, boolToAttribute(collation->getNumericOrdering()), status); + UCOL_NUMERIC_COLLATION, boolToAttribute(parsedSpec.numericOrdering), status); if (U_FAILURE(status)) { icu::ErrorCode icuError; icuError.set(status); return {ErrorCodes::OperationFailed, - str::stream() << "Failed to set '" << Collation::kNumericOrderingFieldName + str::stream() << "Failed to set '" << CollationSpec::kNumericOrderingField << "' attribute: " << icuError.errorName() << ". Collation spec: " << spec}; } } // Set alternate. 
- if (!spec.hasField(Collation::kAlternateFieldName)) { + std::string alternate; + parseStatus = bsonExtractStringField(spec, CollationSpec::kAlternateField, &alternate); + if (parseStatus == ErrorCodes::NoSuchKey) { UErrorCode status = U_ZERO_ERROR; UColAttributeValue alternateAttribute = icuCollator->getAttribute(UCOL_ALTERNATE_HANDLING, status); @@ -316,43 +403,67 @@ Status updateCollationSpecFromICUCollator(const BSONObj& spec, icu::ErrorCode icuError; icuError.set(status); return {ErrorCodes::OperationFailed, - str::stream() << "Failed to get '" << Collation::kAlternateFieldName + str::stream() << "Failed to get '" << CollationSpec::kAlternateField << "' attribute from icu::Collator: " << icuError.errorName() << ". Collation spec: " << spec}; } - collation->setAlternate(getAlternateFromAttribute(alternateAttribute)); + parsedSpec.alternate = getAlternateFromAttribute(alternateAttribute); + } else if (!parseStatus.isOK()) { + return parseStatus; } else { + ++parsedFields; + + auto alternateStatus = stringToAlternateType(alternate); + if (!alternateStatus.isOK()) { + return alternateStatus.getStatus(); + } + parsedSpec.alternate = alternateStatus.getValue(); + UErrorCode status = U_ZERO_ERROR; icuCollator->setAttribute( - UCOL_ALTERNATE_HANDLING, getAlternateAttribute(collation->getAlternate()), status); + UCOL_ALTERNATE_HANDLING, getAlternateAttribute(parsedSpec.alternate), status); if (U_FAILURE(status)) { icu::ErrorCode icuError; icuError.set(status); return {ErrorCodes::OperationFailed, - str::stream() << "Failed to set '" << Collation::kAlternateFieldName + str::stream() << "Failed to set '" << CollationSpec::kAlternateField << "' attribute: " << icuError.errorName() << ". Collation spec: " << spec}; } } // Set maxVariable. 
- if (!spec.hasField(Collation::kMaxVariableFieldName)) { - collation->setMaxVariable(getMaxVariableFromReorderCode(icuCollator->getMaxVariable())); + std::string maxVariable; + parseStatus = bsonExtractStringField(spec, CollationSpec::kMaxVariableField, &maxVariable); + if (parseStatus == ErrorCodes::NoSuchKey) { + parsedSpec.maxVariable = getMaxVariableFromReorderCode(icuCollator->getMaxVariable()); + } else if (!parseStatus.isOK()) { + return parseStatus; } else { + ++parsedFields; + + auto maxVariableStatus = stringToMaxVariableType(maxVariable); + if (!maxVariableStatus.isOK()) { + return maxVariableStatus.getStatus(); + } + parsedSpec.maxVariable = maxVariableStatus.getValue(); + UErrorCode status = U_ZERO_ERROR; - icuCollator->setMaxVariable(getMaxVariableReorderCode(collation->getMaxVariable()), status); + icuCollator->setMaxVariable(getMaxVariableReorderCode(parsedSpec.maxVariable), status); if (U_FAILURE(status)) { icu::ErrorCode icuError; icuError.set(status); return {ErrorCodes::OperationFailed, - str::stream() << "Failed to set '" << Collation::kMaxVariableFieldName + str::stream() << "Failed to set '" << CollationSpec::kMaxVariableField << "' attribute: " << icuError.errorName() << ". Collation spec: " << spec}; } } // Set normalization. 
- if (!spec.hasField(Collation::kNormalizationFieldName)) { + parseStatus = bsonExtractBooleanField( + spec, CollationSpec::kNormalizationField, &parsedSpec.normalization); + if (parseStatus == ErrorCodes::NoSuchKey) { UErrorCode status = U_ZERO_ERROR; UColAttributeValue normalizationAttribute = icuCollator->getAttribute(UCOL_NORMALIZATION_MODE, status); @@ -360,27 +471,32 @@ Status updateCollationSpecFromICUCollator(const BSONObj& spec, icu::ErrorCode icuError; icuError.set(status); return {ErrorCodes::OperationFailed, - str::stream() << "Failed to get '" << Collation::kNormalizationFieldName + str::stream() << "Failed to get '" << CollationSpec::kNormalizationField << "' attribute from icu::Collator: " << icuError.errorName() << ". Collation spec: " << spec}; } - collation->setNormalization(attributeToBool(normalizationAttribute)); + parsedSpec.normalization = attributeToBool(normalizationAttribute); + } else if (!parseStatus.isOK()) { + return parseStatus; } else { + ++parsedFields; UErrorCode status = U_ZERO_ERROR; icuCollator->setAttribute( - UCOL_NORMALIZATION_MODE, boolToAttribute(collation->getNormalization()), status); + UCOL_NORMALIZATION_MODE, boolToAttribute(parsedSpec.normalization), status); if (U_FAILURE(status)) { icu::ErrorCode icuError; icuError.set(status); return {ErrorCodes::OperationFailed, - str::stream() << "Failed to set '" << Collation::kNormalizationFieldName + str::stream() << "Failed to set '" << CollationSpec::kNormalizationField << "' attribute: " << icuError.errorName() << ". Collation spec: " << spec}; } } // Set backwards. 
- if (!spec.hasField(Collation::kBackwardsFieldName)) { + parseStatus = + bsonExtractBooleanField(spec, CollationSpec::kBackwardsField, &parsedSpec.backwards); + if (parseStatus == ErrorCodes::NoSuchKey) { UErrorCode status = U_ZERO_ERROR; UColAttributeValue backwardsAttribute = icuCollator->getAttribute(UCOL_FRENCH_COLLATION, status); @@ -388,43 +504,81 @@ Status updateCollationSpecFromICUCollator(const BSONObj& spec, icu::ErrorCode icuError; icuError.set(status); return {ErrorCodes::OperationFailed, - str::stream() << "Failed to get '" << Collation::kBackwardsFieldName + str::stream() << "Failed to get '" << CollationSpec::kBackwardsField << "' attribute from icu::Collator: " << icuError.errorName() << ". Collation spec: " << spec}; } - collation->setBackwards(attributeToBool(backwardsAttribute)); + parsedSpec.backwards = attributeToBool(backwardsAttribute); + } else if (!parseStatus.isOK()) { + return parseStatus; } else { + ++parsedFields; UErrorCode status = U_ZERO_ERROR; - // collation->getBackwards should be engaged if spec has a "backwards" field. - invariant(collation->getBackwards().is_initialized()); icuCollator->setAttribute( - UCOL_FRENCH_COLLATION, boolToAttribute(*collation->getBackwards()), status); + UCOL_FRENCH_COLLATION, boolToAttribute(parsedSpec.backwards), status); if (U_FAILURE(status)) { icu::ErrorCode icuError; icuError.set(status); return {ErrorCodes::OperationFailed, - str::stream() << "Failed to set '" << Collation::kBackwardsFieldName + str::stream() << "Failed to set '" << CollationSpec::kBackwardsField << "' attribute: " << icuError.errorName() << ". Collation spec: " << spec}; } } - if (!collation->getVersion()) { - collation->setVersion(StringData(U_ICU_VERSION)); + // Populate the spec with the ICU version information. + parsedSpec.version = U_ICU_VERSION; + + // Parse the version string, if present in the spec. 
If the version string does not match the + // ICU version currently in use we must return an "IncompatibleCollationVersion" error. + std::string specVersionStr; + parseStatus = bsonExtractStringField(spec, CollationSpec::kVersionField, &specVersionStr); + if (parseStatus == ErrorCodes::NoSuchKey) { + // The BSON spec does not have any particular version. We've already populated it with the + // ICU version string above. + invariant(!parsedSpec.version.empty()); + } else if (!parseStatus.isOK()) { + return parseStatus; } else { - if (U_ICU_VERSION != *collation->getVersion()) { + if (specVersionStr != parsedSpec.version) { return {ErrorCodes::IncompatibleCollationVersion, - str::stream() << "Requested collation version " << collation->getVersion() + str::stream() << "Requested collation version " << specVersionStr << " but the only available collator version was " - << U_ICU_VERSION << ". Requested collation spec: " << spec}; + << parsedSpec.version << ". Requested collation spec: " << spec}; } + + ++parsedFields; } - return Status::OK(); + // Check for unknown fields. + invariant(parsedFields <= spec.nFields()); + if (parsedFields < spec.nFields()) { + return {ErrorCodes::FailedToParse, + str::stream() << "Collation spec contains unknown field. Collation spec: " << spec}; + } + + return parsedSpec; +} + +// Extracts the localeID from 'spec', if present. +StatusWith<std::string> parseLocaleID(const BSONObj& spec) { + std::string localeID; + Status status = bsonExtractStringField(spec, CollationSpec::kLocaleField, &localeID); + if (!status.isOK()) { + return status; + } + if (localeID.find('\0') != std::string::npos) { + return {ErrorCodes::BadValue, + str::stream() << "Field '" << CollationSpec::kLocaleField + << "' cannot contain null byte. Collation spec: " << spec}; + } + return localeID; } // Returns a non-OK status if any part of the locale ID is invalid or not recognized by ICU. 
-Status validateLocaleID(const BSONObj& spec, StringData originalID, const icu::Collator& collator) { +Status validateLocaleID(const BSONObj& spec, + const std::string& originalID, + const icu::Collator& collator) { UErrorCode status = U_ZERO_ERROR; icu::Locale collatorLocale = collator.getLocale(ULOC_VALID_LOCALE, status); if (U_FAILURE(status)) { @@ -437,7 +591,7 @@ Status validateLocaleID(const BSONObj& spec, StringData originalID, const icu::C if (originalID.empty()) { return {ErrorCodes::BadValue, - str::stream() << "Field '" << Collation::kLocaleFieldName + str::stream() << "Field '" << CollationSpec::kLocaleField << "' cannot be the empty string in: " << spec}; } @@ -450,7 +604,7 @@ Status validateLocaleID(const BSONObj& spec, StringData originalID, const icu::C auto collatorLocaleName = StringData(collatorLocale.getName()); if (originalID != collatorLocaleName) { str::stream ss; - ss << "Field '" << Collation::kLocaleFieldName << "' is invalid in: " << spec; + ss << "Field '" << CollationSpec::kLocaleField << "' is invalid in: " << spec; if ((collatorLocaleName != kFallbackLocaleName) && !collatorLocaleName.empty()) { ss << ". Did you mean '" << collatorLocaleName << "'?"; @@ -463,29 +617,28 @@ Status validateLocaleID(const BSONObj& spec, StringData originalID, const icu::C } // Returns a non-OK status if 'spec' contains any invalid combinations of options. -Status validateCollationSpec(const Collation& collation, const BSONObj& spec) { +Status validateCollationSpec(const CollationSpec& spec) { // The backwards option specifically means backwards secondary weighting, and therefore only // affects the secondary comparison level. It has no effect at strength 1. 
- if (collation.getBackwards().value_or(false) && - static_cast<CollationStrength>(collation.getStrength()) == CollationStrength::kPrimary) { + if (spec.backwards && spec.strength == CollationSpec::StrengthType::kPrimary) { return {ErrorCodes::BadValue, - str::stream() << "'" << Collation::kBackwardsFieldName << "' is invalid with '" - << Collation::kStrengthFieldName << "' of " - << static_cast<int>(CollationStrength::kPrimary) << " in: " << spec}; + str::stream() << "'" << CollationSpec::kBackwardsField << "' is invalid with '" + << CollationSpec::kStrengthField << "' of " + << static_cast<int>(CollationSpec::StrengthType::kPrimary) + << " in: " << spec.toBSON()}; } // The caseFirst option only affects tertiary level or caseLevel comparisons. It will have no // affect if caseLevel is off and strength is 1 or 2. - if (collation.getCaseFirst() != CollationCaseFirstEnum::kOff && !collation.getCaseLevel() && - (static_cast<CollationStrength>(collation.getStrength()) == CollationStrength::kPrimary || - static_cast<CollationStrength>(collation.getStrength()) == - CollationStrength::kSecondary)) { + if (spec.caseFirst != CollationSpec::CaseFirstType::kOff && !spec.caseLevel && + (spec.strength == CollationSpec::StrengthType::kPrimary || + spec.strength == CollationSpec::StrengthType::kSecondary)) { return {ErrorCodes::BadValue, - str::stream() << "'" << Collation::kCaseFirstFieldName << "' is invalid unless '" - << Collation::kCaseLevelFieldName << "' is on or '" - << Collation::kStrengthFieldName << "' is greater than " - << static_cast<int>(CollationStrength::kSecondary) - << " in: " << spec}; + str::stream() << "'" << CollationSpec::kCaseFirstField << "' is invalid unless '" + << CollationSpec::kCaseLevelField << "' is on or '" + << CollationSpec::kStrengthField << "' is greater than " + << static_cast<int>(CollationSpec::StrengthType::kSecondary) + << " in: " << spec.toBSON()}; } return Status::OK(); @@ -493,33 +646,32 @@ Status validateCollationSpec(const 
Collation& collation, const BSONObj& spec) { } // namespace +// TODO (SERVER-52538): Use Collation parser from basic_types.idl StatusWith<std::unique_ptr<CollatorInterface>> CollatorFactoryICU::makeFromBSON( const BSONObj& spec) { - - Collation collation; - try { - collation = Collation::parse({"collation"}, spec); - } catch (const DBException& ex) { - return ex.toStatus(); - } - - if (collation.getLocale().find('\0') != std::string::npos) { - return {ErrorCodes::BadValue, - str::stream() << "Field '" << Collation::kLocaleFieldName - << "' cannot contain null byte. Collation spec: " << spec}; + // Parse the locale ID out of the spec. + auto parsedLocaleID = parseLocaleID(spec); + if (!parsedLocaleID.isOK()) { + return parsedLocaleID.getStatus(); } // If spec = {locale: "simple"}, return a null pointer. A null CollatorInterface indicates // simple binary compare. - if (collation.getLocale() == CollationSpec::kSimpleBinaryComparison) { + if (parsedLocaleID.getValue() == CollationSpec::kSimpleBinaryComparison) { + if (spec.nFields() > 1) { + return {ErrorCodes::FailedToParse, + str::stream() << "If " << CollationSpec::kLocaleField << "=" + << CollationSpec::kSimpleBinaryComparison + << ", no other fields should be present in: " << spec}; + } return {nullptr}; } // Construct an icu::Locale. - auto userLocale = icu::Locale::createFromName(collation.getLocale().toString().c_str()); + auto userLocale = icu::Locale::createFromName(parsedLocaleID.getValue().c_str()); if (userLocale.isBogus()) { return {ErrorCodes::BadValue, - str::stream() << "Field '" << Collation::kLocaleFieldName + str::stream() << "Field '" << CollationSpec::kLocaleField << "' is not valid in: " << spec}; } @@ -534,26 +686,25 @@ StatusWith<std::unique_ptr<CollatorInterface>> CollatorFactoryICU::makeFromBSON( << ". 
Collation spec: " << spec}; } - Status localeValidationStatus = validateLocaleID(spec, collation.getLocale(), *icuCollator); + Status localeValidationStatus = validateLocaleID(spec, parsedLocaleID.getValue(), *icuCollator); if (!localeValidationStatus.isOK()) { return localeValidationStatus; } - // Update the Collation's options with the defaults in icuCollator. + // Construct a CollationSpec using the options provided in spec or the defaults in icuCollator. // Use userLocale.getName() for the localeID, since it is canonicalized and includes options. - auto updateCollationSpecStatus = updateCollationSpecFromICUCollator( - spec, userLocale.getName(), icuCollator.get(), &collation); - if (!updateCollationSpecStatus.isOK()) { - return updateCollationSpecStatus; + auto parsedSpec = parseToCollationSpec(spec, userLocale.getName(), icuCollator.get()); + if (!parsedSpec.isOK()) { + return parsedSpec.getStatus(); } - auto validateSpecStatus = validateCollationSpec(collation, spec); + auto validateSpecStatus = validateCollationSpec(parsedSpec.getValue()); if (!validateSpecStatus.isOK()) { return validateSpecStatus; } - auto mongoCollator = - std::make_unique<CollatorInterfaceICU>(std::move(collation), std::move(icuCollator)); + auto mongoCollator = std::make_unique<CollatorInterfaceICU>(std::move(parsedSpec.getValue()), + std::move(icuCollator)); return {std::move(mongoCollator)}; } diff --git a/src/mongo/db/query/collation/collator_factory_icu_test.cpp b/src/mongo/db/query/collation/collator_factory_icu_test.cpp index f0a08fdf97a..d7d0edce54b 100644 --- a/src/mongo/db/query/collation/collator_factory_icu_test.cpp +++ b/src/mongo/db/query/collation/collator_factory_icu_test.cpp @@ -46,7 +46,7 @@ TEST(CollatorFactoryICUTest, LocaleStringParsesSuccessfully) { auto collator = factory.makeFromBSON(BSON("locale" << "en_US")); ASSERT_OK(collator.getStatus()); - ASSERT_EQ("en_US", collator.getValue()->getSpec().getLocale()); + ASSERT_EQ("en_US", 
collator.getValue()->getSpec().localeID); } TEST(CollatorFactoryICUTest, SimpleLocaleReturnsNullPointer) { @@ -57,6 +57,15 @@ TEST(CollatorFactoryICUTest, SimpleLocaleReturnsNullPointer) { ASSERT_TRUE(collator.getValue() == nullptr); } +TEST(CollatorFactoryICUTest, SimpleLocaleWithOtherFieldsFailsToParse) { + CollatorFactoryICU factory; + auto collator = factory.makeFromBSON(BSON("locale" + << "simple" + << "caseLevel" << true)); + ASSERT_NOT_OK(collator.getStatus()); + ASSERT_EQ(collator.getStatus(), ErrorCodes::FailedToParse); +} + TEST(CollatorFactoryICUTest, LocaleFieldNotAStringFailsToParse) { CollatorFactoryICU factory; auto collator = factory.makeFromBSON(BSON("locale" << 3)); @@ -389,7 +398,7 @@ TEST(CollatorFactoryICUTest, MissingLocaleStringFailsToParse) { CollatorFactoryICU factory; auto collator = factory.makeFromBSON(BSONObj()); ASSERT_NOT_OK(collator.getStatus()); - ASSERT_EQ(collator.getStatus().code(), 40414); + ASSERT_EQ(collator.getStatus(), ErrorCodes::NoSuchKey); } TEST(CollatorFactoryICUTest, UnknownSpecFieldFailsToParse) { @@ -400,6 +409,7 @@ TEST(CollatorFactoryICUTest, UnknownSpecFieldFailsToParse) { CollatorFactoryICU factory; auto collator = factory.makeFromBSON(spec); ASSERT_NOT_OK(collator.getStatus()); + ASSERT_EQ(collator.getStatus(), ErrorCodes::FailedToParse); } TEST(CollatorFactoryICUTest, DefaultsSetSuccessfully) { @@ -407,18 +417,18 @@ TEST(CollatorFactoryICUTest, DefaultsSetSuccessfully) { auto collator = factory.makeFromBSON(BSON("locale" << "en_US")); ASSERT_OK(collator.getStatus()); - ASSERT_FALSE(collator.getValue()->getSpec().getCaseLevel()); - ASSERT_EQ(static_cast<int>(CollationCaseFirstEnum::kOff), - static_cast<int>(collator.getValue()->getSpec().getCaseFirst())); - ASSERT_EQ(static_cast<int>(CollationStrength::kTertiary), - collator.getValue()->getSpec().getStrength()); - ASSERT_FALSE(collator.getValue()->getSpec().getNumericOrdering()); - ASSERT_EQ(static_cast<int>(CollationAlternateEnum::kNonIgnorable), - 
static_cast<int>(collator.getValue()->getSpec().getAlternate())); - ASSERT_EQ(static_cast<int>(CollationMaxVariableEnum::kPunct), - static_cast<int>(collator.getValue()->getSpec().getMaxVariable())); - ASSERT_FALSE(collator.getValue()->getSpec().getNormalization()); - ASSERT_FALSE(*collator.getValue()->getSpec().getBackwards()); + ASSERT_FALSE(collator.getValue()->getSpec().caseLevel); + ASSERT_EQ(static_cast<int>(CollationSpec::CaseFirstType::kOff), + static_cast<int>(collator.getValue()->getSpec().caseFirst)); + ASSERT_EQ(static_cast<int>(CollationSpec::StrengthType::kTertiary), + static_cast<int>(collator.getValue()->getSpec().strength)); + ASSERT_FALSE(collator.getValue()->getSpec().numericOrdering); + ASSERT_EQ(static_cast<int>(CollationSpec::AlternateType::kNonIgnorable), + static_cast<int>(collator.getValue()->getSpec().alternate)); + ASSERT_EQ(static_cast<int>(CollationSpec::MaxVariableType::kPunct), + static_cast<int>(collator.getValue()->getSpec().maxVariable)); + ASSERT_FALSE(collator.getValue()->getSpec().normalization); + ASSERT_FALSE(collator.getValue()->getSpec().backwards); } TEST(CollatorFactoryICUTest, LanguageDependentDefaultsSetSuccessfully) { @@ -426,7 +436,7 @@ TEST(CollatorFactoryICUTest, LanguageDependentDefaultsSetSuccessfully) { auto collator = factory.makeFromBSON(BSON("locale" << "fr_CA")); ASSERT_OK(collator.getStatus()); - ASSERT_TRUE(*collator.getValue()->getSpec().getBackwards()); + ASSERT_TRUE(collator.getValue()->getSpec().backwards); } TEST(CollatorFactoryICUTest, CaseLevelFalseParsesSuccessfully) { @@ -435,7 +445,7 @@ TEST(CollatorFactoryICUTest, CaseLevelFalseParsesSuccessfully) { << "en_US" << "caseLevel" << false)); ASSERT_OK(collator.getStatus()); - ASSERT_FALSE(collator.getValue()->getSpec().getCaseLevel()); + ASSERT_FALSE(collator.getValue()->getSpec().caseLevel); } TEST(CollatorFactoryICUTest, CaseLevelTrueParsesSuccessfully) { @@ -444,7 +454,7 @@ TEST(CollatorFactoryICUTest, CaseLevelTrueParsesSuccessfully) { << "en_US" 
<< "caseLevel" << true)); ASSERT_OK(collator.getStatus()); - ASSERT_TRUE(collator.getValue()->getSpec().getCaseLevel()); + ASSERT_TRUE(collator.getValue()->getSpec().caseLevel); } TEST(CollatorFactoryICUTest, CaseFirstOffParsesSuccessfully) { @@ -454,8 +464,8 @@ TEST(CollatorFactoryICUTest, CaseFirstOffParsesSuccessfully) { << "caseFirst" << "off")); ASSERT_OK(collator.getStatus()); - ASSERT_EQ(static_cast<int>(CollationCaseFirstEnum::kOff), - static_cast<int>(collator.getValue()->getSpec().getCaseFirst())); + ASSERT_EQ(static_cast<int>(CollationSpec::CaseFirstType::kOff), + static_cast<int>(collator.getValue()->getSpec().caseFirst)); } TEST(CollatorFactoryICUTest, CaseFirstUpperParsesSuccessfully) { @@ -465,8 +475,8 @@ TEST(CollatorFactoryICUTest, CaseFirstUpperParsesSuccessfully) { << "caseFirst" << "upper")); ASSERT_OK(collator.getStatus()); - ASSERT_EQ(static_cast<int>(CollationCaseFirstEnum::kUpper), - static_cast<int>(collator.getValue()->getSpec().getCaseFirst())); + ASSERT_EQ(static_cast<int>(CollationSpec::CaseFirstType::kUpper), + static_cast<int>(collator.getValue()->getSpec().caseFirst)); } TEST(CollatorFactoryICUTest, CaseFirstLowerParsesSuccessfully) { @@ -476,8 +486,8 @@ TEST(CollatorFactoryICUTest, CaseFirstLowerParsesSuccessfully) { << "caseFirst" << "lower")); ASSERT_OK(collator.getStatus()); - ASSERT_EQ(static_cast<int>(CollationCaseFirstEnum::kLower), - static_cast<int>(collator.getValue()->getSpec().getCaseFirst())); + ASSERT_EQ(static_cast<int>(CollationSpec::CaseFirstType::kLower), + static_cast<int>(collator.getValue()->getSpec().caseFirst)); } TEST(CollatorFactoryICUTest, PrimaryStrengthParsesSuccessfully) { @@ -486,8 +496,8 @@ TEST(CollatorFactoryICUTest, PrimaryStrengthParsesSuccessfully) { << "en_US" << "strength" << 1)); ASSERT_OK(collator.getStatus()); - ASSERT_EQ(static_cast<int>(CollationStrength::kPrimary), - collator.getValue()->getSpec().getStrength()); + ASSERT_EQ(static_cast<int>(CollationSpec::StrengthType::kPrimary), + 
static_cast<int>(collator.getValue()->getSpec().strength)); } TEST(CollatorFactoryICUTest, SecondaryStrengthParsesSuccessfully) { @@ -496,8 +506,8 @@ TEST(CollatorFactoryICUTest, SecondaryStrengthParsesSuccessfully) { << "en_US" << "strength" << 2)); ASSERT_OK(collator.getStatus()); - ASSERT_EQ(static_cast<int>(CollationStrength::kSecondary), - collator.getValue()->getSpec().getStrength()); + ASSERT_EQ(static_cast<int>(CollationSpec::StrengthType::kSecondary), + static_cast<int>(collator.getValue()->getSpec().strength)); } TEST(CollatorFactoryICUTest, TertiaryStrengthParsesSuccessfully) { @@ -506,8 +516,8 @@ TEST(CollatorFactoryICUTest, TertiaryStrengthParsesSuccessfully) { << "en_US" << "strength" << 3)); ASSERT_OK(collator.getStatus()); - ASSERT_EQ(static_cast<int>(CollationStrength::kTertiary), - collator.getValue()->getSpec().getStrength()); + ASSERT_EQ(static_cast<int>(CollationSpec::StrengthType::kTertiary), + static_cast<int>(collator.getValue()->getSpec().strength)); } TEST(CollatorFactoryICUTest, QuaternaryStrengthParsesSuccessfully) { @@ -516,8 +526,8 @@ TEST(CollatorFactoryICUTest, QuaternaryStrengthParsesSuccessfully) { << "en_US" << "strength" << 4)); ASSERT_OK(collator.getStatus()); - ASSERT_EQ(static_cast<int>(CollationStrength::kQuaternary), - collator.getValue()->getSpec().getStrength()); + ASSERT_EQ(static_cast<int>(CollationSpec::StrengthType::kQuaternary), + static_cast<int>(collator.getValue()->getSpec().strength)); } TEST(CollatorFactoryICUTest, IdenticalStrengthParsesSuccessfully) { @@ -526,8 +536,8 @@ TEST(CollatorFactoryICUTest, IdenticalStrengthParsesSuccessfully) { << "en_US" << "strength" << 5)); ASSERT_OK(collator.getStatus()); - ASSERT_EQ(static_cast<int>(CollationStrength::kIdentical), - collator.getValue()->getSpec().getStrength()); + ASSERT_EQ(static_cast<int>(CollationSpec::StrengthType::kIdentical), + static_cast<int>(collator.getValue()->getSpec().strength)); } TEST(CollatorFactoryICUTest, NumericOrderingFalseParsesSuccessfully) 
{ @@ -536,7 +546,7 @@ TEST(CollatorFactoryICUTest, NumericOrderingFalseParsesSuccessfully) { << "en_US" << "numericOrdering" << false)); ASSERT_OK(collator.getStatus()); - ASSERT_FALSE(collator.getValue()->getSpec().getNumericOrdering()); + ASSERT_FALSE(collator.getValue()->getSpec().numericOrdering); } TEST(CollatorFactoryICUTest, NumericOrderingTrueParsesSuccessfully) { @@ -545,7 +555,7 @@ TEST(CollatorFactoryICUTest, NumericOrderingTrueParsesSuccessfully) { << "en_US" << "numericOrdering" << true)); ASSERT_OK(collator.getStatus()); - ASSERT_TRUE(collator.getValue()->getSpec().getNumericOrdering()); + ASSERT_TRUE(collator.getValue()->getSpec().numericOrdering); } TEST(CollatorFactoryICUTest, AlternateNonIgnorableParsesSuccessfully) { @@ -555,8 +565,8 @@ TEST(CollatorFactoryICUTest, AlternateNonIgnorableParsesSuccessfully) { << "alternate" << "non-ignorable")); ASSERT_OK(collator.getStatus()); - ASSERT_EQ(static_cast<int>(CollationAlternateEnum::kNonIgnorable), - static_cast<int>(collator.getValue()->getSpec().getAlternate())); + ASSERT_EQ(static_cast<int>(CollationSpec::AlternateType::kNonIgnorable), + static_cast<int>(collator.getValue()->getSpec().alternate)); } TEST(CollatorFactoryICUTest, AlternateShiftedParsesSuccessfully) { @@ -566,8 +576,8 @@ TEST(CollatorFactoryICUTest, AlternateShiftedParsesSuccessfully) { << "alternate" << "shifted")); ASSERT_OK(collator.getStatus()); - ASSERT_EQ(static_cast<int>(CollationAlternateEnum::kShifted), - static_cast<int>(collator.getValue()->getSpec().getAlternate())); + ASSERT_EQ(static_cast<int>(CollationSpec::AlternateType::kShifted), + static_cast<int>(collator.getValue()->getSpec().alternate)); } TEST(CollatorFactoryICUTest, MaxVariablePunctParsesSuccessfully) { @@ -577,8 +587,8 @@ TEST(CollatorFactoryICUTest, MaxVariablePunctParsesSuccessfully) { << "maxVariable" << "punct")); ASSERT_OK(collator.getStatus()); - ASSERT_EQ(static_cast<int>(CollationMaxVariableEnum::kPunct), - 
static_cast<int>(collator.getValue()->getSpec().getMaxVariable())); + ASSERT_EQ(static_cast<int>(CollationSpec::MaxVariableType::kPunct), + static_cast<int>(collator.getValue()->getSpec().maxVariable)); } TEST(CollatorFactoryICUTest, MaxVariableSpaceParsesSuccessfully) { @@ -588,8 +598,8 @@ TEST(CollatorFactoryICUTest, MaxVariableSpaceParsesSuccessfully) { << "maxVariable" << "space")); ASSERT_OK(collator.getStatus()); - ASSERT_EQ(static_cast<int>(CollationMaxVariableEnum::kSpace), - static_cast<int>(collator.getValue()->getSpec().getMaxVariable())); + ASSERT_EQ(static_cast<int>(CollationSpec::MaxVariableType::kSpace), + static_cast<int>(collator.getValue()->getSpec().maxVariable)); } TEST(CollatorFactoryICUTest, NormalizationFalseParsesSuccessfully) { @@ -598,7 +608,7 @@ TEST(CollatorFactoryICUTest, NormalizationFalseParsesSuccessfully) { << "en_US" << "normalization" << false)); ASSERT_OK(collator.getStatus()); - ASSERT_FALSE(collator.getValue()->getSpec().getNormalization()); + ASSERT_FALSE(collator.getValue()->getSpec().normalization); } TEST(CollatorFactoryICUTest, NormalizationTrueParsesSuccessfully) { @@ -607,7 +617,7 @@ TEST(CollatorFactoryICUTest, NormalizationTrueParsesSuccessfully) { << "en_US" << "normalization" << true)); ASSERT_OK(collator.getStatus()); - ASSERT_TRUE(collator.getValue()->getSpec().getNormalization()); + ASSERT_TRUE(collator.getValue()->getSpec().normalization); } TEST(CollatorFactoryICUTest, BackwardsFalseParsesSuccessfully) { @@ -616,7 +626,7 @@ TEST(CollatorFactoryICUTest, BackwardsFalseParsesSuccessfully) { << "en_US" << "backwards" << false)); ASSERT_OK(collator.getStatus()); - ASSERT_FALSE(*collator.getValue()->getSpec().getBackwards()); + ASSERT_FALSE(collator.getValue()->getSpec().backwards); } TEST(CollatorFactoryICUTest, BackwardsTrueParsesSuccessfully) { @@ -625,7 +635,7 @@ TEST(CollatorFactoryICUTest, BackwardsTrueParsesSuccessfully) { << "en_US" << "backwards" << true)); ASSERT_OK(collator.getStatus()); - 
ASSERT_TRUE(*collator.getValue()->getSpec().getBackwards()); + ASSERT_TRUE(collator.getValue()->getSpec().backwards); } TEST(CollatorFactoryICUTest, LongStrengthFieldParsesSuccessfully) { @@ -634,8 +644,8 @@ TEST(CollatorFactoryICUTest, LongStrengthFieldParsesSuccessfully) { << "en_US" << "strength" << 1LL)); ASSERT_OK(collator.getStatus()); - ASSERT_EQ(static_cast<int>(CollationStrength::kPrimary), - collator.getValue()->getSpec().getStrength()); + ASSERT_EQ(static_cast<int>(CollationSpec::StrengthType::kPrimary), + static_cast<int>(collator.getValue()->getSpec().strength)); } TEST(CollatorFactoryICUTest, DoubleStrengthFieldParsesSuccessfully) { @@ -644,8 +654,8 @@ TEST(CollatorFactoryICUTest, DoubleStrengthFieldParsesSuccessfully) { << "en_US" << "strength" << 1.0)); ASSERT_OK(collator.getStatus()); - ASSERT_EQ(static_cast<int>(CollationStrength::kPrimary), - collator.getValue()->getSpec().getStrength()); + ASSERT_EQ(static_cast<int>(CollationSpec::StrengthType::kPrimary), + static_cast<int>(collator.getValue()->getSpec().strength)); } TEST(CollatorFactoryICUTest, NonBooleanCaseLevelFieldFailsToParse) { @@ -674,7 +684,7 @@ TEST(CollatorFactoryICUTest, InvalidStringCaseFirstFieldFailsToParse) { << "caseFirst" << "invalid")); ASSERT_NOT_OK(collator.getStatus()); - ASSERT_EQ(collator.getStatus(), ErrorCodes::BadValue); + ASSERT_EQ(collator.getStatus(), ErrorCodes::FailedToParse); } TEST(CollatorFactoryICUTest, NonNumberStrengthFieldFailsToParse) { @@ -693,7 +703,7 @@ TEST(CollatorFactoryICUTest, TooLargeStrengthFieldFailsToParse) { << "en_US" << "strength" << 2147483648LL)); ASSERT_NOT_OK(collator.getStatus()); - ASSERT_EQ(collator.getStatus().code(), 51024); + ASSERT_EQ(collator.getStatus(), ErrorCodes::FailedToParse); } TEST(CollatorFactoryICUTest, FractionalStrengthFieldFailsToParse) { @@ -711,7 +721,7 @@ TEST(CollatorFactoryICUTest, NegativeStrengthFieldFailsToParse) { << "en_US" << "strength" << -1)); ASSERT_NOT_OK(collator.getStatus()); - 
ASSERT_EQ(collator.getStatus().code(), 51024); + ASSERT_EQ(collator.getStatus(), ErrorCodes::FailedToParse); } TEST(CollatorFactoryICUTest, InvalidIntegerStrengthFieldFailsToParse) { @@ -720,7 +730,7 @@ TEST(CollatorFactoryICUTest, InvalidIntegerStrengthFieldFailsToParse) { << "en_US" << "strength" << 6)); ASSERT_NOT_OK(collator.getStatus()); - ASSERT_EQ(collator.getStatus().code(), 51024); + ASSERT_EQ(collator.getStatus(), ErrorCodes::FailedToParse); } TEST(CollatorFactoryICUTest, NonBoolNumericOrderingFieldFailsToParse) { @@ -749,7 +759,7 @@ TEST(CollatorFactoryICUTest, InvalidStringAlternateFieldFailsToParse) { << "alternate" << "invalid")); ASSERT_NOT_OK(collator.getStatus()); - ASSERT_EQ(collator.getStatus(), ErrorCodes::BadValue); + ASSERT_EQ(collator.getStatus(), ErrorCodes::FailedToParse); } TEST(CollatorFactoryICUTest, NonStringMaxVariableFieldFailsToParse) { @@ -768,7 +778,7 @@ TEST(CollatorFactoryICUTest, InvalidStringMaxVariableFieldFailsToParse) { << "maxVariable" << "invalid")); ASSERT_NOT_OK(collator.getStatus()); - ASSERT_EQ(collator.getStatus(), ErrorCodes::BadValue); + ASSERT_EQ(collator.getStatus(), ErrorCodes::FailedToParse); } TEST(CollatorFactoryICUTest, NonBoolNormalizationFieldFailsToParse) { @@ -798,7 +808,7 @@ TEST(CollatorFactoryICUTest, VersionFieldParsesSuccessfully) { << "version" << "57.1")); ASSERT_OK(collator.getStatus()); - ASSERT_EQ("57.1", *collator.getValue()->getSpec().getVersion()); + ASSERT_EQ("57.1", collator.getValue()->getSpec().version); } TEST(CollatorFactoryICUTest, VersionFieldPopulatedWhenOmitted) { @@ -806,7 +816,7 @@ TEST(CollatorFactoryICUTest, VersionFieldPopulatedWhenOmitted) { auto collator = factory.makeFromBSON(BSON("locale" << "en_US")); ASSERT_OK(collator.getStatus()); - ASSERT_EQ("57.1", *collator.getValue()->getSpec().getVersion()); + ASSERT_EQ("57.1", collator.getValue()->getSpec().version); } TEST(CollatorFactoryICUTest, NonStringVersionFieldFailsToParse) { diff --git 
a/src/mongo/db/query/collation/collator_interface.h b/src/mongo/db/query/collation/collator_interface.h index 9825f5267a8..0e19137f8df 100644 --- a/src/mongo/db/query/collation/collator_interface.h +++ b/src/mongo/db/query/collation/collator_interface.h @@ -89,7 +89,7 @@ public: /** * Constructs a CollatorInterface capable of computing the collation described by 'spec'. */ - CollatorInterface(Collation spec) : _spec(std::move(spec)) {} + CollatorInterface(CollationSpec spec) : _spec(std::move(spec)) {} virtual ~CollatorInterface() {} @@ -136,9 +136,9 @@ public: } /** - * Returns a reference to the Collation. + * Returns a reference to the CollationSpec. */ - const Collation& getSpec() const { + const CollationSpec& getSpec() const { return _spec; } @@ -171,7 +171,7 @@ protected: } private: - const Collation _spec; + const CollationSpec _spec; }; } // namespace mongo diff --git a/src/mongo/db/query/collation/collator_interface_icu.cpp b/src/mongo/db/query/collation/collator_interface_icu.cpp index 6dceb89b140..db35958c63a 100644 --- a/src/mongo/db/query/collation/collator_interface_icu.cpp +++ b/src/mongo/db/query/collation/collator_interface_icu.cpp @@ -40,7 +40,8 @@ namespace mongo { -CollatorInterfaceICU::CollatorInterfaceICU(Collation spec, std::unique_ptr<icu::Collator> collator) +CollatorInterfaceICU::CollatorInterfaceICU(CollationSpec spec, + std::unique_ptr<icu::Collator> collator) : CollatorInterface(std::move(spec)), _collator(std::move(collator)) {} std::unique_ptr<CollatorInterface> CollatorInterfaceICU::clone() const { diff --git a/src/mongo/db/query/collation/collator_interface_icu.h b/src/mongo/db/query/collation/collator_interface_icu.h index d33a0b35392..a5b7b380985 100644 --- a/src/mongo/db/query/collation/collator_interface_icu.h +++ b/src/mongo/db/query/collation/collator_interface_icu.h @@ -45,7 +45,7 @@ namespace mongo { */ class CollatorInterfaceICU final : public CollatorInterface { public: - CollatorInterfaceICU(Collation spec, 
std::unique_ptr<icu::Collator> collator); + CollatorInterfaceICU(CollationSpec spec, std::unique_ptr<icu::Collator> collator); std::unique_ptr<CollatorInterface> clone() const final; diff --git a/src/mongo/db/query/collation/collator_interface_icu_test.cpp b/src/mongo/db/query/collation/collator_interface_icu_test.cpp index 73ed077e8af..2ba98f7df6e 100644 --- a/src/mongo/db/query/collation/collator_interface_icu_test.cpp +++ b/src/mongo/db/query/collation/collator_interface_icu_test.cpp @@ -64,8 +64,8 @@ bool isExpectedComparison(int cmp, ExpectedComparison expectedCmp) { } void assertEnUSComparison(StringData left, StringData right, ExpectedComparison expectedCmp) { - Collation collationSpec; - collationSpec.setLocale("en_US"); + CollationSpec collationSpec; + collationSpec.localeID = "en_US"; UErrorCode status = U_ZERO_ERROR; std::unique_ptr<icu::Collator> coll( icu::Collator::createInstance(icu::Locale("en", "US"), status)); @@ -101,8 +101,8 @@ void assertNotEqualEnUS(StringData left, StringData right) { } TEST(CollatorInterfaceICUTest, ClonedCollatorMatchesOriginal) { - Collation collationSpec; - collationSpec.setLocale("en_US"); + CollationSpec collationSpec; + collationSpec.localeID = "en_US"; UErrorCode status = U_ZERO_ERROR; std::unique_ptr<icu::Collator> coll( @@ -115,8 +115,8 @@ TEST(CollatorInterfaceICUTest, ClonedCollatorMatchesOriginal) { } TEST(CollatorInterfaceICUTest, ASCIIComparisonWorksForUSEnglishCollation) { - Collation collationSpec; - collationSpec.setLocale("en_US"); + CollationSpec collationSpec; + collationSpec.localeID = "en_US"; UErrorCode status = U_ZERO_ERROR; std::unique_ptr<icu::Collator> coll( @@ -130,10 +130,10 @@ TEST(CollatorInterfaceICUTest, ASCIIComparisonWorksForUSEnglishCollation) { } TEST(CollatorInterfaceICUTest, ASCIIComparisonWorksUsingLocaleStringParsing) { - Collation collationSpec; - collationSpec.setLocale("en_US"); + CollationSpec collationSpec; + collationSpec.localeID = "en_US"; - auto locale = 
icu::Locale::createFromName(collationSpec.getLocale().toString().c_str()); + auto locale = icu::Locale::createFromName(collationSpec.localeID.c_str()); ASSERT_EQ(std::string("en"), locale.getLanguage()); ASSERT_EQ(std::string("US"), locale.getCountry()); @@ -148,10 +148,10 @@ TEST(CollatorInterfaceICUTest, ASCIIComparisonWorksUsingLocaleStringParsing) { } TEST(CollatorInterfaceICUTest, ASCIIComparisonWorksUsingComparisonKeys) { - Collation collationSpec; - collationSpec.setLocale("en_US"); + CollationSpec collationSpec; + collationSpec.localeID = "en_US"; - auto locale = icu::Locale::createFromName(collationSpec.getLocale().toString().c_str()); + auto locale = icu::Locale::createFromName(collationSpec.localeID.c_str()); ASSERT_EQ(std::string("en"), locale.getLanguage()); ASSERT_EQ(std::string("US"), locale.getCountry()); @@ -176,8 +176,8 @@ TEST(CollatorInterfaceICUTest, ASCIIComparisonWorksUsingComparisonKeys) { } TEST(CollatorInterfaceICUTest, ZeroLengthStringsCompareCorrectly) { - Collation collationSpec; - collationSpec.setLocale("en_US"); + CollationSpec collationSpec; + collationSpec.localeID = "en_US"; UErrorCode status = U_ZERO_ERROR; std::unique_ptr<icu::Collator> coll( @@ -191,8 +191,8 @@ TEST(CollatorInterfaceICUTest, ZeroLengthStringsCompareCorrectly) { } TEST(CollatorInterfaceICUTest, ZeroLengthStringsCompareCorrectlyUsingComparisonKeys) { - Collation collationSpec; - collationSpec.setLocale("en_US"); + CollationSpec collationSpec; + collationSpec.localeID = "en_US"; UErrorCode status = U_ZERO_ERROR; std::unique_ptr<icu::Collator> coll( @@ -208,8 +208,8 @@ TEST(CollatorInterfaceICUTest, ZeroLengthStringsCompareCorrectlyUsingComparisonK } TEST(CollatorInterfaceICUTest, EmptyNullTerminatedStringComparesCorrectly) { - Collation collationSpec; - collationSpec.setLocale("en_US"); + CollationSpec collationSpec; + collationSpec.localeID = "en_US"; UErrorCode status = U_ZERO_ERROR; std::unique_ptr<icu::Collator> coll( @@ -227,8 +227,8 @@ 
TEST(CollatorInterfaceICUTest, EmptyNullTerminatedStringComparesCorrectly) { } TEST(CollatorInterfaceICUTest, EmptyNullTerminatedStringComparesCorrectlyUsingComparisonKeys) { - Collation collationSpec; - collationSpec.setLocale("en_US"); + CollationSpec collationSpec; + collationSpec.localeID = "en_US"; UErrorCode status = U_ZERO_ERROR; std::unique_ptr<icu::Collator> coll( @@ -248,8 +248,8 @@ TEST(CollatorInterfaceICUTest, EmptyNullTerminatedStringComparesCorrectlyUsingCo } TEST(CollatorInterfaceICUTest, LengthOneStringWithNullByteComparesCorrectly) { - Collation collationSpec; - collationSpec.setLocale("en_US"); + CollationSpec collationSpec; + collationSpec.localeID = "en_US"; UErrorCode status = U_ZERO_ERROR; std::unique_ptr<icu::Collator> coll( @@ -267,8 +267,8 @@ TEST(CollatorInterfaceICUTest, LengthOneStringWithNullByteComparesCorrectly) { } TEST(CollatorInterfaceICUTest, LengthOneStringWithNullByteComparesCorrectlyUsingComparisonKeys) { - Collation collationSpec; - collationSpec.setLocale("en_US"); + CollationSpec collationSpec; + collationSpec.localeID = "en_US"; UErrorCode status = U_ZERO_ERROR; std::unique_ptr<icu::Collator> coll( @@ -288,8 +288,8 @@ TEST(CollatorInterfaceICUTest, LengthOneStringWithNullByteComparesCorrectlyUsing } TEST(CollatorInterfaceICUTest, StringsWithEmbeddedNullByteCompareCorrectly) { - Collation collationSpec; - collationSpec.setLocale("en_US"); + CollationSpec collationSpec; + collationSpec.localeID = "en_US"; UErrorCode status = U_ZERO_ERROR; std::unique_ptr<icu::Collator> coll( @@ -308,8 +308,8 @@ TEST(CollatorInterfaceICUTest, StringsWithEmbeddedNullByteCompareCorrectly) { } TEST(CollatorInterfaceICUTest, StringsWithEmbeddedNullByteCompareCorrectlyUsingComparisonKeys) { - Collation collationSpec; - collationSpec.setLocale("en_US"); + CollationSpec collationSpec; + collationSpec.localeID = "en_US"; UErrorCode status = U_ZERO_ERROR; std::unique_ptr<icu::Collator> coll( @@ -330,9 +330,9 @@ TEST(CollatorInterfaceICUTest, 
StringsWithEmbeddedNullByteCompareCorrectlyUsingC } TEST(CollatorInterfaceICUTest, TwoUSEnglishCollationsAreEqual) { - Collation collationSpec; - collationSpec.setLocale("en_US"); - auto locale = icu::Locale::createFromName(collationSpec.getLocale().toString().c_str()); + CollationSpec collationSpec; + collationSpec.localeID = "en_US"; + auto locale = icu::Locale::createFromName(collationSpec.localeID.c_str()); UErrorCode status = U_ZERO_ERROR; std::unique_ptr<icu::Collator> coll1(icu::Collator::createInstance(locale, status)); @@ -348,13 +348,13 @@ TEST(CollatorInterfaceICUTest, TwoUSEnglishCollationsAreEqual) { } TEST(CollatorInterfaceICUTest, USEnglishAndBritishEnglishCollationsAreNotEqual) { - Collation collationSpec1; - collationSpec1.setLocale("en_US"); - auto locale1 = icu::Locale::createFromName(collationSpec1.getLocale().toString().c_str()); + CollationSpec collationSpec1; + collationSpec1.localeID = "en_US"; + auto locale1 = icu::Locale::createFromName(collationSpec1.localeID.c_str()); - Collation collationSpec2; - collationSpec2.setLocale("en_UK"); - auto locale2 = icu::Locale::createFromName(collationSpec2.getLocale().toString().c_str()); + CollationSpec collationSpec2; + collationSpec2.localeID = "en_UK"; + auto locale2 = icu::Locale::createFromName(collationSpec2.localeID.c_str()); UErrorCode status = U_ZERO_ERROR; std::unique_ptr<icu::Collator> coll1(icu::Collator::createInstance(locale1, status)); @@ -370,8 +370,8 @@ TEST(CollatorInterfaceICUTest, USEnglishAndBritishEnglishCollationsAreNotEqual) } TEST(CollatorInterfaceICUTest, FrenchCanadianCollatorComparesCorrectly) { - Collation collationSpec; - collationSpec.setLocale("fr_CA"); + CollationSpec collationSpec; + collationSpec.localeID = "fr_CA"; UErrorCode status = U_ZERO_ERROR; std::unique_ptr<icu::Collator> coll( @@ -394,8 +394,8 @@ TEST(CollatorInterfaceICUTest, FrenchCanadianCollatorComparesCorrectly) { } TEST(CollatorInterfaceICUTest, 
FrenchCanadianCollatorComparesCorrectlyUsingComparisonKeys) { - Collation collationSpec; - collationSpec.setLocale("fr_CA"); + CollationSpec collationSpec; + collationSpec.localeID = "fr_CA"; UErrorCode status = U_ZERO_ERROR; std::unique_ptr<icu::Collator> coll( @@ -567,8 +567,8 @@ TEST(CollatorInterfaceICUTest, DifferentEmbeddedInvalidSequencesAndDifferentFina } TEST(CollatorInterfaceICUTest, ComparisonKeysForEnUsCollatorCorrect) { - Collation collationSpec; - collationSpec.setLocale("en_US"); + CollationSpec collationSpec; + collationSpec.localeID = "en_US"; UErrorCode status = U_ZERO_ERROR; std::unique_ptr<icu::Collator> coll( icu::Collator::createInstance(icu::Locale("en", "US"), status)); @@ -581,8 +581,8 @@ TEST(CollatorInterfaceICUTest, ComparisonKeysForEnUsCollatorCorrect) { } TEST(CollatorInterfaceICUTest, ComparisonKeysForFrCaCollatorCorrect) { - Collation collationSpec; - collationSpec.setLocale("fr_CA"); + CollationSpec collationSpec; + collationSpec.localeID = "fr_CA"; UErrorCode status = U_ZERO_ERROR; std::unique_ptr<icu::Collator> coll( icu::Collator::createInstance(icu::Locale("fr", "CA"), status)); diff --git a/src/mongo/db/query/collation/collator_interface_mock.cpp b/src/mongo/db/query/collation/collator_interface_mock.cpp index ef2d50bca79..1c896c9f9eb 100644 --- a/src/mongo/db/query/collation/collator_interface_mock.cpp +++ b/src/mongo/db/query/collation/collator_interface_mock.cpp @@ -57,16 +57,8 @@ std::string mockTypeToString(CollatorInterfaceMock::MockType type) { } // namespace -Collation makeCollation(StringData locale, StringData version) { - Collation collation(locale.toString()); - // "backwards" is optional. The ICU collator always sets it to true/false based on the locale. 
- collation.setBackwards(false); - collation.setVersion(version); - return collation; -} - CollatorInterfaceMock::CollatorInterfaceMock(MockType mockType) - : CollatorInterface(makeCollation(mockTypeToString(mockType), "mock_version")), + : CollatorInterface(CollationSpec(mockTypeToString(mockType), "mock_version")), _mockType(mockType) {} std::unique_ptr<CollatorInterface> CollatorInterfaceMock::clone() const { diff --git a/src/mongo/db/query/collation/collator_interface_mock_test.cpp b/src/mongo/db/query/collation/collator_interface_mock_test.cpp index 9e5994f4f30..340e9690ef6 100644 --- a/src/mongo/db/query/collation/collator_interface_mock_test.cpp +++ b/src/mongo/db/query/collation/collator_interface_mock_test.cpp @@ -191,9 +191,9 @@ TEST(CollatorInterfaceMockSelfTest, MockCollatorReportsMockVersionString) { CollatorInterfaceMock reverseCollator(CollatorInterfaceMock::MockType::kReverseString); CollatorInterfaceMock alwaysEqualCollator(CollatorInterfaceMock::MockType::kAlwaysEqual); CollatorInterfaceMock toLowerCollator(CollatorInterfaceMock::MockType::kToLowerString); - ASSERT_EQ(*reverseCollator.getSpec().getVersion(), "mock_version"); - ASSERT_EQ(*alwaysEqualCollator.getSpec().getVersion(), "mock_version"); - ASSERT_EQ(*toLowerCollator.getSpec().getVersion(), "mock_version"); + ASSERT_EQ(reverseCollator.getSpec().version, "mock_version"); + ASSERT_EQ(alwaysEqualCollator.getSpec().version, "mock_version"); + ASSERT_EQ(toLowerCollator.getSpec().version, "mock_version"); } TEST(CollatorInterfaceMockSelfTest, StringsAreHashedWithRespectToCollation) { diff --git a/src/mongo/idl/basic_types.idl b/src/mongo/idl/basic_types.idl index c82abccf4f7..edd724d10c8 100644 --- a/src/mongo/idl/basic_types.idl +++ b/src/mongo/idl/basic_types.idl @@ -263,29 +263,19 @@ enums: kLower: lower kOff: off - CollationStrength: - description: Controls the set of characteristics used to compare strings. 
- type: int - values: - kPrimary: 1 - kSecondary: 2 - kTertiary: 3 - kQuaternary: 4 - kIdentical: 5 - CollationAlternate: description: Whether collation should consider whitespace and punctuation as base characters for purposes of comparison. type: string values: - kNonIgnorable: non-ignorable - kShifted: shifted + kAlternateNonIgnorable: non-ignorable + kAlternateShifted: shifted CollationMaxVariable: description: Up to which characters are considered ignorable when alternate is "shifted". type: string values: - kPunct: punct - kSpace: space + kMaxVariablePunct: punct + kMaxVariableSpace: space structs: OkReply: @@ -354,43 +344,31 @@ structs: fields: locale: type: string - # Turns case sensitivity on at strength 1 or 2. caseLevel: type: bool - default: false + optional: true caseFirst: type: CollationCaseFirst - default: kOff - # For backwards-compatibility, we must accept longs, ints, and doubles, so we cannot - # use the int-typed CollationStrength enum directly. + optional: true strength: - type: safeInt - default: static_cast<int>(CollationStrength::kTertiary) + type: exactInt64 + optional: true validator: { gte: 0, lte: 5 } - # Order numbers based on numerical order and not lexicographic order. numericOrdering: type: bool - default: false + optional: true alternate: type: CollationAlternate - default: kNonIgnorable + optional: true maxVariable: type: CollationMaxVariable - default: kPunct - # Any language that uses multiple combining characters such as Arabic, ancient Greek, - # Hebrew, Hindi, Thai or Vietnamese either requires Normalization Checking to be on, or - # the text to go through a normalization process before collation. + optional: true normalization: type: bool - default: false - # Causes accent differences to be considered in reverse order, as it is done in the - # French language. + optional: true backwards: type: bool optional: true - # Indicates the version of the collator. 
It is used to ensure that we do not mix - # versions by, for example, constructing an index with one version of ICU and then - # attempting to use this index with a server that is built against a newer ICU version. version: type: string optional: true |