diff options
author | David Storch <david.storch@10gen.com> | 2016-02-25 18:10:44 -0500 |
---|---|---|
committer | David Storch <david.storch@10gen.com> | 2016-03-09 15:50:28 -0500 |
commit | 937462b0017316587c8856a5e6a0d1d83418ef36 (patch) | |
tree | 146d74a3257d9b5936f8f7303f27341655d721b4 | |
parent | e300d62fdc1af32c943e54dfb5e43b9fa2c8bb75 (diff) | |
download | mongo-937462b0017316587c8856a5e6a0d1d83418ef36.tar.gz |
SERVER-22738 add CollatorInterface::getComparisonKey()
-rw-r--r-- | src/mongo/db/query/collation/SConscript | 10 | ||||
-rw-r--r-- | src/mongo/db/query/collation/collation_serializer.cpp (renamed from src/mongo/db/query/collation/collation_spec_serializer.cpp) | 17 | ||||
-rw-r--r-- | src/mongo/db/query/collation/collation_serializer.h (renamed from src/mongo/db/query/collation/collation_spec_serializer.h) | 26 | ||||
-rw-r--r-- | src/mongo/db/query/collation/collation_serializer_test.cpp (renamed from src/mongo/db/query/collation/collation_spec_serializer_test.cpp) | 38 | ||||
-rw-r--r-- | src/mongo/db/query/collation/collation_spec.h | 13 | ||||
-rw-r--r-- | src/mongo/db/query/collation/collator_factory_icu_test.cpp | 21 | ||||
-rw-r--r-- | src/mongo/db/query/collation/collator_interface.h | 53 | ||||
-rw-r--r-- | src/mongo/db/query/collation/collator_interface_icu.cpp | 45 | ||||
-rw-r--r-- | src/mongo/db/query/collation/collator_interface_icu.h | 2 | ||||
-rw-r--r-- | src/mongo/db/query/collation/collator_interface_icu_test.cpp | 230 | ||||
-rw-r--r-- | src/third_party/icu4c-56.1/source/SConscript | 2 |
11 files changed, 414 insertions, 43 deletions
diff --git a/src/mongo/db/query/collation/SConscript b/src/mongo/db/query/collation/SConscript index 0fad3d2bb8d..7a65216f7c2 100644 --- a/src/mongo/db/query/collation/SConscript +++ b/src/mongo/db/query/collation/SConscript @@ -33,9 +33,9 @@ env.CppUnitTest( ) env.Library( - target="collation_spec_serializer", + target="collation_serializer", source=[ - "collation_spec_serializer.cpp", + "collation_serializer.cpp", ], LIBDEPS=[ "$BUILD_DIR/mongo/base", @@ -44,12 +44,12 @@ env.Library( ) env.CppUnitTest( - target="collation_spec_serializer_test", + target="collation_serializer_test", source=[ - "collation_spec_serializer_test.cpp", + "collation_serializer_test.cpp", ], LIBDEPS=[ - "collation_spec_serializer", + "collation_serializer", ], ) diff --git a/src/mongo/db/query/collation/collation_spec_serializer.cpp b/src/mongo/db/query/collation/collation_serializer.cpp index 33975e2bb9d..e5d38bba7cd 100644 --- a/src/mongo/db/query/collation/collation_spec_serializer.cpp +++ b/src/mongo/db/query/collation/collation_serializer.cpp @@ -28,14 +28,16 @@ #include "mongo/platform/basic.h" -#include "mongo/db/query/collation/collation_spec_serializer.h" +#include "mongo/db/query/collation/collation_serializer.h" +#include "mongo/base/string_data.h" #include "mongo/bson/bsonobj.h" #include "mongo/bson/bsonobjbuilder.h" +#include "mongo/db/query/collation/collation_spec.h" namespace mongo { -BSONObj CollationSpecSerializer::toBSON(const CollationSpec& spec) { +BSONObj CollationSerializer::specToBSON(const CollationSpec& spec) { BSONObjBuilder builder; builder.append(CollationSpec::kLocaleField, spec.localeID); builder.append(CollationSpec::kCaseLevelField, spec.caseLevel); @@ -84,4 +86,15 @@ BSONObj CollationSpecSerializer::toBSON(const CollationSpec& spec) { return builder.obj(); } +// TODO SERVER-22372: Add test coverage for this method once the CollatorInterfaceMock is +// implemented. +void CollationSerializer::appendCollationKey(StringData fieldName, + const CollatorInterface::ComparisonKey& key, + BSONObjBuilder* bob) { + const auto keyData = key.getKeyData(); + // 'keyData' should not contain a trailing null byte, but the BSONObjBuilder will add one after + // appending the string. + bob->append(fieldName, keyData); +} + } // namespace mongo diff --git a/src/mongo/db/query/collation/collation_spec_serializer.h b/src/mongo/db/query/collation/collation_serializer.h index 774399610af..4a273abd451 100644 --- a/src/mongo/db/query/collation/collation_spec_serializer.h +++ b/src/mongo/db/query/collation/collation_serializer.h @@ -28,19 +28,35 @@ #pragma once -#include "mongo/db/query/collation/collation_spec.h" +#include "mongo/db/query/collation/collator_interface.h" namespace mongo { class BSONObj; +class BSONObjBuilder; +class StringData; -class CollationSpecSerializer { +struct CollationSpec; + +/** + * Provides functions for serializing collation-related objects. + */ +class CollationSerializer { public: /** - * Converts 'spec' to its BSONObj representation. The resulting BSON can be stored and later - * used to recreate the corresponding CollatorInterface. + * Converts CollationSpec 'spec' to its BSONObj representation. The resulting BSON can be stored + * and later used to recreate the corresponding CollatorInterface. + * + * The resulting BSONObj is owned by the caller. + */ + static BSONObj specToBSON(const CollationSpec& spec); + + /** + * Appends 'key' to 'bob' as a BSONElement of BSONType string with field name 'fieldName'. */ - static BSONObj toBSON(const CollationSpec& spec); + static void appendCollationKey(StringData fieldName, + const CollatorInterface::ComparisonKey& key, + BSONObjBuilder* bob); }; } // namespace mongo diff --git a/src/mongo/db/query/collation/collation_spec_serializer_test.cpp b/src/mongo/db/query/collation/collation_serializer_test.cpp index b12b3d0e27f..9fa9592b5b2 100644 --- a/src/mongo/db/query/collation/collation_spec_serializer_test.cpp +++ b/src/mongo/db/query/collation/collation_serializer_test.cpp @@ -28,7 +28,7 @@ #include "mongo/platform/basic.h" -#include "mongo/db/query/collation/collation_spec_serializer.h" +#include "mongo/db/query/collation/collation_serializer.h" #include "mongo/bson/bsonobjbuilder.h" #include "mongo/unittest/unittest.h" @@ -37,7 +37,7 @@ namespace { using namespace mongo; -TEST(CollationSpecSerializerTest, ToBSONCorrectlySerializesDefaults) { +TEST(CollationSerializerTest, ToBSONCorrectlySerializesDefaults) { CollationSpec collationSpec; collationSpec.localeID = "myLocale"; @@ -51,10 +51,10 @@ TEST(CollationSpecSerializerTest, ToBSONCorrectlySerializesDefaults) { << "punct" << "normalization" << false << "backwards" << false); - ASSERT_EQ(expectedObj, CollationSpecSerializer::toBSON(collationSpec)); + ASSERT_EQ(expectedObj, CollationSerializer::specToBSON(collationSpec)); } -TEST(CollationSpecSerializerTest, ToBSONCorrectlySerializesCaseFirstUpper) { +TEST(CollationSerializerTest, ToBSONCorrectlySerializesCaseFirstUpper) { CollationSpec collationSpec; collationSpec.localeID = "myLocale"; collationSpec.caseFirst = CollationSpec::CaseFirstType::kUpper; @@ -69,10 +69,10 @@ TEST(CollationSpecSerializerTest, ToBSONCorrectlySerializesCaseFirstUpper) { << "punct" << "normalization" << false << "backwards" << false); - ASSERT_EQ(expectedObj, CollationSpecSerializer::toBSON(collationSpec)); + ASSERT_EQ(expectedObj, CollationSerializer::specToBSON(collationSpec)); } -TEST(CollationSpecSerializerTest, ToBSONCorrectlySerializesCaseFirstLower) { +TEST(CollationSerializerTest, ToBSONCorrectlySerializesCaseFirstLower) { CollationSpec collationSpec; collationSpec.localeID = "myLocale"; collationSpec.caseFirst = CollationSpec::CaseFirstType::kLower; @@ -87,10 +87,10 @@ TEST(CollationSpecSerializerTest, ToBSONCorrectlySerializesCaseFirstLower) { << "punct" << "normalization" << false << "backwards" << false); - ASSERT_EQ(expectedObj, CollationSpecSerializer::toBSON(collationSpec)); + ASSERT_EQ(expectedObj, CollationSerializer::specToBSON(collationSpec)); } -TEST(CollationSpecSerializerTest, ToBSONCorrectlySerializesPrimaryStrength) { +TEST(CollationSerializerTest, ToBSONCorrectlySerializesPrimaryStrength) { CollationSpec collationSpec; collationSpec.localeID = "myLocale"; collationSpec.strength = CollationSpec::StrengthType::kPrimary; @@ -105,10 +105,10 @@ TEST(CollationSpecSerializerTest, ToBSONCorrectlySerializesPrimaryStrength) { << "punct" << "normalization" << false << "backwards" << false); - ASSERT_EQ(expectedObj, CollationSpecSerializer::toBSON(collationSpec)); + ASSERT_EQ(expectedObj, CollationSerializer::specToBSON(collationSpec)); } -TEST(CollationSpecSerializerTest, ToBSONCorrectlySerializesSecondaryStrength) { +TEST(CollationSerializerTest, ToBSONCorrectlySerializesSecondaryStrength) { CollationSpec collationSpec; collationSpec.localeID = "myLocale"; collationSpec.strength = CollationSpec::StrengthType::kSecondary; @@ -123,10 +123,10 @@ TEST(CollationSpecSerializerTest, ToBSONCorrectlySerializesSecondaryStrength) { << "punct" << "normalization" << false << "backwards" << false); - ASSERT_EQ(expectedObj, CollationSpecSerializer::toBSON(collationSpec)); + ASSERT_EQ(expectedObj, CollationSerializer::specToBSON(collationSpec)); } -TEST(CollationSpecSerializerTest, ToBSONCorrectlySerializesQuaternaryStrength) { +TEST(CollationSerializerTest, ToBSONCorrectlySerializesQuaternaryStrength) { CollationSpec collationSpec; collationSpec.localeID = "myLocale"; collationSpec.strength = CollationSpec::StrengthType::kQuaternary; @@ -141,10 +141,10 @@ TEST(CollationSpecSerializerTest, ToBSONCorrectlySerializesQuaternaryStrength) { << "punct" << "normalization" << false << "backwards" << false); - ASSERT_EQ(expectedObj, CollationSpecSerializer::toBSON(collationSpec)); + ASSERT_EQ(expectedObj, CollationSerializer::specToBSON(collationSpec)); } -TEST(CollationSpecSerializerTest, ToBSONCorrectlySerializesIdenticalStrength) { +TEST(CollationSerializerTest, ToBSONCorrectlySerializesIdenticalStrength) { CollationSpec collationSpec; collationSpec.localeID = "myLocale"; collationSpec.strength = CollationSpec::StrengthType::kIdentical; @@ -159,10 +159,10 @@ TEST(CollationSpecSerializerTest, ToBSONCorrectlySerializesIdenticalStrength) { << "punct" << "normalization" << false << "backwards" << false); - ASSERT_EQ(expectedObj, CollationSpecSerializer::toBSON(collationSpec)); + ASSERT_EQ(expectedObj, CollationSerializer::specToBSON(collationSpec)); } -TEST(CollationSpecSerializerTest, ToBSONCorrectlySerializesAlternateShifted) { +TEST(CollationSerializerTest, ToBSONCorrectlySerializesAlternateShifted) { CollationSpec collationSpec; collationSpec.localeID = "myLocale"; collationSpec.alternate = CollationSpec::AlternateType::kShifted; @@ -177,10 +177,10 @@ TEST(CollationSpecSerializerTest, ToBSONCorrectlySerializesAlternateShifted) { << "punct" << "normalization" << false << "backwards" << false); - ASSERT_EQ(expectedObj, CollationSpecSerializer::toBSON(collationSpec)); + ASSERT_EQ(expectedObj, CollationSerializer::specToBSON(collationSpec)); } -TEST(CollationSpecSerializerTest, ToBSONCorrectlySerializesMaxVariableSpace) { +TEST(CollationSerializerTest, ToBSONCorrectlySerializesMaxVariableSpace) { CollationSpec collationSpec; collationSpec.localeID = "myLocale"; collationSpec.maxVariable = CollationSpec::MaxVariableType::kSpace; @@ -195,7 +195,7 @@ TEST(CollationSpecSerializerTest, ToBSONCorrectlySerializesMaxVariableSpace) { << "space" << "normalization" << false << "backwards" << false); - ASSERT_EQ(expectedObj, CollationSpecSerializer::toBSON(collationSpec)); + ASSERT_EQ(expectedObj, CollationSerializer::specToBSON(collationSpec)); } } // namespace diff --git a/src/mongo/db/query/collation/collation_spec.h b/src/mongo/db/query/collation/collation_spec.h index d92621164be..63304c3393a 100644 --- a/src/mongo/db/query/collation/collation_spec.h +++ b/src/mongo/db/query/collation/collation_spec.h @@ -34,7 +34,7 @@ namespace mongo { /** * A CollationSpec is a parsed representation of a user-provided collation BSONObj. Can be - * re-serialized to BSON using the CollationSpecSerializer. + * re-serialized to BSON using CollationSerializer. */ struct CollationSpec { // Controls whether uppercase sorts before lowercase or vice versa. @@ -110,6 +110,17 @@ struct CollationSpec { static const char* kMaxVariablePunct; static const char* kMaxVariableSpace; + /** + * Constructs a CollationSpec with no locale, where all other fields have their default values. + */ + CollationSpec() = default; + + /** + * Constructs a CollationSpec for the given locale, where all other fields have their default + * values. + */ + CollationSpec(std::string locale) : localeID(std::move(locale)) {} + // A string such as "en_US", identifying the language, country, or other attributes of the // locale for this collation. // Required. diff --git a/src/mongo/db/query/collation/collator_factory_icu_test.cpp b/src/mongo/db/query/collation/collator_factory_icu_test.cpp index c47d065f5cf..cd4fca42430 100644 --- a/src/mongo/db/query/collation/collator_factory_icu_test.cpp +++ b/src/mongo/db/query/collation/collator_factory_icu_test.cpp @@ -660,4 +660,25 @@ TEST(CollatorFactoryICUTest, SecondaryStrengthBackwardsTrue) { // u8"\u00E1" is latin small letter a with acute. ASSERT_GT(collator.getValue()->compare(u8"a\u00E1", u8"\u00E1a"), 0); } + +TEST(CollatorInterfaceICUTest, FactoryMadeCollatorComparisonKeysCorrectEnUS) { + CollatorFactoryICU factory; + auto collator = factory.makeFromBSON(BSON("locale" + << "en_US")); + ASSERT_OK(collator.getStatus()); + const auto comparisonKeyAB = collator.getValue()->getComparisonKey("ab"); + const auto comparisonKeyABB = collator.getValue()->getComparisonKey("abb"); + const auto comparisonKeyBA = collator.getValue()->getComparisonKey("ba"); + + ASSERT_LT(comparisonKeyAB.getKeyData().compare(comparisonKeyBA.getKeyData()), 0); + ASSERT_GT(comparisonKeyBA.getKeyData().compare(comparisonKeyAB.getKeyData()), 0); + ASSERT_EQ(comparisonKeyAB.getKeyData().compare(comparisonKeyAB.getKeyData()), 0); + + ASSERT_LT(comparisonKeyAB.getKeyData().compare(comparisonKeyABB.getKeyData()), 0); + ASSERT_GT(comparisonKeyABB.getKeyData().compare(comparisonKeyAB.getKeyData()), 0); + + ASSERT_GT(comparisonKeyBA.getKeyData().compare(comparisonKeyABB.getKeyData()), 0); + ASSERT_LT(comparisonKeyABB.getKeyData().compare(comparisonKeyBA.getKeyData()), 0); +} + } // namespace diff --git a/src/mongo/db/query/collation/collator_interface.h b/src/mongo/db/query/collation/collator_interface.h index be857e10221..767eca2bea3 100644 --- a/src/mongo/db/query/collation/collator_interface.h +++ b/src/mongo/db/query/collation/collator_interface.h @@ -28,13 +28,14 @@ #pragma once +#include <string> + #include "mongo/base/disallow_copying.h" +#include "mongo/base/string_data.h" #include "mongo/db/query/collation/collation_spec.h" namespace mongo { -class StringData; - /** * An interface for ordering and matching according to a collation. Instances should be retrieved * from the CollatorFactoryInterface and may not be copied. @@ -42,15 +43,46 @@ class StringData; * All methods are thread-safe. * * Does not throw exceptions. - * - * TODO SERVER-22738: Extend interface with a getComparisonKey() method and implement a - * MongoDB-specific abstraction for a collator-generated comparison key. */ class CollatorInterface { MONGO_DISALLOW_COPYING(CollatorInterface); public: /** + * Every string has a corresponding ComparisonKey with respect to this collator. Two + * ComparisonKeys can be lexicographically ordered in order to obtain the collation's sort order + * and equivalence classes. + * + * A ComparisonKey is logically an owned array of bytes. It is cheap to move but potentially + * expensive to copy. + * + * ComparisonKeys may only be obtained via CollatorInterface::getComparisonKey(). + * + * In general, two strings should be compared with respect to a collation using + * CollatorInterface::compare(). ComparisonKey::compare() may be faster if repeatedly comparing + * the same string(s). + */ + class ComparisonKey { + public: + /** + * Returns the underlying byte array represented by this ComparisonKey. + * + * The returned StringData may not outlive the ComparisonKey used to create it, since the + * ComparisonKey owns the underlying byte array. + */ + StringData getKeyData() const { + return StringData(_key); + } + + private: + friend class CollatorInterface; + + ComparisonKey(std::string key) : _key(std::move(key)) {} + + std::string _key; + }; + + /** * Constructs a CollatorInterface capable of computing the collation described by 'spec'. */ CollatorInterface(CollationSpec spec) : _spec(std::move(spec)) {} @@ -65,6 +97,12 @@ public: virtual int compare(StringData left, StringData right) = 0; /** + * Returns the comparison key for 'stringData', according to this collation. See ComparisonKey's + * comments for details. + */ + virtual ComparisonKey getComparisonKey(StringData stringData) = 0; + + /** * Returns whether this collation has the same matching and sorting semantics as 'other'. */ bool operator==(const CollatorInterface& other) const { @@ -86,6 +124,11 @@ public: return _spec; } +protected: + static ComparisonKey makeComparisonKey(std::string key) { + return ComparisonKey(std::move(key)); + } + private: const CollationSpec _spec; }; diff --git a/src/mongo/db/query/collation/collator_interface_icu.cpp b/src/mongo/db/query/collation/collator_interface_icu.cpp index d5a698e924b..0da6579a707 100644 --- a/src/mongo/db/query/collation/collator_interface_icu.cpp +++ b/src/mongo/db/query/collation/collator_interface_icu.cpp @@ -30,6 +30,8 @@ #include "mongo/db/query/collation/collator_interface_icu.h" +#include <unicode/sortkey.h> + #include "mongo/util/assert_util.h" namespace mongo { @@ -39,14 +41,47 @@ CollatorInterfaceICU::CollatorInterfaceICU(CollationSpec spec, : CollatorInterface(std::move(spec)), _collator(std::move(collator)) {} int CollatorInterfaceICU::compare(StringData left, StringData right) { - // TODO: What happens if 'status' is a failure code? In what circumstances could this happen? + // TODO SERVER-23028: What happens if 'status' is a failure code? In what circumstances could + // this happen? UErrorCode status = U_ZERO_ERROR; - auto compareResult = _collator->compare(icu::UnicodeString(left.rawData(), left.size()), - icu::UnicodeString(right.rawData(), right.size()), - status); + auto compareResult = _collator->compareUTF8(icu::StringPiece(left.rawData(), left.size()), + icu::StringPiece(right.rawData(), right.size()), + status); invariant(U_SUCCESS(status)); - return compareResult; + switch (compareResult) { + case UCOL_EQUAL: + return 0; + case UCOL_GREATER: + return 1; + case UCOL_LESS: + return -1; + } + + MONGO_UNREACHABLE; +} + +CollatorInterface::ComparisonKey CollatorInterfaceICU::getComparisonKey(StringData stringData) { + // A StringPiece is ICU's StringData. They are logically the same abstraction. + const icu::StringPiece stringPiece(stringData.rawData(), stringData.size()); + + // TODO SERVER-23028: What happens if 'status' is a failure code? In what circumstances could + // this happen? + UErrorCode status = U_ZERO_ERROR; + icu::CollationKey icuKey; + _collator->getCollationKey(icu::UnicodeString::fromUTF8(stringPiece), icuKey, status); + invariant(U_SUCCESS(status)); + + int32_t keyLength; + const uint8_t* keyBuffer = icuKey.getByteArray(keyLength); + invariant(keyLength > 0); + invariant(keyBuffer); + + // The last byte of the sort key should always be null. When we construct the comparison key, we + // omit the trailing null byte. + invariant(keyBuffer[keyLength - 1u] == '\0'); + const char* charBuffer = reinterpret_cast<const char*>(keyBuffer); + return makeComparisonKey(std::string(charBuffer, keyLength - 1u)); } } // namespace mongo diff --git a/src/mongo/db/query/collation/collator_interface_icu.h b/src/mongo/db/query/collation/collator_interface_icu.h index 19d73c3134e..c52de3f0b94 100644 --- a/src/mongo/db/query/collation/collator_interface_icu.h +++ b/src/mongo/db/query/collation/collator_interface_icu.h @@ -45,6 +45,8 @@ public: int compare(StringData left, StringData right) final; + ComparisonKey getComparisonKey(StringData stringData) final; + private: // The ICU implementation of the collator to which we delegate interesting work. Const methods // on the ICU collator are expected to be thread-safe. diff --git a/src/mongo/db/query/collation/collator_interface_icu_test.cpp b/src/mongo/db/query/collation/collator_interface_icu_test.cpp index 969ddb30c75..bf74b9a74b5 100644 --- a/src/mongo/db/query/collation/collator_interface_icu_test.cpp +++ b/src/mongo/db/query/collation/collator_interface_icu_test.cpp @@ -69,6 +69,188 @@ TEST(CollatorInterfaceICUTest, ASCIIComparisonWorksUsingLocaleStringParsing) { ASSERT_EQ(icuCollator.compare("ab", "ab"), 0); } +TEST(CollatorInterfaceICUTest, ASCIIComparisonWorksUsingComparisonKeys) { + CollationSpec collationSpec; + collationSpec.localeID = "en_US"; + + auto locale = icu::Locale::createFromName(collationSpec.localeID.c_str()); + ASSERT_EQ(std::string("en"), locale.getLanguage()); + ASSERT_EQ(std::string("US"), locale.getCountry()); + + UErrorCode status = U_ZERO_ERROR; + std::unique_ptr<icu::Collator> coll(icu::Collator::createInstance(locale, status)); + ASSERT(U_SUCCESS(status)); + + CollatorInterfaceICU icuCollator(collationSpec, std::move(coll)); + const auto comparisonKeyAB = icuCollator.getComparisonKey("ab"); + const auto comparisonKeyABB = icuCollator.getComparisonKey("abb"); + const auto comparisonKeyBA = icuCollator.getComparisonKey("ba"); + + ASSERT_LT(comparisonKeyAB.getKeyData().compare(comparisonKeyBA.getKeyData()), 0); + ASSERT_GT(comparisonKeyBA.getKeyData().compare(comparisonKeyAB.getKeyData()), 0); + ASSERT_EQ(comparisonKeyAB.getKeyData().compare(comparisonKeyAB.getKeyData()), 0); + + ASSERT_LT(comparisonKeyAB.getKeyData().compare(comparisonKeyABB.getKeyData()), 0); + ASSERT_GT(comparisonKeyABB.getKeyData().compare(comparisonKeyAB.getKeyData()), 0); + + ASSERT_GT(comparisonKeyBA.getKeyData().compare(comparisonKeyABB.getKeyData()), 0); + ASSERT_LT(comparisonKeyABB.getKeyData().compare(comparisonKeyBA.getKeyData()), 0); +} + +TEST(CollatorInterfaceICUTest, ZeroLengthStringsCompareCorrectly) { + CollationSpec collationSpec; + collationSpec.localeID = "en_US"; + + UErrorCode status = U_ZERO_ERROR; + std::unique_ptr<icu::Collator> coll( + icu::Collator::createInstance(icu::Locale("en", "US"), status)); + ASSERT(U_SUCCESS(status)); + + CollatorInterfaceICU icuCollator(collationSpec, std::move(coll)); + ASSERT_EQ(icuCollator.compare(StringData(), StringData()), 0); + ASSERT_LT(icuCollator.compare(StringData(), "abc"), 0); + ASSERT_GT(icuCollator.compare("abc", StringData()), 0); +} + +TEST(CollatorInterfaceICUTest, ZeroLengthStringsCompareCorrectlyUsingComparisonKeys) { + CollationSpec collationSpec; + collationSpec.localeID = "en_US"; + + UErrorCode status = U_ZERO_ERROR; + std::unique_ptr<icu::Collator> coll( + icu::Collator::createInstance(icu::Locale("en", "US"), status)); + ASSERT(U_SUCCESS(status)); + + CollatorInterfaceICU icuCollator(collationSpec, std::move(coll)); + auto emptyKey = icuCollator.getComparisonKey(StringData()); + auto comparisonKeyABC = icuCollator.getComparisonKey("abc"); + ASSERT_EQ(emptyKey.getKeyData().compare(emptyKey.getKeyData()), 0); + ASSERT_LT(emptyKey.getKeyData().compare(comparisonKeyABC.getKeyData()), 0); + ASSERT_GT(comparisonKeyABC.getKeyData().compare(emptyKey.getKeyData()), 0); +} + +TEST(CollatorInterfaceICUTest, EmptyNullTerminatedStringComparesCorrectly) { + CollationSpec collationSpec; + collationSpec.localeID = "en_US"; + + UErrorCode status = U_ZERO_ERROR; + std::unique_ptr<icu::Collator> coll( + icu::Collator::createInstance(icu::Locale("en", "US"), status)); + ASSERT(U_SUCCESS(status)); + + StringData emptyString(""); + ASSERT(emptyString.rawData()); + ASSERT_EQ(emptyString.size(), 0u); + + CollatorInterfaceICU icuCollator(collationSpec, std::move(coll)); + ASSERT_EQ(icuCollator.compare(emptyString, emptyString), 0); + ASSERT_LT(icuCollator.compare(emptyString, "abc"), 0); + ASSERT_GT(icuCollator.compare("abc", emptyString), 0); +} + +TEST(CollatorInterfaceICUTest, EmptyNullTerminatedStringComparesCorrectlyUsingComparisonKeys) { + CollationSpec collationSpec; + collationSpec.localeID = "en_US"; + + UErrorCode status = U_ZERO_ERROR; + std::unique_ptr<icu::Collator> coll( + icu::Collator::createInstance(icu::Locale("en", "US"), status)); + ASSERT(U_SUCCESS(status)); + + StringData emptyString(""); + ASSERT(emptyString.rawData()); + ASSERT_EQ(emptyString.size(), 0u); + + CollatorInterfaceICU icuCollator(collationSpec, std::move(coll)); + auto emptyKey = icuCollator.getComparisonKey(emptyString); + auto comparisonKeyABC = icuCollator.getComparisonKey("abc"); + ASSERT_EQ(emptyKey.getKeyData().compare(emptyKey.getKeyData()), 0); + ASSERT_LT(emptyKey.getKeyData().compare(comparisonKeyABC.getKeyData()), 0); + ASSERT_GT(comparisonKeyABC.getKeyData().compare(emptyKey.getKeyData()), 0); +} + +TEST(CollatorInterfaceICUTest, LengthOneStringWithNullByteComparesCorrectly) { + CollationSpec collationSpec; + collationSpec.localeID = "en_US"; + + UErrorCode status = U_ZERO_ERROR; + std::unique_ptr<icu::Collator> coll( + icu::Collator::createInstance(icu::Locale("en", "US"), status)); + ASSERT(U_SUCCESS(status)); + + StringData nullByte("\0", StringData::LiteralTag()); + ASSERT_EQ(nullByte.rawData()[0], '\0'); + ASSERT_EQ(nullByte.size(), 1u); + + CollatorInterfaceICU icuCollator(collationSpec, std::move(coll)); + ASSERT_EQ(icuCollator.compare(nullByte, nullByte), 0); + ASSERT_LT(icuCollator.compare(nullByte, "abc"), 0); + ASSERT_GT(icuCollator.compare("abc", nullByte), 0); +} + +TEST(CollatorInterfaceICUTest, LengthOneStringWithNullByteComparesCorrectlyUsingComparisonKeys) { + CollationSpec collationSpec; + collationSpec.localeID = "en_US"; + + UErrorCode status = U_ZERO_ERROR; + std::unique_ptr<icu::Collator> coll( + icu::Collator::createInstance(icu::Locale("en", "US"), status)); + ASSERT(U_SUCCESS(status)); + + StringData nullByte("\0", StringData::LiteralTag()); + ASSERT_EQ(nullByte.rawData()[0], '\0'); + ASSERT_EQ(nullByte.size(), 1u); + + CollatorInterfaceICU icuCollator(collationSpec, std::move(coll)); + auto nullByteKey = icuCollator.getComparisonKey(nullByte); + auto comparisonKeyABC = icuCollator.getComparisonKey("abc"); + ASSERT_EQ(nullByteKey.getKeyData().compare(nullByteKey.getKeyData()), 0); + ASSERT_LT(nullByteKey.getKeyData().compare(comparisonKeyABC.getKeyData()), 0); + ASSERT_GT(comparisonKeyABC.getKeyData().compare(nullByteKey.getKeyData()), 0); +} + +TEST(CollatorInterfaceICUTest, StringsWithEmbeddedNullByteCompareCorrectly) { + CollationSpec collationSpec; + collationSpec.localeID = "en_US"; + + UErrorCode status = U_ZERO_ERROR; + std::unique_ptr<icu::Collator> coll( + icu::Collator::createInstance(icu::Locale("en", "US"), status)); + ASSERT(U_SUCCESS(status)); + + StringData string1("a\0b", StringData::LiteralTag()); + ASSERT_EQ(string1.size(), 3u); + StringData string2("a\0c", StringData::LiteralTag()); + ASSERT_EQ(string2.size(), 3u); + + CollatorInterfaceICU icuCollator(collationSpec, std::move(coll)); + ASSERT_EQ(icuCollator.compare(string1, string1), 0); + ASSERT_LT(icuCollator.compare(string1, string2), 0); + ASSERT_GT(icuCollator.compare(string2, string1), 0); +} + +TEST(CollatorInterfaceICUTest, StringsWithEmbeddedNullByteCompareCorrectlyUsingComparisonKeys) { + CollationSpec collationSpec; + collationSpec.localeID = "en_US"; + + UErrorCode status = U_ZERO_ERROR; + std::unique_ptr<icu::Collator> coll( + icu::Collator::createInstance(icu::Locale("en", "US"), status)); + ASSERT(U_SUCCESS(status)); + + StringData string1("a\0b", StringData::LiteralTag()); + ASSERT_EQ(string1.size(), 3u); + StringData string2("a\0c", StringData::LiteralTag()); + ASSERT_EQ(string2.size(), 3u); + + CollatorInterfaceICU icuCollator(collationSpec, std::move(coll)); + auto key1 = icuCollator.getComparisonKey(string1); + auto key2 = icuCollator.getComparisonKey(string2); + ASSERT_EQ(key1.getKeyData().compare(key1.getKeyData()), 0); + ASSERT_LT(key1.getKeyData().compare(key2.getKeyData()), 0); + ASSERT_GT(key2.getKeyData().compare(key1.getKeyData()), 0); +} + TEST(CollatorInterfaceICUTest, TwoUSEnglishCollationsAreEqual) { CollationSpec collationSpec; collationSpec.localeID = "en_US"; @@ -109,4 +291,52 @@ TEST(CollatorInterfaceICUTest, USEnglishAndBritishEnglishCollationsAreNotEqual) ASSERT_TRUE(icuCollator1 != icuCollator2); } +TEST(CollatorInterfaceICUTest, FrenchCanadianCollatorComparesCorrectly) { + CollationSpec collationSpec; + collationSpec.localeID = "fr_CA"; + + UErrorCode status = U_ZERO_ERROR; + std::unique_ptr<icu::Collator> coll( + icu::Collator::createInstance(icu::Locale("fr", "CA"), status)); + ASSERT(U_SUCCESS(status)); + + CollatorInterfaceICU icuCollator(collationSpec, std::move(coll)); + + StringData circumflex(u8"p\u00EAche"); + StringData graveAndAcute(u8"p\u00E8ch\u00E9"); + StringData circumflexAndAcute(u8"p\u00EAch\u00E9"); + + ASSERT_LT(icuCollator.compare(circumflex, graveAndAcute), 0); + ASSERT_LT(icuCollator.compare(graveAndAcute, circumflexAndAcute), 0); + ASSERT_LT(icuCollator.compare(circumflex, circumflexAndAcute), 0); + + ASSERT_GT(icuCollator.compare(circumflexAndAcute, graveAndAcute), 0); + ASSERT_GT(icuCollator.compare(graveAndAcute, circumflex), 0); + ASSERT_GT(icuCollator.compare(circumflexAndAcute, circumflex), 0); +} + +TEST(CollatorInterfaceICUTest, FrenchCanadianCollatorComparesCorrectlyUsingComparisonKeys) { + CollationSpec collationSpec; + collationSpec.localeID = "fr_CA"; + + UErrorCode status = U_ZERO_ERROR; + std::unique_ptr<icu::Collator> coll( + icu::Collator::createInstance(icu::Locale("fr", "CA"), status)); + ASSERT(U_SUCCESS(status)); + + CollatorInterfaceICU icuCollator(collationSpec, std::move(coll)); + + auto circumflex = icuCollator.getComparisonKey(u8"p\u00EAche"); + auto graveAndAcute = icuCollator.getComparisonKey(u8"p\u00E8ch\u00E9"); + auto circumflexAndAcute = icuCollator.getComparisonKey(u8"p\u00EAch\u00E9"); + + ASSERT_LT(circumflex.getKeyData().compare(graveAndAcute.getKeyData()), 0); + ASSERT_LT(graveAndAcute.getKeyData().compare(circumflexAndAcute.getKeyData()), 0); + ASSERT_LT(circumflex.getKeyData().compare(circumflexAndAcute.getKeyData()), 0); + + ASSERT_GT(circumflexAndAcute.getKeyData().compare(graveAndAcute.getKeyData()), 0); + ASSERT_GT(graveAndAcute.getKeyData().compare(circumflex.getKeyData()), 0); + ASSERT_GT(circumflexAndAcute.getKeyData().compare(circumflex.getKeyData()), 0); +} + } // namespace diff --git a/src/third_party/icu4c-56.1/source/SConscript b/src/third_party/icu4c-56.1/source/SConscript index 1c9d3c713f8..1d4e95493f8 100644 --- a/src/third_party/icu4c-56.1/source/SConscript +++ b/src/third_party/icu4c-56.1/source/SConscript @@ -6,7 +6,7 @@ env = env.Clone() env.Append( CPPDEFINES=[ - 'U_CHARSET_ISUTF8=1', + 'U_CHARSET_IS_UTF8=1', 'U_I18N_IMPLEMENTATION=1', 'U_STATIC_IMPLEMENTATION=1', ], |