summary refs log tree commit diff
diff options
context:
space:
mode:
author David Storch <david.storch@10gen.com> 2016-02-25 18:10:44 -0500
committer David Storch <david.storch@10gen.com> 2016-03-09 15:50:28 -0500
commit 937462b0017316587c8856a5e6a0d1d83418ef36 (patch)
tree 146d74a3257d9b5936f8f7303f27341655d721b4
parent e300d62fdc1af32c943e54dfb5e43b9fa2c8bb75 (diff)
download mongo-937462b0017316587c8856a5e6a0d1d83418ef36.tar.gz
SERVER-22738 add CollatorInterface::getComparisonKey()
-rw-r--r-- src/mongo/db/query/collation/SConscript 10
-rw-r--r-- src/mongo/db/query/collation/collation_serializer.cpp (renamed from src/mongo/db/query/collation/collation_spec_serializer.cpp) 17
-rw-r--r-- src/mongo/db/query/collation/collation_serializer.h (renamed from src/mongo/db/query/collation/collation_spec_serializer.h) 26
-rw-r--r-- src/mongo/db/query/collation/collation_serializer_test.cpp (renamed from src/mongo/db/query/collation/collation_spec_serializer_test.cpp) 38
-rw-r--r-- src/mongo/db/query/collation/collation_spec.h 13
-rw-r--r-- src/mongo/db/query/collation/collator_factory_icu_test.cpp 21
-rw-r--r-- src/mongo/db/query/collation/collator_interface.h 53
-rw-r--r-- src/mongo/db/query/collation/collator_interface_icu.cpp 45
-rw-r--r-- src/mongo/db/query/collation/collator_interface_icu.h 2
-rw-r--r-- src/mongo/db/query/collation/collator_interface_icu_test.cpp 230
-rw-r--r-- src/third_party/icu4c-56.1/source/SConscript 2
11 files changed, 414 insertions, 43 deletions
diff --git a/src/mongo/db/query/collation/SConscript b/src/mongo/db/query/collation/SConscript
index 0fad3d2bb8d..7a65216f7c2 100644
--- a/src/mongo/db/query/collation/SConscript
+++ b/src/mongo/db/query/collation/SConscript
@@ -33,9 +33,9 @@ env.CppUnitTest(
)
env.Library(
- target="collation_spec_serializer",
+ target="collation_serializer",
source=[
- "collation_spec_serializer.cpp",
+ "collation_serializer.cpp",
],
LIBDEPS=[
"$BUILD_DIR/mongo/base",
@@ -44,12 +44,12 @@ env.Library(
)
env.CppUnitTest(
- target="collation_spec_serializer_test",
+ target="collation_serializer_test",
source=[
- "collation_spec_serializer_test.cpp",
+ "collation_serializer_test.cpp",
],
LIBDEPS=[
- "collation_spec_serializer",
+ "collation_serializer",
],
)
diff --git a/src/mongo/db/query/collation/collation_spec_serializer.cpp b/src/mongo/db/query/collation/collation_serializer.cpp
index 33975e2bb9d..e5d38bba7cd 100644
--- a/src/mongo/db/query/collation/collation_spec_serializer.cpp
+++ b/src/mongo/db/query/collation/collation_serializer.cpp
@@ -28,14 +28,16 @@
#include "mongo/platform/basic.h"
-#include "mongo/db/query/collation/collation_spec_serializer.h"
+#include "mongo/db/query/collation/collation_serializer.h"
+#include "mongo/base/string_data.h"
#include "mongo/bson/bsonobj.h"
#include "mongo/bson/bsonobjbuilder.h"
+#include "mongo/db/query/collation/collation_spec.h"
namespace mongo {
-BSONObj CollationSpecSerializer::toBSON(const CollationSpec& spec) {
+BSONObj CollationSerializer::specToBSON(const CollationSpec& spec) {
BSONObjBuilder builder;
builder.append(CollationSpec::kLocaleField, spec.localeID);
builder.append(CollationSpec::kCaseLevelField, spec.caseLevel);
@@ -84,4 +86,15 @@ BSONObj CollationSpecSerializer::toBSON(const CollationSpec& spec) {
return builder.obj();
}
+// TODO SERVER-22372: Add test coverage for this method once the CollatorInterfaceMock is
+// implemented.
+//
+// Appends the bytes of 'key' to 'bob' under the field name 'fieldName'.
+void CollationSerializer::appendCollationKey(StringData fieldName,
+                                             const CollatorInterface::ComparisonKey& key,
+                                             BSONObjBuilder* bob) {
+    // The key bytes are expected to carry no trailing null byte of their own; the BSONObjBuilder
+    // adds one after appending the string.
+    bob->append(fieldName, key.getKeyData());
+}
+
} // namespace mongo
diff --git a/src/mongo/db/query/collation/collation_spec_serializer.h b/src/mongo/db/query/collation/collation_serializer.h
index 774399610af..4a273abd451 100644
--- a/src/mongo/db/query/collation/collation_spec_serializer.h
+++ b/src/mongo/db/query/collation/collation_serializer.h
@@ -28,19 +28,35 @@
#pragma once
-#include "mongo/db/query/collation/collation_spec.h"
+#include "mongo/db/query/collation/collator_interface.h"
namespace mongo {
class BSONObj;
+class BSONObjBuilder;
+class StringData;
-class CollationSpecSerializer {
+struct CollationSpec;
+
+/**
+ * Provides functions for serializing collation-related objects.
+ */
+class CollationSerializer {
public:
/**
- * Converts 'spec' to its BSONObj representation. The resulting BSON can be stored and later
- * used to recreate the corresponding CollatorInterface.
+ * Converts CollationSpec 'spec' to its BSONObj representation. The resulting BSON can be stored
+ * and later used to recreate the corresponding CollatorInterface.
+ *
+ * The resulting BSONObj is owned by the caller.
+ */
+ static BSONObj specToBSON(const CollationSpec& spec);
+
+ /**
+ * Appends 'key' to 'bob' as a BSONElement of BSONType string with field name 'fieldName'.
*/
- static BSONObj toBSON(const CollationSpec& spec);
+ static void appendCollationKey(StringData fieldName,
+ const CollatorInterface::ComparisonKey& key,
+ BSONObjBuilder* bob);
};
} // namespace mongo
diff --git a/src/mongo/db/query/collation/collation_spec_serializer_test.cpp b/src/mongo/db/query/collation/collation_serializer_test.cpp
index b12b3d0e27f..9fa9592b5b2 100644
--- a/src/mongo/db/query/collation/collation_spec_serializer_test.cpp
+++ b/src/mongo/db/query/collation/collation_serializer_test.cpp
@@ -28,7 +28,7 @@
#include "mongo/platform/basic.h"
-#include "mongo/db/query/collation/collation_spec_serializer.h"
+#include "mongo/db/query/collation/collation_serializer.h"
#include "mongo/bson/bsonobjbuilder.h"
#include "mongo/unittest/unittest.h"
@@ -37,7 +37,7 @@ namespace {
using namespace mongo;
-TEST(CollationSpecSerializerTest, ToBSONCorrectlySerializesDefaults) {
+TEST(CollationSerializerTest, ToBSONCorrectlySerializesDefaults) {
CollationSpec collationSpec;
collationSpec.localeID = "myLocale";
@@ -51,10 +51,10 @@ TEST(CollationSpecSerializerTest, ToBSONCorrectlySerializesDefaults) {
<< "punct"
<< "normalization" << false << "backwards" << false);
- ASSERT_EQ(expectedObj, CollationSpecSerializer::toBSON(collationSpec));
+ ASSERT_EQ(expectedObj, CollationSerializer::specToBSON(collationSpec));
}
-TEST(CollationSpecSerializerTest, ToBSONCorrectlySerializesCaseFirstUpper) {
+TEST(CollationSerializerTest, ToBSONCorrectlySerializesCaseFirstUpper) {
CollationSpec collationSpec;
collationSpec.localeID = "myLocale";
collationSpec.caseFirst = CollationSpec::CaseFirstType::kUpper;
@@ -69,10 +69,10 @@ TEST(CollationSpecSerializerTest, ToBSONCorrectlySerializesCaseFirstUpper) {
<< "punct"
<< "normalization" << false << "backwards" << false);
- ASSERT_EQ(expectedObj, CollationSpecSerializer::toBSON(collationSpec));
+ ASSERT_EQ(expectedObj, CollationSerializer::specToBSON(collationSpec));
}
-TEST(CollationSpecSerializerTest, ToBSONCorrectlySerializesCaseFirstLower) {
+TEST(CollationSerializerTest, ToBSONCorrectlySerializesCaseFirstLower) {
CollationSpec collationSpec;
collationSpec.localeID = "myLocale";
collationSpec.caseFirst = CollationSpec::CaseFirstType::kLower;
@@ -87,10 +87,10 @@ TEST(CollationSpecSerializerTest, ToBSONCorrectlySerializesCaseFirstLower) {
<< "punct"
<< "normalization" << false << "backwards" << false);
- ASSERT_EQ(expectedObj, CollationSpecSerializer::toBSON(collationSpec));
+ ASSERT_EQ(expectedObj, CollationSerializer::specToBSON(collationSpec));
}
-TEST(CollationSpecSerializerTest, ToBSONCorrectlySerializesPrimaryStrength) {
+TEST(CollationSerializerTest, ToBSONCorrectlySerializesPrimaryStrength) {
CollationSpec collationSpec;
collationSpec.localeID = "myLocale";
collationSpec.strength = CollationSpec::StrengthType::kPrimary;
@@ -105,10 +105,10 @@ TEST(CollationSpecSerializerTest, ToBSONCorrectlySerializesPrimaryStrength) {
<< "punct"
<< "normalization" << false << "backwards" << false);
- ASSERT_EQ(expectedObj, CollationSpecSerializer::toBSON(collationSpec));
+ ASSERT_EQ(expectedObj, CollationSerializer::specToBSON(collationSpec));
}
-TEST(CollationSpecSerializerTest, ToBSONCorrectlySerializesSecondaryStrength) {
+TEST(CollationSerializerTest, ToBSONCorrectlySerializesSecondaryStrength) {
CollationSpec collationSpec;
collationSpec.localeID = "myLocale";
collationSpec.strength = CollationSpec::StrengthType::kSecondary;
@@ -123,10 +123,10 @@ TEST(CollationSpecSerializerTest, ToBSONCorrectlySerializesSecondaryStrength) {
<< "punct"
<< "normalization" << false << "backwards" << false);
- ASSERT_EQ(expectedObj, CollationSpecSerializer::toBSON(collationSpec));
+ ASSERT_EQ(expectedObj, CollationSerializer::specToBSON(collationSpec));
}
-TEST(CollationSpecSerializerTest, ToBSONCorrectlySerializesQuaternaryStrength) {
+TEST(CollationSerializerTest, ToBSONCorrectlySerializesQuaternaryStrength) {
CollationSpec collationSpec;
collationSpec.localeID = "myLocale";
collationSpec.strength = CollationSpec::StrengthType::kQuaternary;
@@ -141,10 +141,10 @@ TEST(CollationSpecSerializerTest, ToBSONCorrectlySerializesQuaternaryStrength) {
<< "punct"
<< "normalization" << false << "backwards" << false);
- ASSERT_EQ(expectedObj, CollationSpecSerializer::toBSON(collationSpec));
+ ASSERT_EQ(expectedObj, CollationSerializer::specToBSON(collationSpec));
}
-TEST(CollationSpecSerializerTest, ToBSONCorrectlySerializesIdenticalStrength) {
+TEST(CollationSerializerTest, ToBSONCorrectlySerializesIdenticalStrength) {
CollationSpec collationSpec;
collationSpec.localeID = "myLocale";
collationSpec.strength = CollationSpec::StrengthType::kIdentical;
@@ -159,10 +159,10 @@ TEST(CollationSpecSerializerTest, ToBSONCorrectlySerializesIdenticalStrength) {
<< "punct"
<< "normalization" << false << "backwards" << false);
- ASSERT_EQ(expectedObj, CollationSpecSerializer::toBSON(collationSpec));
+ ASSERT_EQ(expectedObj, CollationSerializer::specToBSON(collationSpec));
}
-TEST(CollationSpecSerializerTest, ToBSONCorrectlySerializesAlternateShifted) {
+TEST(CollationSerializerTest, ToBSONCorrectlySerializesAlternateShifted) {
CollationSpec collationSpec;
collationSpec.localeID = "myLocale";
collationSpec.alternate = CollationSpec::AlternateType::kShifted;
@@ -177,10 +177,10 @@ TEST(CollationSpecSerializerTest, ToBSONCorrectlySerializesAlternateShifted) {
<< "punct"
<< "normalization" << false << "backwards" << false);
- ASSERT_EQ(expectedObj, CollationSpecSerializer::toBSON(collationSpec));
+ ASSERT_EQ(expectedObj, CollationSerializer::specToBSON(collationSpec));
}
-TEST(CollationSpecSerializerTest, ToBSONCorrectlySerializesMaxVariableSpace) {
+TEST(CollationSerializerTest, ToBSONCorrectlySerializesMaxVariableSpace) {
CollationSpec collationSpec;
collationSpec.localeID = "myLocale";
collationSpec.maxVariable = CollationSpec::MaxVariableType::kSpace;
@@ -195,7 +195,7 @@ TEST(CollationSpecSerializerTest, ToBSONCorrectlySerializesMaxVariableSpace) {
<< "space"
<< "normalization" << false << "backwards" << false);
- ASSERT_EQ(expectedObj, CollationSpecSerializer::toBSON(collationSpec));
+ ASSERT_EQ(expectedObj, CollationSerializer::specToBSON(collationSpec));
}
} // namespace
diff --git a/src/mongo/db/query/collation/collation_spec.h b/src/mongo/db/query/collation/collation_spec.h
index d92621164be..63304c3393a 100644
--- a/src/mongo/db/query/collation/collation_spec.h
+++ b/src/mongo/db/query/collation/collation_spec.h
@@ -34,7 +34,7 @@ namespace mongo {
/**
* A CollationSpec is a parsed representation of a user-provided collation BSONObj. Can be
- * re-serialized to BSON using the CollationSpecSerializer.
+ * re-serialized to BSON using CollationSerializer.
*/
struct CollationSpec {
// Controls whether uppercase sorts before lowercase or vice versa.
@@ -110,6 +110,17 @@ struct CollationSpec {
static const char* kMaxVariablePunct;
static const char* kMaxVariableSpace;
+ /**
+ * Constructs a CollationSpec with no locale, where all other fields have their default values.
+ */
+ CollationSpec() = default;
+
+    /**
+     * Builds a CollationSpec whose locale is 'locale'. Every other field keeps its default value.
+     */
+    CollationSpec(std::string locale)
+        : localeID(std::move(locale)) {}
+
// A string such as "en_US", identifying the language, country, or other attributes of the
// locale for this collation.
// Required.
diff --git a/src/mongo/db/query/collation/collator_factory_icu_test.cpp b/src/mongo/db/query/collation/collator_factory_icu_test.cpp
index c47d065f5cf..cd4fca42430 100644
--- a/src/mongo/db/query/collation/collator_factory_icu_test.cpp
+++ b/src/mongo/db/query/collation/collator_factory_icu_test.cpp
@@ -660,4 +660,25 @@ TEST(CollatorFactoryICUTest, SecondaryStrengthBackwardsTrue) {
// u8"\u00E1" is latin small letter a with acute.
ASSERT_GT(collator.getValue()->compare(u8"a\u00E1", u8"\u00E1a"), 0);
}
+
+// Checks that a collator produced by CollatorFactoryICU for the "en_US" locale hands out
+// comparison keys whose key data orders "ab" before "abb" before "ba".
+//
+// Registered under CollatorFactoryICUTest, like the other tests in this file, because the object
+// under test is obtained from the factory.
+TEST(CollatorFactoryICUTest, FactoryMadeCollatorComparisonKeysCorrectEnUS) {
+    CollatorFactoryICU factory;
+    auto collator = factory.makeFromBSON(BSON("locale"
+                                              << "en_US"));
+    ASSERT_OK(collator.getStatus());
+    const auto comparisonKeyAB = collator.getValue()->getComparisonKey("ab");
+    const auto comparisonKeyABB = collator.getValue()->getComparisonKey("abb");
+    const auto comparisonKeyBA = collator.getValue()->getComparisonKey("ba");
+
+    // "ab" versus "ba", in both directions, plus reflexivity.
+    ASSERT_LT(comparisonKeyAB.getKeyData().compare(comparisonKeyBA.getKeyData()), 0);
+    ASSERT_GT(comparisonKeyBA.getKeyData().compare(comparisonKeyAB.getKeyData()), 0);
+    ASSERT_EQ(comparisonKeyAB.getKeyData().compare(comparisonKeyAB.getKeyData()), 0);
+
+    // A string sorts before any longer string it is a prefix of.
+    ASSERT_LT(comparisonKeyAB.getKeyData().compare(comparisonKeyABB.getKeyData()), 0);
+    ASSERT_GT(comparisonKeyABB.getKeyData().compare(comparisonKeyAB.getKeyData()), 0);
+
+    // "ba" versus "abb", in both directions.
+    ASSERT_GT(comparisonKeyBA.getKeyData().compare(comparisonKeyABB.getKeyData()), 0);
+    ASSERT_LT(comparisonKeyABB.getKeyData().compare(comparisonKeyBA.getKeyData()), 0);
+}
+
} // namespace
diff --git a/src/mongo/db/query/collation/collator_interface.h b/src/mongo/db/query/collation/collator_interface.h
index be857e10221..767eca2bea3 100644
--- a/src/mongo/db/query/collation/collator_interface.h
+++ b/src/mongo/db/query/collation/collator_interface.h
@@ -28,13 +28,14 @@
#pragma once
+#include <string>
+
#include "mongo/base/disallow_copying.h"
+#include "mongo/base/string_data.h"
#include "mongo/db/query/collation/collation_spec.h"
namespace mongo {
-class StringData;
-
/**
* An interface for ordering and matching according to a collation. Instances should be retrieved
* from the CollatorFactoryInterface and may not be copied.
@@ -42,15 +43,46 @@ class StringData;
* All methods are thread-safe.
*
* Does not throw exceptions.
- *
- * TODO SERVER-22738: Extend interface with a getComparisonKey() method and implement a
- * MongoDB-specific abstraction for a collator-generated comparison key.
*/
class CollatorInterface {
MONGO_DISALLOW_COPYING(CollatorInterface);
public:
/**
+ * Every string has a corresponding ComparisonKey with respect to this collator. Two
+ * ComparisonKeys can be lexicographically ordered in order to obtain the collation's sort order
+ * and equivalence classes.
+ *
+ * A ComparisonKey is logically an owned array of bytes. It is cheap to move but potentially
+ * expensive to copy.
+ *
+ * ComparisonKeys may only be obtained via CollatorInterface::getComparisonKey().
+ *
+ * In general, two strings should be compared with respect to a collation using
+ * CollatorInterface::compare(). Comparing the byte arrays returned by
+ * ComparisonKey::getKeyData() may be faster if repeatedly comparing the same string(s).
+ */
+    class ComparisonKey {
+    public:
+        /**
+         * Returns the underlying byte array represented by this ComparisonKey.
+         *
+         * The returned StringData may not outlive the ComparisonKey used to create it, since the
+         * ComparisonKey owns the underlying byte array.
+         */
+        StringData getKeyData() const {
+            return StringData(_key);
+        }
+
+    private:
+        // CollatorInterface is the only class permitted to call the private constructor.
+        friend class CollatorInterface;
+
+        // Takes ownership of 'key'. Declared explicit so that a std::string is never silently
+        // converted into a ComparisonKey.
+        explicit ComparisonKey(std::string key) : _key(std::move(key)) {}
+
+        // The owned bytes of the comparison key.
+        std::string _key;
+    };
+
+ /**
* Constructs a CollatorInterface capable of computing the collation described by 'spec'.
*/
CollatorInterface(CollationSpec spec) : _spec(std::move(spec)) {}
@@ -65,6 +97,12 @@ public:
virtual int compare(StringData left, StringData right) = 0;
/**
+ * Returns the comparison key for 'stringData', according to this collation. See ComparisonKey's
+ * comments for details.
+ */
+ virtual ComparisonKey getComparisonKey(StringData stringData) = 0;
+
+ /**
* Returns whether this collation has the same matching and sorting semantics as 'other'.
*/
bool operator==(const CollatorInterface& other) const {
@@ -86,6 +124,11 @@ public:
return _spec;
}
+protected:
+    /**
+     * Wraps 'key' in a ComparisonKey, taking ownership of the bytes. ComparisonKey's constructor
+     * is private, so this is the hook through which keys get built.
+     */
+    static ComparisonKey makeComparisonKey(std::string key) {
+        ComparisonKey wrapped(std::move(key));
+        return wrapped;
+    }
+
private:
const CollationSpec _spec;
};
diff --git a/src/mongo/db/query/collation/collator_interface_icu.cpp b/src/mongo/db/query/collation/collator_interface_icu.cpp
index d5a698e924b..0da6579a707 100644
--- a/src/mongo/db/query/collation/collator_interface_icu.cpp
+++ b/src/mongo/db/query/collation/collator_interface_icu.cpp
@@ -30,6 +30,8 @@
#include "mongo/db/query/collation/collator_interface_icu.h"
+#include <unicode/sortkey.h>
+
#include "mongo/util/assert_util.h"
namespace mongo {
@@ -39,14 +41,47 @@ CollatorInterfaceICU::CollatorInterfaceICU(CollationSpec spec,
: CollatorInterface(std::move(spec)), _collator(std::move(collator)) {}
int CollatorInterfaceICU::compare(StringData left, StringData right) {
- // TODO: What happens if 'status' is a failure code? In what circumstances could this happen?
+ // TODO SERVER-23028: What happens if 'status' is a failure code? In what circumstances could
+ // this happen?
UErrorCode status = U_ZERO_ERROR;
- auto compareResult = _collator->compare(icu::UnicodeString(left.rawData(), left.size()),
- icu::UnicodeString(right.rawData(), right.size()),
- status);
+ auto compareResult = _collator->compareUTF8(icu::StringPiece(left.rawData(), left.size()),
+ icu::StringPiece(right.rawData(), right.size()),
+ status);
invariant(U_SUCCESS(status));
- return compareResult;
+ switch (compareResult) {
+ case UCOL_EQUAL:
+ return 0;
+ case UCOL_GREATER:
+ return 1;
+ case UCOL_LESS:
+ return -1;
+ }
+
+ MONGO_UNREACHABLE;
+}
+
+CollatorInterface::ComparisonKey CollatorInterfaceICU::getComparisonKey(StringData stringData) {
+    // TODO SERVER-23028: What happens if 'status' is a failure code? In what circumstances could
+    // this happen?
+    UErrorCode status = U_ZERO_ERROR;
+    icu::CollationKey sortKey;
+
+    // icu::StringPiece is ICU's counterpart of StringData, so the input is handed over as a
+    // (pointer, length) pair.
+    _collator->getCollationKey(
+        icu::UnicodeString::fromUTF8(icu::StringPiece(stringData.rawData(), stringData.size())),
+        sortKey,
+        status);
+    invariant(U_SUCCESS(status));
+
+    int32_t numBytes;
+    const uint8_t* bytes = sortKey.getByteArray(numBytes);
+    invariant(numBytes > 0);
+    invariant(bytes);
+
+    // The sort key is expected to end with a null byte. That terminator is checked here and then
+    // left out of the comparison key.
+    const auto lastIndex = numBytes - 1u;
+    invariant(bytes[lastIndex] == '\0');
+    return makeComparisonKey(std::string(reinterpret_cast<const char*>(bytes), lastIndex));
}
} // namespace mongo
diff --git a/src/mongo/db/query/collation/collator_interface_icu.h b/src/mongo/db/query/collation/collator_interface_icu.h
index 19d73c3134e..c52de3f0b94 100644
--- a/src/mongo/db/query/collation/collator_interface_icu.h
+++ b/src/mongo/db/query/collation/collator_interface_icu.h
@@ -45,6 +45,8 @@ public:
int compare(StringData left, StringData right) final;
+ ComparisonKey getComparisonKey(StringData stringData) final;
+
private:
// The ICU implementation of the collator to which we delegate interesting work. Const methods
// on the ICU collator are expected to be thread-safe.
diff --git a/src/mongo/db/query/collation/collator_interface_icu_test.cpp b/src/mongo/db/query/collation/collator_interface_icu_test.cpp
index 969ddb30c75..bf74b9a74b5 100644
--- a/src/mongo/db/query/collation/collator_interface_icu_test.cpp
+++ b/src/mongo/db/query/collation/collator_interface_icu_test.cpp
@@ -69,6 +69,188 @@ TEST(CollatorInterfaceICUTest, ASCIIComparisonWorksUsingLocaleStringParsing) {
ASSERT_EQ(icuCollator.compare("ab", "ab"), 0);
}
+// Comparison keys for plain ASCII strings under "en_US" must order "ab" < "abb" < "ba".
+TEST(CollatorInterfaceICUTest, ASCIIComparisonWorksUsingComparisonKeys) {
+    CollationSpec spec;
+    spec.localeID = "en_US";
+
+    auto enUS = icu::Locale::createFromName(spec.localeID.c_str());
+    ASSERT_EQ(std::string("en"), enUS.getLanguage());
+    ASSERT_EQ(std::string("US"), enUS.getCountry());
+
+    UErrorCode errorCode = U_ZERO_ERROR;
+    std::unique_ptr<icu::Collator> icuImpl(icu::Collator::createInstance(enUS, errorCode));
+    ASSERT(U_SUCCESS(errorCode));
+
+    CollatorInterfaceICU collator(spec, std::move(icuImpl));
+    const auto keyAB = collator.getComparisonKey("ab");
+    const auto keyABB = collator.getComparisonKey("abb");
+    const auto keyBA = collator.getComparisonKey("ba");
+
+    // Views into the keys above; the keys stay alive until the end of the test.
+    const StringData ab = keyAB.getKeyData();
+    const StringData abb = keyABB.getKeyData();
+    const StringData ba = keyBA.getKeyData();
+
+    ASSERT_LT(ab.compare(ba), 0);
+    ASSERT_GT(ba.compare(ab), 0);
+    ASSERT_EQ(ab.compare(ab), 0);
+
+    ASSERT_LT(ab.compare(abb), 0);
+    ASSERT_GT(abb.compare(ab), 0);
+
+    ASSERT_GT(ba.compare(abb), 0);
+    ASSERT_LT(abb.compare(ba), 0);
+}
+
+// A default-constructed StringData must equal itself and sort before "abc".
+TEST(CollatorInterfaceICUTest, ZeroLengthStringsCompareCorrectly) {
+    CollationSpec spec;
+    spec.localeID = "en_US";
+
+    UErrorCode errorCode = U_ZERO_ERROR;
+    std::unique_ptr<icu::Collator> icuImpl(
+        icu::Collator::createInstance(icu::Locale("en", "US"), errorCode));
+    ASSERT(U_SUCCESS(errorCode));
+
+    CollatorInterfaceICU collator(spec, std::move(icuImpl));
+    const auto zeroLength = StringData();
+    ASSERT_EQ(collator.compare(zeroLength, zeroLength), 0);
+    ASSERT_LT(collator.compare(zeroLength, "abc"), 0);
+    ASSERT_GT(collator.compare("abc", zeroLength), 0);
+}
+
+// The comparison key of a default-constructed StringData must equal itself and sort before the
+// key of "abc".
+TEST(CollatorInterfaceICUTest, ZeroLengthStringsCompareCorrectlyUsingComparisonKeys) {
+    CollationSpec spec;
+    spec.localeID = "en_US";
+
+    UErrorCode errorCode = U_ZERO_ERROR;
+    std::unique_ptr<icu::Collator> icuImpl(
+        icu::Collator::createInstance(icu::Locale("en", "US"), errorCode));
+    ASSERT(U_SUCCESS(errorCode));
+
+    CollatorInterfaceICU collator(spec, std::move(icuImpl));
+    auto keyOfEmpty = collator.getComparisonKey(StringData());
+    auto keyOfABC = collator.getComparisonKey("abc");
+
+    const StringData emptyBytes = keyOfEmpty.getKeyData();
+    const StringData abcBytes = keyOfABC.getKeyData();
+    ASSERT_EQ(emptyBytes.compare(emptyBytes), 0);
+    ASSERT_LT(emptyBytes.compare(abcBytes), 0);
+    ASSERT_GT(abcBytes.compare(emptyBytes), 0);
+}
+
+// An empty StringData backed by a real (non-null) pointer must equal itself and sort before "abc".
+TEST(CollatorInterfaceICUTest, EmptyNullTerminatedStringComparesCorrectly) {
+    // Size zero, but with non-null underlying data.
+    StringData empty("");
+    ASSERT(empty.rawData());
+    ASSERT_EQ(empty.size(), 0u);
+
+    CollationSpec spec;
+    spec.localeID = "en_US";
+
+    UErrorCode errorCode = U_ZERO_ERROR;
+    std::unique_ptr<icu::Collator> icuImpl(
+        icu::Collator::createInstance(icu::Locale("en", "US"), errorCode));
+    ASSERT(U_SUCCESS(errorCode));
+
+    CollatorInterfaceICU collator(spec, std::move(icuImpl));
+    ASSERT_EQ(collator.compare(empty, empty), 0);
+    ASSERT_LT(collator.compare(empty, "abc"), 0);
+    ASSERT_GT(collator.compare("abc", empty), 0);
+}
+
+// The comparison key of an empty StringData backed by a real (non-null) pointer must equal itself
+// and sort before the key of "abc".
+TEST(CollatorInterfaceICUTest, EmptyNullTerminatedStringComparesCorrectlyUsingComparisonKeys) {
+    // Size zero, but with non-null underlying data.
+    StringData empty("");
+    ASSERT(empty.rawData());
+    ASSERT_EQ(empty.size(), 0u);
+
+    CollationSpec spec;
+    spec.localeID = "en_US";
+
+    UErrorCode errorCode = U_ZERO_ERROR;
+    std::unique_ptr<icu::Collator> icuImpl(
+        icu::Collator::createInstance(icu::Locale("en", "US"), errorCode));
+    ASSERT(U_SUCCESS(errorCode));
+
+    CollatorInterfaceICU collator(spec, std::move(icuImpl));
+    auto keyOfEmpty = collator.getComparisonKey(empty);
+    auto keyOfABC = collator.getComparisonKey("abc");
+
+    const StringData emptyBytes = keyOfEmpty.getKeyData();
+    const StringData abcBytes = keyOfABC.getKeyData();
+    ASSERT_EQ(emptyBytes.compare(emptyBytes), 0);
+    ASSERT_LT(emptyBytes.compare(abcBytes), 0);
+    ASSERT_GT(abcBytes.compare(emptyBytes), 0);
+}
+
+// A one-character string consisting of a single null byte must equal itself and sort before "abc".
+TEST(CollatorInterfaceICUTest, LengthOneStringWithNullByteComparesCorrectly) {
+    // LiteralTag keeps the embedded null as part of the string, giving it length one.
+    StringData nul("\0", StringData::LiteralTag());
+    ASSERT_EQ(nul.rawData()[0], '\0');
+    ASSERT_EQ(nul.size(), 1u);
+
+    CollationSpec spec;
+    spec.localeID = "en_US";
+
+    UErrorCode errorCode = U_ZERO_ERROR;
+    std::unique_ptr<icu::Collator> icuImpl(
+        icu::Collator::createInstance(icu::Locale("en", "US"), errorCode));
+    ASSERT(U_SUCCESS(errorCode));
+
+    CollatorInterfaceICU collator(spec, std::move(icuImpl));
+    ASSERT_EQ(collator.compare(nul, nul), 0);
+    ASSERT_LT(collator.compare(nul, "abc"), 0);
+    ASSERT_GT(collator.compare("abc", nul), 0);
+}
+
+// The comparison key of a one-character string consisting of a single null byte must equal itself
+// and sort before the key of "abc".
+TEST(CollatorInterfaceICUTest, LengthOneStringWithNullByteComparesCorrectlyUsingComparisonKeys) {
+    // LiteralTag keeps the embedded null as part of the string, giving it length one.
+    StringData nul("\0", StringData::LiteralTag());
+    ASSERT_EQ(nul.rawData()[0], '\0');
+    ASSERT_EQ(nul.size(), 1u);
+
+    CollationSpec spec;
+    spec.localeID = "en_US";
+
+    UErrorCode errorCode = U_ZERO_ERROR;
+    std::unique_ptr<icu::Collator> icuImpl(
+        icu::Collator::createInstance(icu::Locale("en", "US"), errorCode));
+    ASSERT(U_SUCCESS(errorCode));
+
+    CollatorInterfaceICU collator(spec, std::move(icuImpl));
+    auto keyOfNul = collator.getComparisonKey(nul);
+    auto keyOfABC = collator.getComparisonKey("abc");
+
+    const StringData nulBytes = keyOfNul.getKeyData();
+    const StringData abcBytes = keyOfABC.getKeyData();
+    ASSERT_EQ(nulBytes.compare(nulBytes), 0);
+    ASSERT_LT(nulBytes.compare(abcBytes), 0);
+    ASSERT_GT(abcBytes.compare(nulBytes), 0);
+}
+
+// Strings that differ only after an embedded null byte must still be told apart: "a\0b" < "a\0c".
+TEST(CollatorInterfaceICUTest, StringsWithEmbeddedNullByteCompareCorrectly) {
+    // LiteralTag keeps the full three characters, embedded null included.
+    StringData lesser("a\0b", StringData::LiteralTag());
+    ASSERT_EQ(lesser.size(), 3u);
+    StringData greater("a\0c", StringData::LiteralTag());
+    ASSERT_EQ(greater.size(), 3u);
+
+    CollationSpec spec;
+    spec.localeID = "en_US";
+
+    UErrorCode errorCode = U_ZERO_ERROR;
+    std::unique_ptr<icu::Collator> icuImpl(
+        icu::Collator::createInstance(icu::Locale("en", "US"), errorCode));
+    ASSERT(U_SUCCESS(errorCode));
+
+    CollatorInterfaceICU collator(spec, std::move(icuImpl));
+    ASSERT_EQ(collator.compare(lesser, lesser), 0);
+    ASSERT_LT(collator.compare(lesser, greater), 0);
+    ASSERT_GT(collator.compare(greater, lesser), 0);
+}
+
+// Comparison keys of strings that differ only after an embedded null byte must still be told
+// apart: key("a\0b") < key("a\0c").
+TEST(CollatorInterfaceICUTest, StringsWithEmbeddedNullByteCompareCorrectlyUsingComparisonKeys) {
+    // LiteralTag keeps the full three characters, embedded null included.
+    StringData lesser("a\0b", StringData::LiteralTag());
+    ASSERT_EQ(lesser.size(), 3u);
+    StringData greater("a\0c", StringData::LiteralTag());
+    ASSERT_EQ(greater.size(), 3u);
+
+    CollationSpec spec;
+    spec.localeID = "en_US";
+
+    UErrorCode errorCode = U_ZERO_ERROR;
+    std::unique_ptr<icu::Collator> icuImpl(
+        icu::Collator::createInstance(icu::Locale("en", "US"), errorCode));
+    ASSERT(U_SUCCESS(errorCode));
+
+    CollatorInterfaceICU collator(spec, std::move(icuImpl));
+    auto lesserKey = collator.getComparisonKey(lesser);
+    auto greaterKey = collator.getComparisonKey(greater);
+
+    const StringData lesserBytes = lesserKey.getKeyData();
+    const StringData greaterBytes = greaterKey.getKeyData();
+    ASSERT_EQ(lesserBytes.compare(lesserBytes), 0);
+    ASSERT_LT(lesserBytes.compare(greaterBytes), 0);
+    ASSERT_GT(greaterBytes.compare(lesserBytes), 0);
+}
+
TEST(CollatorInterfaceICUTest, TwoUSEnglishCollationsAreEqual) {
CollationSpec collationSpec;
collationSpec.localeID = "en_US";
@@ -109,4 +291,52 @@ TEST(CollatorInterfaceICUTest, USEnglishAndBritishEnglishCollationsAreNotEqual)
ASSERT_TRUE(icuCollator1 != icuCollator2);
}
+// Under the "fr_CA" collation the three accented spellings below must sort in the order
+// circumflex < grave-and-acute < circumflex-and-acute.
+TEST(CollatorInterfaceICUTest, FrenchCanadianCollatorComparesCorrectly) {
+    // \u00EA is e-circumflex, \u00E8 is e-grave, \u00E9 is e-acute.
+    StringData circumflex(u8"p\u00EAche");
+    StringData graveAndAcute(u8"p\u00E8ch\u00E9");
+    StringData circumflexAndAcute(u8"p\u00EAch\u00E9");
+
+    CollationSpec spec;
+    spec.localeID = "fr_CA";
+
+    UErrorCode errorCode = U_ZERO_ERROR;
+    std::unique_ptr<icu::Collator> icuImpl(
+        icu::Collator::createInstance(icu::Locale("fr", "CA"), errorCode));
+    ASSERT(U_SUCCESS(errorCode));
+
+    CollatorInterfaceICU collator(spec, std::move(icuImpl));
+
+    // Ascending direction.
+    ASSERT_LT(collator.compare(circumflex, graveAndAcute), 0);
+    ASSERT_LT(collator.compare(graveAndAcute, circumflexAndAcute), 0);
+    ASSERT_LT(collator.compare(circumflex, circumflexAndAcute), 0);
+
+    // Same pairs with the arguments swapped.
+    ASSERT_GT(collator.compare(circumflexAndAcute, graveAndAcute), 0);
+    ASSERT_GT(collator.compare(graveAndAcute, circumflex), 0);
+    ASSERT_GT(collator.compare(circumflexAndAcute, circumflex), 0);
+}
+
+// Under the "fr_CA" collation the comparison keys of the three accented spellings below must sort
+// in the order circumflex < grave-and-acute < circumflex-and-acute.
+TEST(CollatorInterfaceICUTest, FrenchCanadianCollatorComparesCorrectlyUsingComparisonKeys) {
+    CollationSpec spec;
+    spec.localeID = "fr_CA";
+
+    UErrorCode errorCode = U_ZERO_ERROR;
+    std::unique_ptr<icu::Collator> icuImpl(
+        icu::Collator::createInstance(icu::Locale("fr", "CA"), errorCode));
+    ASSERT(U_SUCCESS(errorCode));
+
+    CollatorInterfaceICU collator(spec, std::move(icuImpl));
+
+    // \u00EA is e-circumflex, \u00E8 is e-grave, \u00E9 is e-acute.
+    auto circumflexKey = collator.getComparisonKey(u8"p\u00EAche");
+    auto graveAndAcuteKey = collator.getComparisonKey(u8"p\u00E8ch\u00E9");
+    auto circumflexAndAcuteKey = collator.getComparisonKey(u8"p\u00EAch\u00E9");
+
+    // Views into the keys above; the keys stay alive until the end of the test.
+    const StringData circumflex = circumflexKey.getKeyData();
+    const StringData graveAndAcute = graveAndAcuteKey.getKeyData();
+    const StringData circumflexAndAcute = circumflexAndAcuteKey.getKeyData();
+
+    // Ascending direction.
+    ASSERT_LT(circumflex.compare(graveAndAcute), 0);
+    ASSERT_LT(graveAndAcute.compare(circumflexAndAcute), 0);
+    ASSERT_LT(circumflex.compare(circumflexAndAcute), 0);
+
+    // Same pairs with the operands swapped.
+    ASSERT_GT(circumflexAndAcute.compare(graveAndAcute), 0);
+    ASSERT_GT(graveAndAcute.compare(circumflex), 0);
+    ASSERT_GT(circumflexAndAcute.compare(circumflex), 0);
+}
+
} // namespace
diff --git a/src/third_party/icu4c-56.1/source/SConscript b/src/third_party/icu4c-56.1/source/SConscript
index 1c9d3c713f8..1d4e95493f8 100644
--- a/src/third_party/icu4c-56.1/source/SConscript
+++ b/src/third_party/icu4c-56.1/source/SConscript
@@ -6,7 +6,7 @@ env = env.Clone()
env.Append(
CPPDEFINES=[
- 'U_CHARSET_ISUTF8=1',
+ 'U_CHARSET_IS_UTF8=1',
'U_I18N_IMPLEMENTATION=1',
'U_STATIC_IMPLEMENTATION=1',
],