diff options
author | David Storch <david.storch@10gen.com> | 2016-02-11 16:28:31 -0500 |
---|---|---|
committer | David Storch <david.storch@10gen.com> | 2016-02-23 15:35:29 -0500 |
commit | b2fe67f7f5b59ef231a8cf6a3e9a307c0b929188 (patch) | |
tree | f28c2abfef43b90da5936a1a57b416f610360670 | |
parent | 0933449805f020f1226c5a5f7cd39a442c42362e (diff) | |
download | mongo-b2fe67f7f5b59ef231a8cf6a3e9a307c0b929188.tar.gz |
SERVER-22374 initial implementation of collation classes
Includes:
- CollationSpec
- CollationSpecSerializer
- CollatorInterface
- CollatorInterfaceICU
- CollatorFactoryInterface
- CollatorFactoryICU
18 files changed, 1002 insertions, 7 deletions
diff --git a/src/mongo/db/query/SConscript b/src/mongo/db/query/SConscript index d846e741071..f5e0dc2b0fa 100644 --- a/src/mongo/db/query/SConscript +++ b/src/mongo/db/query/SConscript @@ -2,6 +2,12 @@ Import("env") +env.SConscript( + dirs=[ + "collation", + ], +) + env.Library( target='query_planner', source=[ diff --git a/src/mongo/db/query/collation/SConscript b/src/mongo/db/query/collation/SConscript new file mode 100644 index 00000000000..4bb3bbd4a9f --- /dev/null +++ b/src/mongo/db/query/collation/SConscript @@ -0,0 +1,88 @@ +# -*- mode: python -*- + +Import("env") +Import("icuEnabled") + +env = env.Clone() + +env.InjectThirdPartyIncludePaths("icu") + +env.Append( + CPPDEFINES=[ + 'U_USING_ICU_NAMESPACE=0', + ], +) + +env.Library( + target="collator_interface", + source=[ + "collation_spec.cpp", + ], + LIBDEPS=[ + ], +) + +env.CppUnitTest( + target="collation_spec_test", + source=[ + "collation_spec_test.cpp", + ], + LIBDEPS=[ + "collator_interface", + ], +) + +env.Library( + target="collation_spec_serializer", + source=[ + "collation_spec_serializer.cpp", + ], + LIBDEPS=[ + "$BUILD_DIR/mongo/base", + "collator_interface", + ], +) + +env.CppUnitTest( + target="collation_spec_serializer_test", + source=[ + "collation_spec_serializer_test.cpp", + ], + LIBDEPS=[ + "collation_spec_serializer", + ], +) + +if icuEnabled: + env.Library( + target="collator_icu", + source=[ + "collator_factory_icu.cpp", + "collator_interface_icu.cpp", + ], + LIBDEPS=[ + "$BUILD_DIR/mongo/base", + "$BUILD_DIR/third_party/shim_icu", + "collator_interface", + ], + ) + + env.CppUnitTest( + target="collator_interface_icu_test", + source=[ + "collator_interface_icu_test.cpp", + ], + LIBDEPS=[ + "collator_icu", + ], + ) + + env.CppUnitTest( + target="collator_factory_icu_test", + source=[ + "collator_factory_icu_test.cpp", + ], + LIBDEPS=[ + "collator_icu", + ], + ) diff --git a/src/mongo/db/query/collation/collation_spec.cpp b/src/mongo/db/query/collation/collation_spec.cpp new file mode 100644 index 00000000000..5f863428630 --- /dev/null +++ b/src/mongo/db/query/collation/collation_spec.cpp @@ -0,0 +1,37 @@ +/** + * Copyright (C) 2016 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/platform/basic.h" + +#include "mongo/db/query/collation/collation_spec.h" + +namespace mongo { + +const char* CollationSpec::kLocaleField = "locale"; + +} // namespace mongo diff --git a/src/mongo/db/query/collation/collation_spec.h b/src/mongo/db/query/collation/collation_spec.h new file mode 100644 index 00000000000..4d753360dfa --- /dev/null +++ b/src/mongo/db/query/collation/collation_spec.h @@ -0,0 +1,61 @@ +/** + * Copyright (C) 2016 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +#include <string> + +namespace mongo { + +/** + * A CollationSpec is a parsed representation of a user-provided collation BSONObj. Can be + * re-serialized to BSON using the CollationSpecSerializer. + * + * TODO SERVER-22373: extend to support options other than the localeID. + */ +struct CollationSpec { + // Field name constants. + static const char* kLocaleField; + + // A string such as "en_US", identifying the language, country, or other attributes of the + // locale for this collation. + std::string localeID; +}; + +/** + * Returns whether 'left' and 'right' are logically equivalent collations. + */ +inline bool operator==(const CollationSpec& left, const CollationSpec& right) { + return left.localeID == right.localeID; +} + +inline bool operator!=(const CollationSpec& left, const CollationSpec& right) { + return !(left == right); +} + +} // namespace mongo diff --git a/src/mongo/db/query/collation/collation_spec_serializer.cpp b/src/mongo/db/query/collation/collation_spec_serializer.cpp new file mode 100644 index 00000000000..888de13d01a --- /dev/null +++ b/src/mongo/db/query/collation/collation_spec_serializer.cpp @@ -0,0 +1,44 @@ +/** + * Copyright (C) 2016 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/platform/basic.h" + +#include "mongo/db/query/collation/collation_spec_serializer.h" + +#include "mongo/bson/bsonobj.h" +#include "mongo/bson/bsonobjbuilder.h" + +namespace mongo { + +BSONObj CollationSpecSerializer::toBSON(const CollationSpec& spec) { + BSONObjBuilder builder; + builder.append(CollationSpec::kLocaleField, spec.localeID); + return builder.obj(); +} + +} // namespace mongo diff --git a/src/mongo/db/query/collation/collation_spec_serializer.h b/src/mongo/db/query/collation/collation_spec_serializer.h new file mode 100644 index 00000000000..774399610af --- /dev/null +++ b/src/mongo/db/query/collation/collation_spec_serializer.h @@ -0,0 +1,46 @@ +/** + * Copyright (C) 2016 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +#include "mongo/db/query/collation/collation_spec.h" + +namespace mongo { + +class BSONObj; + +class CollationSpecSerializer { +public: + /** + * Converts 'spec' to its BSONObj representation. The resulting BSON can be stored and later + * used to recreate the corresponding CollatorInterface. + */ + static BSONObj toBSON(const CollationSpec& spec); +}; + +} // namespace mongo diff --git a/src/mongo/db/query/collation/collation_spec_serializer_test.cpp b/src/mongo/db/query/collation/collation_spec_serializer_test.cpp new file mode 100644 index 00000000000..ba28580eadd --- /dev/null +++ b/src/mongo/db/query/collation/collation_spec_serializer_test.cpp @@ -0,0 +1,50 @@ +/** + * Copyright (C) 2016 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/platform/basic.h" + +#include "mongo/db/query/collation/collation_spec_serializer.h" + +#include "mongo/bson/bsonobjbuilder.h" +#include "mongo/unittest/unittest.h" + +namespace { + +using namespace mongo; + +TEST(CollationSpecSerializerTest, ToBSONProducesCorrectSerializedObj) { + CollationSpec collationSpec; + collationSpec.localeID = "myLocale"; + + BSONObj expectedObj = BSON("locale" + << "myLocale"); + + ASSERT_EQ(expectedObj, CollationSpecSerializer::toBSON(collationSpec)); +} + +} // namespace diff --git a/src/mongo/db/query/collation/collation_spec_test.cpp b/src/mongo/db/query/collation/collation_spec_test.cpp new file mode 100644 index 00000000000..26f2566f2e1 --- /dev/null +++ b/src/mongo/db/query/collation/collation_spec_test.cpp @@ -0,0 +1,61 @@ +/** + * Copyright (C) 2016 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/platform/basic.h" + +#include "mongo/db/query/collation/collation_spec.h" + +#include "mongo/unittest/unittest.h" + +namespace { + +using namespace mongo; + +TEST(CollationSpecTest, SpecsWithNonEqualLocaleStringsAreNotEqual) { + CollationSpec collationSpec1; + collationSpec1.localeID = "fr"; + + CollationSpec collationSpec2; + collationSpec2.localeID = "de"; + + ASSERT_FALSE(collationSpec1 == collationSpec2); + ASSERT_TRUE(collationSpec1 != collationSpec2); +} + +TEST(CollationSpecTest, EqualSpecs) { + CollationSpec collationSpec1; + collationSpec1.localeID = "fr"; + + CollationSpec collationSpec2; + collationSpec2.localeID = "fr"; + + ASSERT_TRUE(collationSpec1 == collationSpec2); + ASSERT_FALSE(collationSpec1 != collationSpec2); +} + +} // namespace diff --git a/src/mongo/db/query/collation/collator_factory_icu.cpp b/src/mongo/db/query/collation/collator_factory_icu.cpp new file mode 100644 index 00000000000..190960f6515 --- /dev/null +++ b/src/mongo/db/query/collation/collator_factory_icu.cpp @@ -0,0 +1,100 @@ +/** + * Copyright (C) 2016 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/platform/basic.h" + +#include "mongo/db/query/collation/collator_factory_icu.h" + +#include <unicode/errorcode.h> + +#include "mongo/bson/bsonobj.h" +#include "mongo/db/query/collation/collator_interface_icu.h" +#include "mongo/stdx/memory.h" +#include "mongo/util/mongoutils/str.h" + +namespace mongo { + +namespace { + +// Extracts the collation options from 'spec' and performs basic validation. +// +// Validation or normalization requiring the ICU library is done later. +StatusWith<CollationSpec> parseToCollationSpec(const BSONObj& spec) { + CollationSpec parsedSpec; + + for (auto elem : spec) { + if (str::equals(CollationSpec::kLocaleField, elem.fieldName())) { + if (elem.type() != BSONType::String) { + return {ErrorCodes::FailedToParse, + str::stream() << "Field '" << CollationSpec::kLocaleField + << "' must be of type string in: " << spec}; + } + + parsedSpec.localeID = elem.String(); + } else { + return {ErrorCodes::FailedToParse, + str::stream() << "Unknown collation spec field: " << elem.fieldName()}; + } + } + + if (parsedSpec.localeID.empty()) { + return {ErrorCodes::FailedToParse, str::stream() << "Missing locale string"}; + } + + return parsedSpec; +} + +} // namespace + +StatusWith<std::unique_ptr<CollatorInterface>> CollatorFactoryICU::makeFromBSON( + const BSONObj& spec) { + auto parsedSpec = parseToCollationSpec(spec); + if (!parsedSpec.isOK()) { + return parsedSpec.getStatus(); + } + + // TODO SERVER-22373: use ICU to validate and normalize locale string. As part of this work, we + // should add unit test coverage for both locale string validation and normalization. + auto locale = icu::Locale::createFromName(parsedSpec.getValue().localeID.c_str()); + + UErrorCode status = U_ZERO_ERROR; + std::unique_ptr<icu::Collator> icuCollator(icu::Collator::createInstance(locale, status)); + if (U_FAILURE(status)) { + icu::ErrorCode icuError; + icuError.set(status); + return {ErrorCodes::OperationFailed, + str::stream() << "Failed to create collator: " << icuError.errorName() + << ". Collation spec: " << spec}; + } + + auto mongoCollator = stdx::make_unique<CollatorInterfaceICU>(std::move(parsedSpec.getValue()), + std::move(icuCollator)); + return {std::move(mongoCollator)}; +} + +} // namespace mongo diff --git a/src/mongo/db/query/collation/collator_factory_icu.h b/src/mongo/db/query/collation/collator_factory_icu.h new file mode 100644 index 00000000000..a7c36f76ed1 --- /dev/null +++ b/src/mongo/db/query/collation/collator_factory_icu.h @@ -0,0 +1,47 @@ +/** + * Copyright (C) 2016 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +#include "mongo/db/query/collation/collator_factory_interface.h" + +namespace mongo { + +/** + * Creates CollatorInterface instances backed by the ICU library's collation implementation. + * + * TODO: The factory should open collations once, and then return clones when a caller needs a + * CollatorInterface. This is more efficient because the necessary read-only data will only be + * prepared once on collation open. + */ +class CollatorFactoryICU : public CollatorFactoryInterface { +public: + StatusWith<std::unique_ptr<CollatorInterface>> makeFromBSON(const BSONObj& spec) final; +}; + +} // namespace mongo diff --git a/src/mongo/db/query/collation/collator_factory_icu_test.cpp b/src/mongo/db/query/collation/collator_factory_icu_test.cpp new file mode 100644 index 00000000000..0d1fe09bed1 --- /dev/null +++ b/src/mongo/db/query/collation/collator_factory_icu_test.cpp @@ -0,0 +1,84 @@ +/** + * Copyright (C) 2016 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/platform/basic.h" + +#include "mongo/db/query/collation/collator_factory_icu.h" + +#include "mongo/bson/bsonobjbuilder.h" +#include "mongo/unittest/unittest.h" + +namespace { + +using namespace mongo; + +TEST(CollatorFactoryICUTest, LocaleStringParsesSuccessfully) { + CollatorFactoryICU factory; + auto collator = factory.makeFromBSON(BSON("locale" + << "en_US")); + ASSERT_OK(collator.getStatus()); + ASSERT_EQ("en_US", collator.getValue()->getSpec().localeID); +} + +TEST(CollatorFactoryICUTest, LocaleFieldNotAStringFailsToParse) { + CollatorFactoryICU factory; + auto collator = factory.makeFromBSON(BSON("locale" << 3)); + ASSERT_NOT_OK(collator.getStatus()); + ASSERT_EQ(collator.getStatus(), ErrorCodes::FailedToParse); +} + +TEST(CollatorFactoryICUTest, MissingLocaleStringFailsToParse) { + CollatorFactoryICU factory; + auto collator = factory.makeFromBSON(BSONObj()); + ASSERT_NOT_OK(collator.getStatus()); + ASSERT_EQ(collator.getStatus(), ErrorCodes::FailedToParse); +} + +TEST(CollatorFactoryICUTest, UnknownSpecFieldFailsToParse) { + BSONObj spec = BSON("locale" + << "en_US" + << "unknown" + << "field"); + CollatorFactoryICU factory; + auto collator = factory.makeFromBSON(spec); + ASSERT_NOT_OK(collator.getStatus()); + ASSERT_EQ(collator.getStatus(), ErrorCodes::FailedToParse); +} + +TEST(CollatorFactoryICUTest, FactoryMadeCollatorComparesStringsCorrectlyEnUS) { + CollatorFactoryICU factory; + auto collator = factory.makeFromBSON(BSON("locale" + << "en_US")); + ASSERT_OK(collator.getStatus()); + + ASSERT_LT(collator.getValue()->compare("ab", "ba"), 0); + ASSERT_GT(collator.getValue()->compare("ba", "ab"), 0); + ASSERT_EQ(collator.getValue()->compare("ab", "ab"), 0); +} + +} // namespace diff --git a/src/mongo/db/query/collation/collator_factory_interface.h b/src/mongo/db/query/collation/collator_factory_interface.h new file mode 100644 index 00000000000..1503ef77327 --- /dev/null +++ b/src/mongo/db/query/collation/collator_factory_interface.h @@ -0,0 +1,62 @@ +/** + * Copyright (C) 2016 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +#include <memory> + +#include "mongo/base/disallow_copying.h" +#include "mongo/db/query/collation/collator_interface.h" + +namespace mongo { + +class BSONObj; +template <typename T> +class StatusWith; + +/** + * An interface which can be used to retrieve a collator. + */ +class CollatorFactoryInterface { + MONGO_DISALLOW_COPYING(CollatorFactoryInterface); + +public: + CollatorFactoryInterface() = default; + + virtual ~CollatorFactoryInterface() {} + + /** + * Parses 'spec' and, on success, returns the corresponding CollatorInterface. + * + * Returns a non-OK status if 'spec' is invalid or otherwise cannot be converted into a + * collator. + */ + virtual StatusWith<std::unique_ptr<CollatorInterface>> makeFromBSON(const BSONObj& spec) = 0; +}; + +} // namespace mongo diff --git a/src/mongo/db/query/collation/collator_interface.h b/src/mongo/db/query/collation/collator_interface.h new file mode 100644 index 00000000000..be857e10221 --- /dev/null +++ b/src/mongo/db/query/collation/collator_interface.h @@ -0,0 +1,93 @@ +/** + * Copyright (C) 2016 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +#include "mongo/base/disallow_copying.h" +#include "mongo/db/query/collation/collation_spec.h" + +namespace mongo { + +class StringData; + +/** + * An interface for ordering and matching according to a collation. Instances should be retrieved + * from the CollatorFactoryInterface and may not be copied. + * + * All methods are thread-safe. + * + * Does not throw exceptions. + * + * TODO SERVER-22738: Extend interface with a getComparisonKey() method and implement a + * MongoDB-specific abstraction for a collator-generated comparison key. + */ +class CollatorInterface { + MONGO_DISALLOW_COPYING(CollatorInterface); + +public: + /** + * Constructs a CollatorInterface capable of computing the collation described by 'spec'. + */ + CollatorInterface(CollationSpec spec) : _spec(std::move(spec)) {} + + virtual ~CollatorInterface() {} + + /** + * Returns a number < 0 if 'left' is less than 'right' with respect to the collation, a number > + * 0 if 'left' is greater than 'right' w.r.t. the collation, and 0 if 'left' and 'right' are + * equal w.r.t. the collation. + */ + virtual int compare(StringData left, StringData right) = 0; + + /** + * Returns whether this collation has the same matching and sorting semantics as 'other'. + */ + bool operator==(const CollatorInterface& other) const { + return getSpec() == other.getSpec(); + } + + /** + * Returns whether this collation *does not* have the same matching and sorting semantics as + * 'other'. + */ + bool operator!=(const CollatorInterface& other) const { + return !(*this == other); + } + + /** + * Returns a reference to the CollationSpec. + */ + const CollationSpec& getSpec() const { + return _spec; + } + +private: + const CollationSpec _spec; +}; + +} // namespace mongo diff --git a/src/mongo/db/query/collation/collator_interface_icu.cpp b/src/mongo/db/query/collation/collator_interface_icu.cpp new file mode 100644 index 00000000000..d5a698e924b --- /dev/null +++ b/src/mongo/db/query/collation/collator_interface_icu.cpp @@ -0,0 +1,52 @@ +/** + * Copyright (C) 2016 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/platform/basic.h" + +#include "mongo/db/query/collation/collator_interface_icu.h" + +#include "mongo/util/assert_util.h" + +namespace mongo { + +CollatorInterfaceICU::CollatorInterfaceICU(CollationSpec spec, + std::unique_ptr<icu::Collator> collator) + : CollatorInterface(std::move(spec)), _collator(std::move(collator)) {} + +int CollatorInterfaceICU::compare(StringData left, StringData right) { + // TODO: What happens if 'status' is a failure code? In what circumstances could this happen? + UErrorCode status = U_ZERO_ERROR; + auto compareResult = _collator->compare(icu::UnicodeString(left.rawData(), left.size()), + icu::UnicodeString(right.rawData(), right.size()), + status); + invariant(U_SUCCESS(status)); + + return compareResult; +} + +} // namespace mongo diff --git a/src/mongo/db/query/collation/collator_interface_icu.h b/src/mongo/db/query/collation/collator_interface_icu.h new file mode 100644 index 00000000000..19d73c3134e --- /dev/null +++ b/src/mongo/db/query/collation/collator_interface_icu.h @@ -0,0 +1,54 @@ +/** + * Copyright (C) 2016 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +#include "mongo/db/query/collation/collator_interface.h" + +#include <memory> +#include <unicode/coll.h> + +namespace mongo { + +/** + * An implementation of the CollatorInterface which is backed by the implementation of collations + * from the ICU library. + */ +class CollatorInterfaceICU : public CollatorInterface { +public: + CollatorInterfaceICU(CollationSpec spec, std::unique_ptr<icu::Collator> collator); + + int compare(StringData left, StringData right) final; + +private: + // The ICU implementation of the collator to which we delegate interesting work. Const methods + // on the ICU collator are expected to be thread-safe. + const std::unique_ptr<icu::Collator> _collator; +}; + +} // namespace mongo diff --git a/src/mongo/db/query/collation/collator_interface_icu_test.cpp b/src/mongo/db/query/collation/collator_interface_icu_test.cpp new file mode 100644 index 00000000000..969ddb30c75 --- /dev/null +++ b/src/mongo/db/query/collation/collator_interface_icu_test.cpp @@ -0,0 +1,112 @@ +/** + * Copyright (C) 2016 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/platform/basic.h" + +#include "mongo/db/query/collation/collator_interface_icu.h" + +#include "mongo/unittest/unittest.h" + +namespace { + +using namespace mongo; + +TEST(CollatorInterfaceICUTest, ASCIIComparisonWorksForUSEnglishCollation) { + CollationSpec collationSpec; + collationSpec.localeID = "en_US"; + + UErrorCode status = U_ZERO_ERROR; + std::unique_ptr<icu::Collator> coll( + icu::Collator::createInstance(icu::Locale("en", "US"), status)); + ASSERT(U_SUCCESS(status)); + + CollatorInterfaceICU icuCollator(collationSpec, std::move(coll)); + ASSERT_LT(icuCollator.compare("ab", "ba"), 0); + ASSERT_GT(icuCollator.compare("ba", "ab"), 0); + ASSERT_EQ(icuCollator.compare("ab", "ab"), 0); +} + +TEST(CollatorInterfaceICUTest, ASCIIComparisonWorksUsingLocaleStringParsing) { + CollationSpec collationSpec; + collationSpec.localeID = "en_US"; + + auto locale = icu::Locale::createFromName(collationSpec.localeID.c_str()); + ASSERT_EQ(std::string("en"), locale.getLanguage()); + ASSERT_EQ(std::string("US"), locale.getCountry()); + + UErrorCode status = U_ZERO_ERROR; + std::unique_ptr<icu::Collator> coll(icu::Collator::createInstance(locale, status)); + ASSERT(U_SUCCESS(status)); + + CollatorInterfaceICU icuCollator(collationSpec, std::move(coll)); + ASSERT_LT(icuCollator.compare("ab", "ba"), 0); + ASSERT_GT(icuCollator.compare("ba", "ab"), 0); + ASSERT_EQ(icuCollator.compare("ab", "ab"), 0); +} + +TEST(CollatorInterfaceICUTest, TwoUSEnglishCollationsAreEqual) { + CollationSpec collationSpec; + collationSpec.localeID = "en_US"; + auto locale = icu::Locale::createFromName(collationSpec.localeID.c_str()); + + UErrorCode status = U_ZERO_ERROR; + std::unique_ptr<icu::Collator> coll1(icu::Collator::createInstance(locale, status)); + ASSERT(U_SUCCESS(status)); + + std::unique_ptr<icu::Collator> coll2(icu::Collator::createInstance(locale, status)); + ASSERT(U_SUCCESS(status)); + + CollatorInterfaceICU icuCollator1(collationSpec, std::move(coll1)); + CollatorInterfaceICU icuCollator2(collationSpec, std::move(coll2)); + ASSERT_TRUE(icuCollator1 == icuCollator2); + ASSERT_FALSE(icuCollator1 != icuCollator2); +} + +TEST(CollatorInterfaceICUTest, USEnglishAndBritishEnglishCollationsAreNotEqual) { + CollationSpec collationSpec1; + collationSpec1.localeID = "en_US"; + auto locale1 = icu::Locale::createFromName(collationSpec1.localeID.c_str()); + + CollationSpec collationSpec2; + collationSpec2.localeID = "en_UK"; + auto locale2 = icu::Locale::createFromName(collationSpec2.localeID.c_str()); + + UErrorCode status = U_ZERO_ERROR; + std::unique_ptr<icu::Collator> coll1(icu::Collator::createInstance(locale1, status)); + ASSERT(U_SUCCESS(status)); + + std::unique_ptr<icu::Collator> coll2(icu::Collator::createInstance(locale2, status)); + ASSERT(U_SUCCESS(status)); + + CollatorInterfaceICU icuCollator1(collationSpec1, std::move(coll1)); + CollatorInterfaceICU icuCollator2(collationSpec2, std::move(coll2)); + ASSERT_FALSE(icuCollator1 == icuCollator2); + ASSERT_TRUE(icuCollator1 != icuCollator2); +} + +} // namespace diff --git a/src/third_party/SConscript b/src/third_party/SConscript index 21f0a920488..8da73288748 100644 --- a/src/third_party/SConscript +++ b/src/third_party/SConscript @@ -81,7 +81,9 @@ if not use_system_version_of_library('intel_decimal128'): if icuEnabled and not use_system_version_of_library('icu'): thirdPartyIncludePathList.append( - ('icu', '#/src/third_party/icu4c' + icuSuffix + '/source')) + ('icu', '#/src/third_party/icu4c' + icuSuffix + '/source/common')) + thirdPartyIncludePathList.append( + ('icu', '#/src/third_party/icu4c' + icuSuffix + '/source/i18n')) def injectAllThirdPartyIncludePaths(thisEnv): thisEnv.PrependUnique(CPPPATH=[entry[1] for entry in thirdPartyIncludePathList]) diff --git a/src/third_party/icu4c-56.1/source/SConscript b/src/third_party/icu4c-56.1/source/SConscript index 4381b6a0ab5..1c9d3c713f8 100644 --- a/src/third_party/icu4c-56.1/source/SConscript +++ b/src/third_party/icu4c-56.1/source/SConscript @@ -6,14 +6,10 @@ env = env.Clone() env.Append( CPPDEFINES=[ - 'U_USING_ICU_NAMESPACE=0', - 'U_STATIC_IMPLEMENTATION=1', + 'U_CHARSET_ISUTF8=1', 'U_I18N_IMPLEMENTATION=1', + 'U_STATIC_IMPLEMENTATION=1', ], - CPPPATH=[ - 'common', - 'i18n', - ] ) def removeIfPresent(lst, item): |