diff options
-rw-r--r-- | src/mongo/util/SConscript | 21 | ||||
-rw-r--r-- | src/mongo/util/icu.cpp | 180 | ||||
-rw-r--r-- | src/mongo/util/icu.h | 55 | ||||
-rw-r--r-- | src/mongo/util/icu_test.cpp | 79 |
4 files changed, 335 insertions, 0 deletions
diff --git a/src/mongo/util/SConscript b/src/mongo/util/SConscript index 581788915f4..5b5de7f89fa 100644 --- a/src/mongo/util/SConscript +++ b/src/mongo/util/SConscript @@ -711,3 +711,24 @@ icuEnv.Library( ], ) +icuEnv.Library( + target='icu', + source=[ + 'icu.cpp', + ], + LIBDEPS_PRIVATE=[ + 'icu_init', + '$BUILD_DIR/mongo/base', + '$BUILD_DIR/third_party/shim_icu', + ], +) + +icuEnv.CppUnitTest( + target='icu_test', + source=[ + 'icu_test.cpp', + ], + LIBDEPS=[ + 'icu', + ], +) diff --git a/src/mongo/util/icu.cpp b/src/mongo/util/icu.cpp new file mode 100644 index 00000000000..842fe458620 --- /dev/null +++ b/src/mongo/util/icu.cpp @@ -0,0 +1,180 @@ +/* + * Copyright (C) 2018 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. 
If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/platform/basic.h" + +#include "mongo/util/icu.h" + +#include <memory> +#include <unicode/localpointer.h> +#include <unicode/putil.h> +#include <unicode/uiter.h> +#include <unicode/unistr.h> +#include <unicode/usprep.h> +#include <unicode/ustring.h> +#include <unicode/utypes.h> +#include <vector> + +#include "mongo/util/assert_util.h" +#include "mongo/util/mongoutils/str.h" + +namespace mongo { +namespace { + +/** + * Convenience wrapper for a UChar[] string. + * Instantiate with UString::fromUTF8() and reserialize with ustr.toUTF8() + */ +class UString { +public: + UString() = delete; + explicit UString(size_t size) { + _str.resize(size); + } + + const UChar* uc_str() const { + return _str.data(); + } + UChar* data() { + return _str.data(); + } + size_t capacity() const { + return _str.capacity(); + } + size_t size() const { + return _str.size(); + } + void resize(size_t len) { + _str.resize(len); + } + + static UString fromUTF8(StringData str) { + UErrorCode error = U_ZERO_ERROR; + int32_t len = 0; + u_strFromUTF8(nullptr, 0, &len, str.rawData(), str.size(), &error); + uassert(ErrorCodes::BadValue, "Non UTF-8 data encountered", error != U_INVALID_CHAR_FOUND); + uassert(50687, + str::stream() << "Error preflighting UTF-8 conversion: " << u_errorName(error), + error == U_BUFFER_OVERFLOW_ERROR); + + error = U_ZERO_ERROR; + UString ret(len); + u_strFromUTF8(ret.data(), ret.capacity(), &len, str.rawData(), str.size(), &error); + uassert(50688, + str::stream() << "Error converting UTF-8 string: " << u_errorName(error), + U_SUCCESS(error)); + ret.resize(len); + return ret; + } + + std::string toUTF8() const { + UErrorCode error = U_ZERO_ERROR; + int32_t len = 0; + u_strToUTF8(nullptr, 0, &len, _str.data(), _str.size(), &error); + uassert(50689, + str::stream() << "Error preflighting UTF-8 conversion: " << 
u_errorName(error), + error == U_BUFFER_OVERFLOW_ERROR); + + error = U_ZERO_ERROR; + std::string ret; + ret.resize(len); + u_strToUTF8(&ret[0], ret.capacity(), &len, _str.data(), _str.size(), &error); + uassert(50690, + str::stream() << "Error converting string to UTF-8: " << u_errorName(error), + U_SUCCESS(error)); + ret.resize(len); + return ret; + } + +private: + std::vector<UChar> _str; +}; + +/** + * Convenience wrapper for ICU unicode string prep API. + */ +class USPrep { +public: + USPrep() = delete; + USPrep(UStringPrepProfileType type) { + UErrorCode error = U_ZERO_ERROR; + _profile.reset(usprep_openByType(type, &error)); + uassert(50691, + str::stream() << "Unable to open unicode string prep profile: " + << u_errorName(error), + U_SUCCESS(error)); + } + + UString prepare(const UString& src, int32_t options = USPREP_DEFAULT) { + UErrorCode error = U_ZERO_ERROR; + auto len = usprep_prepare( + _profile.get(), src.uc_str(), src.size(), nullptr, 0, options, nullptr, &error); + uassert(ErrorCodes::BadValue, + "Unable to normalize input string", + error != U_INVALID_CHAR_FOUND); + uassert(50692, + str::stream() << "Error preflighting normalization: " << u_errorName(error), + error == U_BUFFER_OVERFLOW_ERROR); + + error = U_ZERO_ERROR; + UString ret(len); + len = usprep_prepare(_profile.get(), + src.uc_str(), + src.size(), + ret.data(), + ret.capacity(), + options, + nullptr, + &error); + uassert(50693, + str::stream() << "Failed normalizing string: " << u_errorName(error), + U_SUCCESS(error)); + ret.resize(len); + return ret; + } + +private: + class USPrepDeleter { + public: + void operator()(UStringPrepProfile* profile) { + usprep_close(profile); + } + }; + + std::unique_ptr<UStringPrepProfile, USPrepDeleter> _profile; +}; + +} // namespace +} // namespace mongo + +mongo::StatusWith<std::string> mongo::saslPrep(StringData str, UStringPrepOptions options) try { + const auto opts = (options == kUStringPrepDefault) ? 
USPREP_DEFAULT : USPREP_ALLOW_UNASSIGNED; + return USPrep(USPREP_RFC4013_SASLPREP).prepare(UString::fromUTF8(str), opts).toUTF8(); +} catch (const DBException& e) { + return e.toStatus(); +} diff --git a/src/mongo/util/icu.h b/src/mongo/util/icu.h new file mode 100644 index 00000000000..a4b08ffe8d8 --- /dev/null +++ b/src/mongo/util/icu.h @@ -0,0 +1,55 @@ +/* + * Copyright (C) 2018 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +#include <string> + +#include "mongo/base/status_with.h" +#include "mongo/base/string_data.h" + +namespace mongo { + +/** + * Unicode string prepare options. 
+ * By default, unassigned codepoints in the input string will result in an error. + * Using the AllowUnassigned option will pass them through without change, + * which may not turn out to be appropriate in later Unicode standards. + */ +enum UStringPrepOptions { + kUStringPrepDefault = 0, + kUStringPrepAllowUnassigned = 1, +}; + +/** + * Attempt to apply RFC4013 saslPrep to the target string. + * Normalizes unicode sequences for SCRAM authentication. + */ +StatusWith<std::string> saslPrep(StringData str, UStringPrepOptions = kUStringPrepDefault); + +} // namespace mongo diff --git a/src/mongo/util/icu_test.cpp b/src/mongo/util/icu_test.cpp new file mode 100644 index 00000000000..0cf53d8d1b7 --- /dev/null +++ b/src/mongo/util/icu_test.cpp @@ -0,0 +1,79 @@ +/** + * Copyright (C) 2018 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. 
If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/platform/basic.h" + +#include "mongo/unittest/unittest.h" +#include "mongo/util/icu.h" + +namespace mongo { +namespace { + +struct testCases { + std::string original; + std::string normalized; + bool success; +}; + +TEST(ICUTest, saslPrep) { + const testCases tests[] = { + // U+0065 LATIN SMALL LETTER E + U+0301 COMBINING ACUTE ACCENT + // U+00E9 LATIN SMALL LETTER E WITH ACUTE + {"\x65\xCC\x81", "\xC3\xA9", true}, + + // Test values from RFC4013 Section 3. + // #1 SOFT HYPHEN mapped to nothing. + {"I\xC2\xADX", "IX", true}, + // #2 no transformation + {"user", "user", true}, + // #3 case preserved, will not match #2 + {"USER", "USER", true}, + // #4 output is NFKC, input in ISO 8859-1 + {"\xC2\xAA", "a", true}, + // #5 output is NFKC, will match #1 + {"\xE2\x85\xA8", "IX", true}, + // #6 Error - prohibited character + {"\x07", "(invalid)", false}, + // #7 Error - bidirectional check + {"\xD8\xA7\x31", "(invalid)", false}, + }; + + for (const auto test : tests) { + auto ret = saslPrep(test.original); + ASSERT_EQ(ret.isOK(), test.success); + if (test.success) { + ASSERT_OK(ret); + ASSERT_EQ(ret.getValue(), test.normalized); + } else { + ASSERT_NOT_OK(ret); + } + } +} + +} // namespace +} // namespace mongo |