summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- src/mongo/util/SConscript | 21
-rw-r--r-- src/mongo/util/icu.cpp | 180
-rw-r--r-- src/mongo/util/icu.h | 55
-rw-r--r-- src/mongo/util/icu_test.cpp | 79
4 files changed, 335 insertions, 0 deletions
diff --git a/src/mongo/util/SConscript b/src/mongo/util/SConscript
index 581788915f4..5b5de7f89fa 100644
--- a/src/mongo/util/SConscript
+++ b/src/mongo/util/SConscript
@@ -711,3 +711,24 @@ icuEnv.Library(
],
)
+icuEnv.Library(  # builds the 'icu' library from icu.cpp
+ target='icu',
+ source=[
+ 'icu.cpp',
+ ],
+ LIBDEPS_PRIVATE=[  # NOTE(review): presumably link deps not re-exported to dependents -- confirm
+ 'icu_init',
+ '$BUILD_DIR/mongo/base',
+ '$BUILD_DIR/third_party/shim_icu',
+ ],
+)
+
+icuEnv.CppUnitTest(  # unit test target built from icu_test.cpp
+ target='icu_test',
+ source=[
+ 'icu_test.cpp',
+ ],
+ LIBDEPS=[  # depends on the 'icu' library declared in this file
+ 'icu',
+ ],
+)
diff --git a/src/mongo/util/icu.cpp b/src/mongo/util/icu.cpp
new file mode 100644
index 00000000000..842fe458620
--- /dev/null
+++ b/src/mongo/util/icu.cpp
@@ -0,0 +1,180 @@
+/*
+ * Copyright (C) 2018 MongoDB Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/platform/basic.h"
+
+#include "mongo/util/icu.h"
+
+#include <memory>
+#include <unicode/localpointer.h>
+#include <unicode/putil.h>
+#include <unicode/uiter.h>
+#include <unicode/unistr.h>
+#include <unicode/usprep.h>
+#include <unicode/ustring.h>
+#include <unicode/utypes.h>
+#include <vector>
+
+#include "mongo/util/assert_util.h"
+#include "mongo/util/mongoutils/str.h"
+
+namespace mongo {
+namespace {
+
+/**
+ * Convenience wrapper for a UChar[] string; both conversions uassert on ICU errors.
+ * Instantiate with UString::fromUTF8() and reserialize with ustr.toUTF8()
+ */
+class UString {
+public:
+    UString() = delete;
+    explicit UString(size_t size) : _str(size) {}
+
+    // Thin forwards to the backing vector, used to hand its buffer to ICU's C API.
+    const UChar* uc_str() const { return _str.data(); }
+    UChar* data() { return _str.data(); }
+    size_t capacity() const { return _str.capacity(); }
+    size_t size() const { return _str.size(); }
+    void resize(size_t len) { _str.resize(len); }
+
+    // Converts UTF-8 `str` into a UString. Throws BadValue for invalid UTF-8 and
+    // 50687/50688 for any other ICU failure. Empty input yields an empty UString.
+    static UString fromUTF8(StringData str) {
+        if (str.size() == 0) {
+            // A zero-capacity preflight of empty input does not overflow, so skip ICU.
+            return UString(0);
+        }
+        UErrorCode error = U_ZERO_ERROR;
+        int32_t len = 0;
+        u_strFromUTF8(nullptr, 0, &len, str.rawData(), str.size(), &error);
+        uassert(ErrorCodes::BadValue, "Non UTF-8 data encountered", error != U_INVALID_CHAR_FOUND);
+        uassert(50687,
+                str::stream() << "Error preflighting UTF-8 conversion: " << u_errorName(error),
+                error == U_BUFFER_OVERFLOW_ERROR);
+
+        // Offer ICU exactly the elements that exist; capacity() may exceed size().
+        error = U_ZERO_ERROR;
+        UString ret(len);
+        u_strFromUTF8(ret.data(), ret.size(), &len, str.rawData(), str.size(), &error);
+        uassert(50688,
+                str::stream() << "Error converting UTF-8 string: " << u_errorName(error),
+                U_SUCCESS(error));
+        ret.resize(len);
+        return ret;
+    }
+
+    // Converts this string to UTF-8. Throws 50689/50690 if ICU reports a failure.
+    std::string toUTF8() const {
+        if (_str.empty()) {
+            return std::string();  // Preflight would not overflow; see fromUTF8().
+        }
+        UErrorCode error = U_ZERO_ERROR;
+        int32_t len = 0;
+        u_strToUTF8(nullptr, 0, &len, _str.data(), _str.size(), &error);
+        uassert(50689,
+                str::stream() << "Error preflighting UTF-8 conversion: " << u_errorName(error),
+                error == U_BUFFER_OVERFLOW_ERROR);
+
+        error = U_ZERO_ERROR;
+        std::string ret;
+        ret.resize(len);
+        u_strToUTF8(&ret[0], ret.size(), &len, _str.data(), _str.size(), &error);
+        uassert(50690,
+                str::stream() << "Error converting string to UTF-8: " << u_errorName(error),
+                U_SUCCESS(error));
+        ret.resize(len);
+        return ret;
+    }
+
+private:
+    std::vector<UChar> _str;
+};
+
+/**
+ * Convenience wrapper for ICU unicode string prep API. Owns the opened profile and
+ * closes it on destruction; construction uasserts 50691 if the profile cannot be opened.
+ */
+class USPrep {
+public:
+    USPrep() = delete;
+    explicit USPrep(UStringPrepProfileType type) {
+        UErrorCode error = U_ZERO_ERROR;
+        _profile.reset(usprep_openByType(type, &error));
+        uassert(50691,
+                str::stream() << "Unable to open unicode string prep profile: "
+                              << u_errorName(error),
+                U_SUCCESS(error));
+    }
+
+    // Runs `src` through the profile and returns the prepared string. Throws BadValue or
+    // 50692/50693 when ICU reports a failure. An empty `src` yields an empty result.
+    UString prepare(const UString& src, int32_t options = USPREP_DEFAULT) {
+        if (src.size() == 0) {
+            return UString(0);  // Nothing to prepare; avoids handing ICU an empty buffer.
+        }
+        UErrorCode error = U_ZERO_ERROR;
+        auto len = usprep_prepare(
+            _profile.get(), src.uc_str(), src.size(), nullptr, 0, options, nullptr, &error);
+        uassert(ErrorCodes::BadValue,
+                "Unable to normalize input string",
+                error != U_INVALID_CHAR_FOUND);
+        // An empty result makes the zero-capacity preflight succeed instead of overflowing.
+        uassert(50692,
+                str::stream() << "Error preflighting normalization: " << u_errorName(error),
+                error == U_BUFFER_OVERFLOW_ERROR || U_SUCCESS(error));
+        if (len == 0) {
+            return UString(0);
+        }
+
+        error = U_ZERO_ERROR;
+        UString ret(len);
+        len = usprep_prepare(_profile.get(), src.uc_str(), src.size(),
+                             ret.data(), ret.size(), options, nullptr, &error);
+        uassert(50693,
+                str::stream() << "Failed normalizing string: " << u_errorName(error),
+                U_SUCCESS(error));
+        ret.resize(len);
+        return ret;
+    }
+
+private:
+    struct USPrepDeleter {
+        void operator()(UStringPrepProfile* profile) const { usprep_close(profile); }
+    };
+    std::unique_ptr<UStringPrepProfile, USPrepDeleter> _profile;
+};
+
+} // namespace
+} // namespace mongo
+
+// Applies the RFC4013 SASLprep profile to `str`; a thrown DBException becomes the Status.
+mongo::StatusWith<std::string> mongo::saslPrep(StringData str, UStringPrepOptions options) try {
+    const auto flags = (options != kUStringPrepDefault) ? USPREP_ALLOW_UNASSIGNED : USPREP_DEFAULT;
+    USPrep profile(USPREP_RFC4013_SASLPREP);
+    return profile.prepare(UString::fromUTF8(str), flags).toUTF8();
+} catch (const DBException& ex) { return ex.toStatus(); }
diff --git a/src/mongo/util/icu.h b/src/mongo/util/icu.h
new file mode 100644
index 00000000000..a4b08ffe8d8
--- /dev/null
+++ b/src/mongo/util/icu.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2018 MongoDB Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#pragma once
+
+#include <string>
+
+#include "mongo/base/status_with.h"
+#include "mongo/base/string_data.h"
+
+namespace mongo {
+
+/**
+ * Unicode string prepare options.
+ * By default, unassigned codepoints in the input string will result in an error.
+ * Using the AllowUnassigned option will pass them through without change,
+ * which may not turn out to be appropriate in later Unicode standards.
+ */
+enum UStringPrepOptions {
+ kUStringPrepDefault = 0,  // unassigned codepoints in the input result in an error
+ kUStringPrepAllowUnassigned = 1,  // unassigned codepoints are passed through unchanged
+};
+
+/**
+ * Attempt to apply RFC4013 saslPrep to the target string, normalizing unicode sequences for
+ * SCRAM authentication. Returns the prepared UTF-8 string, or a non-OK Status on failure.
+ */
+StatusWith<std::string> saslPrep(StringData str, UStringPrepOptions options = kUStringPrepDefault);
+
+} // namespace mongo
diff --git a/src/mongo/util/icu_test.cpp b/src/mongo/util/icu_test.cpp
new file mode 100644
index 00000000000..0cf53d8d1b7
--- /dev/null
+++ b/src/mongo/util/icu_test.cpp
@@ -0,0 +1,79 @@
+/**
+ * Copyright (C) 2018 MongoDB Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/platform/basic.h"
+
+#include "mongo/unittest/unittest.h"
+#include "mongo/util/icu.h"
+
+namespace mongo {
+namespace {
+
+struct testCases {
+ std::string original;  // input passed to saslPrep()
+ std::string normalized;  // expected output; only compared when success is true
+ bool success;  // whether saslPrep() is expected to return an OK status
+};
+
+// Runs saslPrep() over fixed inputs: a combining-accent case plus the RFC4013 Section 3 examples.
+TEST(ICUTest, saslPrep) {
+    const testCases tests[] = {
+        // U+0065 LATIN SMALL LETTER E + U+0301 COMBINING ACUTE ACCENT
+        // U+00E9 LATIN SMALL LETTER E WITH ACUTE
+        {"\x65\xCC\x81", "\xC3\xA9", true},
+
+        // Test values from RFC4013 Section 3.
+        // #1 SOFT HYPHEN mapped to nothing.
+        {"I\xC2\xADX", "IX", true},
+        // #2 no transformation
+        {"user", "user", true},
+        // #3 case preserved, will not match #2
+        {"USER", "USER", true},
+        // #4 output is NFKC, input in ISO 8859-1
+        {"\xC2\xAA", "a", true},
+        // #5 output is NFKC, will match #1
+        {"\xE2\x85\xA8", "IX", true},
+        // #6 Error - prohibited character
+        {"\x07", "(invalid)", false},
+        // #7 Error - bidirectional check
+        {"\xD8\xA7\x31", "(invalid)", false},
+    };
+
+    for (const auto& test : tests) {  // by reference: avoids copying two std::strings per case
+        auto ret = saslPrep(test.original);
+        if (!test.success) {
+            ASSERT_NOT_OK(ret);
+            continue;
+        }
+        ASSERT_OK(ret);
+        ASSERT_EQ(ret.getValue(), test.normalized);
+    }
+}
+
+} // namespace
+} // namespace mongo