summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Jason Rassi <rassi@10gen.com> 2016-05-09 17:24:19 -0400
committer Jason Rassi <rassi@10gen.com> 2016-05-13 16:36:35 -0400
commit f59eac414ed7e177a8514119c43794941d6b3c7b (patch)
tree 83661b155a7a4d30e295a0c4016905bf11d8b283
parent 2c4b67364343eb7dc10528b66f5d6a3a70e60ee3 (diff)
download mongo-f59eac414ed7e177a8514119c43794941d6b3c7b.tar.gz
SERVER-22371 ICU data integration
-rw-r--r-- src/mongo/db/query/collation/SConscript 15
-rwxr-xr-x src/mongo/db/query/collation/generate_icu_init_cpp.py 118
2 files changed, 133 insertions, 0 deletions
diff --git a/src/mongo/db/query/collation/SConscript b/src/mongo/db/query/collation/SConscript
index d5bf4bb70ed..4cfdd785094 100644
--- a/src/mongo/db/query/collation/SConscript
+++ b/src/mongo/db/query/collation/SConscript
@@ -1,5 +1,6 @@
# -*- mode: python -*-
+Import("endian")
Import("env")
Import("icuEnabled")
@@ -131,11 +132,25 @@ env.CppUnitTest(
)
if icuEnabled:
+ # Generate icu_init.cpp by running generate_icu_init_cpp.py (SOURCES[0]) over an ICU
+ # data file (SOURCES[1]). icudt57l.dat is chosen when endian == "little"; any other
+ # value of endian selects icudt57b.dat.
+ generateICUInit = env.Command(
+ target="icu_init.cpp",
+ source=[
+ "generate_icu_init_cpp.py",
+ ("$BUILD_DIR/third_party/icu4c-57.1/source/mongo_sources/icudt57l.dat"
+ if endian == "little"
+ else "$BUILD_DIR/third_party/icu4c-57.1/source/mongo_sources/icudt57b.dat"),
+ ],
+ action="$PYTHON ${SOURCES[0]} -o $TARGET -i ${SOURCES[1]}",
+ )
+
+ # Register the generated file under the "generated-sources" alias.
+ env.Alias("generated-sources", generateICUInit)
+
env.Library(
target="collator_icu",
source=[
"collator_factory_icu.cpp",
"collator_interface_icu.cpp",
+ "icu_init.cpp",
],
LIBDEPS=[
"$BUILD_DIR/mongo/base",
diff --git a/src/mongo/db/query/collation/generate_icu_init_cpp.py b/src/mongo/db/query/collation/generate_icu_init_cpp.py
new file mode 100755
index 00000000000..8ae084aeec6
--- /dev/null
+++ b/src/mongo/db/query/collation/generate_icu_init_cpp.py
@@ -0,0 +1,118 @@
+#!/usr/bin/env python
+
+# Copyright 2016 MongoDB Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License, version 3,
+# as published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+# As a special exception, the copyright holders give permission to link the
+# code of portions of this program with the OpenSSL library under certain
+# conditions as described in each individual source file and distribute
+# linked combinations including the program with the OpenSSL library. You
+# must comply with the GNU Affero General Public License in all respects
+# for all of the code used other than as permitted herein. If you modify
+# file(s) with this exception, you may extend this exception to your
+# version of the file(s), but you are not obligated to do so. If you do not
+# wish to do so, delete this exception statement from your version. If you
+# delete this exception statement from all source files in the program,
+# then also delete it in the license file.
+
+import optparse
+import os
+import sys
+
+def main(argv):
+    """Parse the command line and generate the ICU initializer source file.
+
+    argv is handed to optparse unmodified, so when it is sys.argv the script
+    name is left over as the single permitted positional argument. Usage
+    problems (extra positional arguments, a missing -o or -i option) are
+    reported through OptionParser.error().
+    """
+    opt_parser = optparse.OptionParser()
+    opt_parser.add_option('-o', '--output', action='store', dest='output_cpp_file',
+                          help='path to output cpp file')
+    opt_parser.add_option('-i', '--input', action='store', dest='input_data_file',
+                          help='input ICU data file, in common format (.dat)')
+    opts, leftover = opt_parser.parse_args(argv)
+
+    if len(leftover) > 1:
+        opt_parser.error("too many arguments")
+
+    # Both paths are mandatory; they are checked in the order listed here.
+    required = (
+        (opts.output_cpp_file, "output file unspecified"),
+        (opts.input_data_file, "input ICU data file unspecified"),
+    )
+    for value, complaint in required:
+        if value is None:
+            opt_parser.error(complaint)
+
+    generate_cpp_file(opts.input_data_file, opts.output_cpp_file)
+
+def generate_cpp_file(data_file_path, cpp_file_path):
+    """Write a C++ source file that embeds an ICU data file as a byte array.
+
+    The file at data_file_path is read in binary mode and every byte is
+    rendered as a decimal literal in the initializer of kRawData. The
+    generated source also defines a MONGO_INITIALIZER named LoadICUData that
+    passes kRawData to udata_setCommonData() and fasserts on failure.
+
+    Args:
+        data_file_path: path of the ICU data file to embed.
+        cpp_file_path: path of the C++ file to create or overwrite.
+
+    Raises:
+        IOError/OSError: if either file cannot be opened.
+    """
+    source_template = '''// AUTO-GENERATED FILE DO NOT EDIT
+// See generate_icu_init_cpp.py.
+/**
+ * Copyright 2016 MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects
+ * for all of the code used other than as permitted herein. If you modify
+ * file(s) with this exception, you may extend this exception to your
+ * version of the file(s), but you are not obligated to do so. If you do not
+ * wish to do so, delete this exception statement from your version. If you
+ * delete this exception statement from all source files in the program,
+ * then also delete it in the license file.
+ */
+
+#include "mongo/platform/basic.h"
+
+#include <unicode/udata.h>
+
+#include "mongo/base/init.h"
+#include "mongo/util/assert_util.h"
+
+namespace mongo {
+namespace {
+
+// alignas() is used here to ensure 16-alignment of ICU data. See the following excerpt from the
+// ICU user guide (<http://userguide.icu-project.org/icudata#TOC-Alignment>):
+//
+// "ICU data is designed to be 16-aligned, with natural alignment of values inside the data
+// structure, so that the data is usable as is when memory-mapped. Memory-mapping (as well as
+// memory allocation) provides at least 16-alignment on modern platforms. Some CPUs require
+// n-alignment of types of size n bytes (and crash on unaligned reads), other CPUs usually operate
+// faster on data that is aligned properly. Some of the ICU code explicitly checks for proper
+// alignment."
+alignas(16) const uint8_t kRawData[] = {%(decimal_encoded_data)s};
+
+} // namespace
+
+MONGO_INITIALIZER(LoadICUData)(InitializerContext* context) {
+    UErrorCode status = U_ZERO_ERROR;
+    udata_setCommonData(kRawData, &status);
+    fassert(40088, U_SUCCESS(status));
+    return Status::OK();
+}
+
+} // namespace mongo
+'''
+    with open(data_file_path, 'rb') as data_file:
+        # Iterating a bytearray yields integers on both Python 2 and Python 3,
+        # whereas ord() over the raw read() result only works on Python 2.
+        raw_bytes = bytearray(data_file.read())
+    decimal_encoded_data = ','.join(str(byte) for byte in raw_bytes)
+
+    generated_source = source_template % dict(decimal_encoded_data=decimal_encoded_data)
+    with open(cpp_file_path, 'wb') as cpp_file:
+        # The file is opened in binary mode so line endings are written verbatim;
+        # the template and the digits are pure ASCII, so encode explicitly.
+        cpp_file.write(generated_source.encode('ascii'))
+
+# Script entry point: pass the full argument vector (script name included) to main().
+if __name__ == '__main__':
+ main(sys.argv)