diff options
author | Jason Rassi <rassi@10gen.com> | 2016-05-09 17:24:19 -0400 |
---|---|---|
committer | Jason Rassi <rassi@10gen.com> | 2016-05-13 16:36:35 -0400 |
commit | f59eac414ed7e177a8514119c43794941d6b3c7b (patch) | |
tree | 83661b155a7a4d30e295a0c4016905bf11d8b283 | |
parent | 2c4b67364343eb7dc10528b66f5d6a3a70e60ee3 (diff) | |
download | mongo-f59eac414ed7e177a8514119c43794941d6b3c7b.tar.gz |
SERVER-22371 ICU data integration
-rw-r--r-- | src/mongo/db/query/collation/SConscript | 15 | ||||
-rwxr-xr-x | src/mongo/db/query/collation/generate_icu_init_cpp.py | 118 |
2 files changed, 133 insertions, 0 deletions
diff --git a/src/mongo/db/query/collation/SConscript b/src/mongo/db/query/collation/SConscript index d5bf4bb70ed..4cfdd785094 100644 --- a/src/mongo/db/query/collation/SConscript +++ b/src/mongo/db/query/collation/SConscript @@ -1,5 +1,6 @@ # -*- mode: python -*- +Import("endian") Import("env") Import("icuEnabled") @@ -131,11 +132,25 @@ env.CppUnitTest( ) if icuEnabled: + generateICUInit = env.Command( + target="icu_init.cpp", + source=[ + "generate_icu_init_cpp.py", + ("$BUILD_DIR/third_party/icu4c-57.1/source/mongo_sources/icudt57l.dat" + if endian == "little" + else "$BUILD_DIR/third_party/icu4c-57.1/source/mongo_sources/icudt57b.dat"), + ], + action="$PYTHON ${SOURCES[0]} -o $TARGET -i ${SOURCES[1]}", + ) + + env.Alias("generated-sources", generateICUInit) + env.Library( target="collator_icu", source=[ "collator_factory_icu.cpp", "collator_interface_icu.cpp", + "icu_init.cpp", ], LIBDEPS=[ "$BUILD_DIR/mongo/base", diff --git a/src/mongo/db/query/collation/generate_icu_init_cpp.py b/src/mongo/db/query/collation/generate_icu_init_cpp.py new file mode 100755 index 00000000000..8ae084aeec6 --- /dev/null +++ b/src/mongo/db/query/collation/generate_icu_init_cpp.py @@ -0,0 +1,118 @@ +#!/usr/bin/env python + +# Copyright 2016 MongoDB Inc. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License, version 3, +# as published by the Free Software Foundation. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. 
#
# As a special exception, the copyright holders give permission to link the
# code of portions of this program with the OpenSSL library under certain
# conditions as described in each individual source file and distribute
# linked combinations including the program with the OpenSSL library. You
# must comply with the GNU Affero General Public License in all respects
# for all of the code used other than as permitted herein. If you modify
# file(s) with this exception, you may extend this exception to your
# version of the file(s), but you are not obligated to do so. If you do not
# wish to do so, delete this exception statement from your version. If you
# delete this exception statement from all source files in the program,
# then also delete it in the license file.

import optparse
import os
import sys


def main(argv):
    """Parse the command line and generate the ICU initializer C++ file.

    Args:
        argv: The full argument vector, including the program name in
            position 0 (as in ``sys.argv``).

    Exits via ``parser.error`` (status 2) when an extra positional argument is
    given or when either the output or the input path is missing.
    """
    parser = optparse.OptionParser()
    parser.add_option('-o', '--output', action='store', dest='output_cpp_file',
                      help='path to output cpp file')
    parser.add_option('-i', '--input', action='store', dest='input_data_file',
                      help='input ICU data file, in common format (.dat)')
    (options, args) = parser.parse_args(argv)
    # The program name (argv[0]) is parsed as a positional argument, so exactly
    # one leftover argument is expected; anything beyond that is an error.
    if len(args) > 1:
        parser.error("too many arguments")
    if options.output_cpp_file is None:
        parser.error("output file unspecified")
    if options.input_data_file is None:
        parser.error("input ICU data file unspecified")
    generate_cpp_file(options.input_data_file, options.output_cpp_file)


def generate_cpp_file(data_file_path, cpp_file_path):
    """Write a C++ source file that embeds an ICU data file as a byte array.

    The generated file defines a 16-aligned ``kRawData`` array holding every
    byte of the input as a comma-separated decimal literal, plus a
    MONGO_INITIALIZER that passes the array to ``udata_setCommonData``.

    Args:
        data_file_path: Path of the ICU data file to embed; read in binary mode.
        cpp_file_path: Path of the C++ file to create or overwrite.
    """
    source_template = '''// AUTO-GENERATED FILE DO NOT EDIT
// See generate_icu_init_cpp.py.
/**
 * Copyright 2016 MongoDB, Inc.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License, version 3,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 *
 * As a special exception, the copyright holders give permission to link the
 * code of portions of this program with the OpenSSL library under certain
 * conditions as described in each individual source file and distribute
 * linked combinations including the program with the OpenSSL library. You
 * must comply with the GNU Affero General Public License in all respects
 * for all of the code used other than as permitted herein. If you modify
 * file(s) with this exception, you may extend this exception to your
 * version of the file(s), but you are not obligated to do so. If you do not
 * wish to do so, delete this exception statement from your version. If you
 * delete this exception statement from all source files in the program,
 * then also delete it in the license file.
 */

#include "mongo/platform/basic.h"

#include <unicode/udata.h>

#include "mongo/base/init.h"
#include "mongo/util/assert_util.h"

namespace mongo {
namespace {

// alignas() is used here to ensure 16-alignment of ICU data. See the following excerpt from the
// ICU user guide (<http://userguide.icu-project.org/icudata#TOC-Alignment>):
//
// "ICU data is designed to be 16-aligned, with natural alignment of values inside the data
// structure, so that the data is usable as is when memory-mapped. Memory-mapping (as well as
// memory allocation) provides at least 16-alignment on modern platforms. Some CPUs require
// n-alignment of types of size n bytes (and crash on unaligned reads), other CPUs usually operate
// faster on data that is aligned properly. Some of the ICU code explicitly checks for proper
// alignment."
alignas(16) const uint8_t kRawData[] = {%(decimal_encoded_data)s};

} // namespace

MONGO_INITIALIZER(LoadICUData)(InitializerContext* context) {
    UErrorCode status = U_ZERO_ERROR;
    udata_setCommonData(kRawData, &status);
    fassert(40088, U_SUCCESS(status));
    return Status::OK();
}

} // namespace mongo
'''
    with open(data_file_path, 'rb') as data_file:
        # Iterating a bytearray yields integers on both Python 2 and Python 3,
        # whereas iterating the raw read() result yields 1-char strings on
        # Python 2 and ints on Python 3 (where ord() would then raise).
        raw_bytes = bytearray(data_file.read())
    decimal_encoded_data = ','.join(str(byte) for byte in raw_bytes)

    rendered = source_template % dict(decimal_encoded_data=decimal_encoded_data)
    # Binary mode prevents newline translation, so the output is identical on
    # every platform; the template and digits are pure ASCII, and encoding
    # gives the bytes object that a binary-mode file requires on Python 3.
    with open(cpp_file_path, 'wb') as cpp_file:
        cpp_file.write(rendered.encode('ascii'))


if __name__ == '__main__':
    main(sys.argv)