diff options
author | Jason Rassi <rassi@10gen.com> | 2016-05-09 17:20:44 -0400 |
---|---|---|
committer | Jason Rassi <rassi@10gen.com> | 2016-05-13 16:36:34 -0400 |
commit | 2c4b67364343eb7dc10528b66f5d6a3a70e60ee3 (patch) | |
tree | b5aa784f53afdf756313278fa463af542debb650 /src/third_party | |
parent | d9a8e6a9db1bd48c2bbfb1ad18e7cb8e18eb7302 (diff) | |
download | mongo-2c4b67364343eb7dc10528b66f5d6a3a70e60ee3.tar.gz |
SERVER-22371 icu_get_sources.sh generates data, add generated data
Diffstat (limited to 'src/third_party')
-rw-r--r-- | src/third_party/icu4c-57.1/source/mongo_sources/icudt57b.dat | bin | 0 -> 2672864 bytes | |||
-rw-r--r-- | src/third_party/icu4c-57.1/source/mongo_sources/icudt57l.dat | bin | 0 -> 2672864 bytes | |||
-rw-r--r-- | src/third_party/icu4c-57.1/source/mongo_sources/languages.txt | 52 | ||||
-rwxr-xr-x | src/third_party/scripts/icu_get_sources.sh | 165 |
4 files changed, 166 insertions, 51 deletions
diff --git a/src/third_party/icu4c-57.1/source/mongo_sources/icudt57b.dat b/src/third_party/icu4c-57.1/source/mongo_sources/icudt57b.dat Binary files differ new file mode 100644 index 00000000000..74799950bb5 --- /dev/null +++ b/src/third_party/icu4c-57.1/source/mongo_sources/icudt57b.dat diff --git a/src/third_party/icu4c-57.1/source/mongo_sources/icudt57l.dat b/src/third_party/icu4c-57.1/source/mongo_sources/icudt57l.dat Binary files differ new file mode 100644 index 00000000000..da96c94e4a7 --- /dev/null +++ b/src/third_party/icu4c-57.1/source/mongo_sources/icudt57l.dat diff --git a/src/third_party/icu4c-57.1/source/mongo_sources/languages.txt b/src/third_party/icu4c-57.1/source/mongo_sources/languages.txt new file mode 100644 index 00000000000..05911c75573 --- /dev/null +++ b/src/third_party/icu4c-57.1/source/mongo_sources/languages.txt @@ -0,0 +1,52 @@ +# This file is taken as input to the ICU data packaging process, and contains the base names of all +# ICU locales that have collation data included in MongoDB's custom data file for ICU. +ar +bn +ca +cs +da +de +de_AT +el +en +en_US +en_US_POSIX +es +et +fa +fa_AF +fi +fil +fr +fr_CA +ga +he +hi +hr +hu +hy +id +is +it +ja +ka +ko +lt +lv +nn +pa +pl +ps +pt +ro +ru +sk +sl +sv +th +tr +uk +ur +vi +zh +zh_Hant diff --git a/src/third_party/scripts/icu_get_sources.sh b/src/third_party/scripts/icu_get_sources.sh index 41f1f18c3ba..e19cfd464db 100755 --- a/src/third_party/scripts/icu_get_sources.sh +++ b/src/third_party/scripts/icu_get_sources.sh @@ -1,65 +1,128 @@ #!/bin/bash -set -o verbose -set -o errexit -# This script fetches and creates a copy of sources for ICU. +# This script fetches sources for ICU and builds custom ICU data files (one big-endian data file +# and one little-endian data file). Both the sources and the data files are trimmed down for size. +# +# This script can be run from anywhere within the root of the source repository. 
This script +# expects the ICU third-party directory (src/third_party/icu4c-xx.yy/) to exist and contain a +# newline-separated language file in source/mongo_sources/languages.txt. This language file must +# list each locale for which collation data should be packaged as part of the generated custom data +# file. +# +# This script returns a zero exit code on success. + +set -euo pipefail +IFS=$'\n\t' + +if [ "$#" -ne 0 ]; then + echo "$0: too many arguments" >&2 + exit 1 +fi + +KERNEL="$(uname)" +if [ "$KERNEL" != Linux ]; then + echo "$0: kernel '$KERNEL' not supported" >&2 + exit 1 +fi NAME=icu4c MAJOR_VERSION=57 MINOR_VERSION=1 -VERSION=${MAJOR_VERSION}.${MINOR_VERSION} +VERSION="${MAJOR_VERSION}.${MINOR_VERSION}" -TARBALL=$NAME-$MAJOR_VERSION\_$MINOR_VERSION-src.tgz -TARBALL_DIR=icu -TARBALL_DEST_DIR=$NAME-$VERSION -TARBALL_DOWNLOAD_URL=http://download.icu-project.org/files/$NAME/$VERSION/$TARBALL +TARBALL="${NAME}-${MAJOR_VERSION}_${MINOR_VERSION}-src.tgz" +TARBALL_DOWNLOAD_URL="http://download.icu-project.org/files/${NAME}/${VERSION}/${TARBALL}" -TEMP_DIR=/tmp/temp-$NAME-$VERSION -DEST_DIR=$(git rev-parse --show-toplevel)/src/third_party/$NAME-$VERSION +ICU_THIRD_PARTY_DIR="$(git rev-parse --show-toplevel)/src/third_party/${NAME}-${VERSION}" +MONGO_SOURCES_DIR="${ICU_THIRD_PARTY_DIR}/source/mongo_sources" +LANGUAGE_FILE_IN="${MONGO_SOURCES_DIR}/languages.txt" +ICU_DATA_FILE_LITTLE_ENDIAN_OUT="${MONGO_SOURCES_DIR}/icudt${MAJOR_VERSION}l.dat" +ICU_DATA_FILE_BIG_ENDIAN_OUT="${MONGO_SOURCES_DIR}/icudt${MAJOR_VERSION}b.dat" -# Download and extract tarball. -if [ ! -f $TARBALL ]; then - echo "Get tarball" - wget $TARBALL_DOWNLOAD_URL -fi +# +# Set up temp directory. +# -echo $TARBALL -tar -zxvf $TARBALL +TEMP_DIR="$(mktemp -d /tmp/icu.XXXXXX)" +trap "rm -rf $TEMP_DIR" EXIT -# Move extracted files to a temporary directory. 
-rm -rf $TEMP_DIR -mv $TARBALL_DIR $TEMP_DIR +TARBALL_DIR="${TEMP_DIR}/tarball" +INSTALL_DIR="${TEMP_DIR}/install" +DATA_DIR="${TEMP_DIR}/data" +mkdir "$TARBALL_DIR" "$INSTALL_DIR" "$DATA_DIR" -# If the SConscript for building ICU already exists, move it into the temporary directory. -if [ -f $DEST_DIR/source/SConscript ]; then - echo "Saving SConscript" - mv $DEST_DIR/source/SConscript $TEMP_DIR/source - rm -rf $DEST_DIR -fi +# +# Download and extract tarball into temp directory. +# -# Copy all sources into their proper place in the mongo source tree. -if [ ! -d $DEST_DIR ]; then - mkdir $DEST_DIR -fi +cd "$TEMP_DIR" +wget "$TARBALL_DOWNLOAD_URL" +tar --strip-components=1 -C "$TARBALL_DIR" -zxf "$TARBALL" + +# +# Build and install ICU in temp directory, in order to use data packaging tools. +# + +cd "${TARBALL_DIR}/source" +./runConfigureICU "$KERNEL" --prefix="${TEMP_DIR}/install" +make -j +make install + +# +# Generate trimmed-down list of data to include in custom data files. +# + +ORIGINAL_DATA_FILE="${TARBALL_DIR}/source/data/in/icudt${MAJOR_VERSION}l.dat" +ORIGINAL_DATA_LIST="${DATA_DIR}/icudt${MAJOR_VERSION}l.lst.orig" +NEW_DATA_LIST="${DATA_DIR}/icudt${MAJOR_VERSION}l.lst" + +LD_LIBRARY_PATH= eval $("${INSTALL_DIR}/bin/icu-config" --invoke=icupkg) -l "$ORIGINAL_DATA_FILE" \ + > "$ORIGINAL_DATA_LIST" + +DESIRED_DATA_DIRECTORIES="coll" +BASE_FILES="root.res +ucadata.icu" +for DESIRED_DATA_DIRECTORY in $DESIRED_DATA_DIRECTORIES; do + for BASE_FILE in $BASE_FILES; do + # Using grep to sanity-check that the file indeed appears in the original data list. + grep -E "^${DESIRED_DATA_DIRECTORY}/${BASE_FILE}$" "$ORIGINAL_DATA_LIST" >> "$NEW_DATA_LIST" + done + for LANGUAGE in $(grep -Ev "^#" "$LANGUAGE_FILE_IN"); do + # Ditto above. + grep -E "^${DESIRED_DATA_DIRECTORY}/${LANGUAGE}.res$" "$ORIGINAL_DATA_LIST" \ + >> "$NEW_DATA_LIST" + done +done + +# +# Extract desired data, and use it to build custom data files. 
+# + +LD_LIBRARY_PATH= eval $("${INSTALL_DIR}/bin/icu-config" --invoke=icupkg) -d "$DATA_DIR" \ + -x "$NEW_DATA_LIST" "$ORIGINAL_DATA_FILE" +LD_LIBRARY_PATH= eval $("${INSTALL_DIR}/bin/icu-config" --invoke=icupkg) -s "$DATA_DIR" \ + -a "$NEW_DATA_LIST" -tl new "$ICU_DATA_FILE_LITTLE_ENDIAN_OUT" +LD_LIBRARY_PATH= eval $("${INSTALL_DIR}/bin/icu-config" --invoke=icupkg) -s "$DATA_DIR" \ + -a "$NEW_DATA_LIST" -tb new "$ICU_DATA_FILE_BIG_ENDIAN_OUT" + +# +# Re-extract pristine sources into final destination, prune unneeded sources. +# -cp -r $TEMP_DIR/* $DEST_DIR || true - -# Prune sources. -rm -f $DEST_DIR/source/*.in # Build system. -rm -f $DEST_DIR/source/*.m4 # Build system. -rm -f $DEST_DIR/source/install-sh # Build system. -rm -f $DEST_DIR/source/mkinstalldirs # Build system. -rm -f $DEST_DIR/source/runConfigureICU # Build system. -rm -rf $DEST_DIR/as_is/ # Scripts. -rm -rf $DEST_DIR/source/allinone/ # Workspace and project files. -rm -rf $DEST_DIR/source/config* # Build system. -rm -rf $DEST_DIR/source/data/ # Source data. -rm -rf $DEST_DIR/source/extra/ # Non-supported API additions. -rm -rf $DEST_DIR/source/io/ # ICU I/O library. -rm -rf $DEST_DIR/source/layout/ # ICU complex text layout engine. -rm -rf $DEST_DIR/source/layoutex/ # ICU paragraph layout engine. -rm -rf $DEST_DIR/source/samples/ # Sample programs. -rm -rf $DEST_DIR/source/test/ # Test suites. -rm -rf $DEST_DIR/source/tools/ # Tools for generating the data files. - -echo "Done" +tar --strip-components=1 -C "$ICU_THIRD_PARTY_DIR" -zxf "${TEMP_DIR}/${TARBALL}" +rm -f ${ICU_THIRD_PARTY_DIR}/source/*.in # Build system. +rm -f ${ICU_THIRD_PARTY_DIR}/source/*.m4 # Build system. +rm -f ${ICU_THIRD_PARTY_DIR}/source/install-sh # Build system. +rm -f ${ICU_THIRD_PARTY_DIR}/source/mkinstalldirs # Build system. +rm -f ${ICU_THIRD_PARTY_DIR}/source/runConfigureICU # Build system. +rm -rf ${ICU_THIRD_PARTY_DIR}/as_is/ # Scripts. +rm -rf ${ICU_THIRD_PARTY_DIR}/source/allinone/ # Workspace and project files. 
+rm -rf ${ICU_THIRD_PARTY_DIR}/source/config* # Build system. +rm -rf ${ICU_THIRD_PARTY_DIR}/source/data/ # Source data. +rm -rf ${ICU_THIRD_PARTY_DIR}/source/extra/ # Non-supported API additions. +rm -rf ${ICU_THIRD_PARTY_DIR}/source/io/ # ICU I/O library. +rm -rf ${ICU_THIRD_PARTY_DIR}/source/layout/ # ICU complex text layout engine. +rm -rf ${ICU_THIRD_PARTY_DIR}/source/layoutex/ # ICU paragraph layout engine. +rm -rf ${ICU_THIRD_PARTY_DIR}/source/samples/ # Sample programs. +rm -rf ${ICU_THIRD_PARTY_DIR}/source/test/ # Test suites. +rm -rf ${ICU_THIRD_PARTY_DIR}/source/tools/ # Tools for generating the data files. |