summaryrefslogtreecommitdiff
path: root/src/third_party
diff options
context:
space:
mode:
authorJason Rassi <rassi@10gen.com>2016-05-09 17:20:44 -0400
committerJason Rassi <rassi@10gen.com>2016-05-13 16:36:34 -0400
commit2c4b67364343eb7dc10528b66f5d6a3a70e60ee3 (patch)
treeb5aa784f53afdf756313278fa463af542debb650 /src/third_party
parentd9a8e6a9db1bd48c2bbfb1ad18e7cb8e18eb7302 (diff)
downloadmongo-2c4b67364343eb7dc10528b66f5d6a3a70e60ee3.tar.gz
SERVER-22371 icu_get_sources.sh generates data, add generated data
Diffstat (limited to 'src/third_party')
-rw-r--r--src/third_party/icu4c-57.1/source/mongo_sources/icudt57b.datbin0 -> 2672864 bytes
-rw-r--r--src/third_party/icu4c-57.1/source/mongo_sources/icudt57l.datbin0 -> 2672864 bytes
-rw-r--r--src/third_party/icu4c-57.1/source/mongo_sources/languages.txt52
-rwxr-xr-xsrc/third_party/scripts/icu_get_sources.sh165
4 files changed, 166 insertions, 51 deletions
diff --git a/src/third_party/icu4c-57.1/source/mongo_sources/icudt57b.dat b/src/third_party/icu4c-57.1/source/mongo_sources/icudt57b.dat
new file mode 100644
index 00000000000..74799950bb5
--- /dev/null
+++ b/src/third_party/icu4c-57.1/source/mongo_sources/icudt57b.dat
Binary files differ
diff --git a/src/third_party/icu4c-57.1/source/mongo_sources/icudt57l.dat b/src/third_party/icu4c-57.1/source/mongo_sources/icudt57l.dat
new file mode 100644
index 00000000000..da96c94e4a7
--- /dev/null
+++ b/src/third_party/icu4c-57.1/source/mongo_sources/icudt57l.dat
Binary files differ
diff --git a/src/third_party/icu4c-57.1/source/mongo_sources/languages.txt b/src/third_party/icu4c-57.1/source/mongo_sources/languages.txt
new file mode 100644
index 00000000000..05911c75573
--- /dev/null
+++ b/src/third_party/icu4c-57.1/source/mongo_sources/languages.txt
@@ -0,0 +1,52 @@
+# This file is taken as input to the ICU data packaging process, and contains the base names of all
+# ICU locales that have collation data included in MongoDB's custom data file for ICU.
+ar
+bn
+ca
+cs
+da
+de
+de_AT
+el
+en
+en_US
+en_US_POSIX
+es
+et
+fa
+fa_AF
+fi
+fil
+fr
+fr_CA
+ga
+he
+hi
+hr
+hu
+hy
+id
+is
+it
+ja
+ka
+ko
+lt
+lv
+nn
+pa
+pl
+ps
+pt
+ro
+ru
+sk
+sl
+sv
+th
+tr
+uk
+ur
+vi
+zh
+zh_Hant
diff --git a/src/third_party/scripts/icu_get_sources.sh b/src/third_party/scripts/icu_get_sources.sh
index 41f1f18c3ba..e19cfd464db 100755
--- a/src/third_party/scripts/icu_get_sources.sh
+++ b/src/third_party/scripts/icu_get_sources.sh
@@ -1,65 +1,128 @@
#!/bin/bash
-set -o verbose
-set -o errexit
-# This script fetches and creates a copy of sources for ICU.
+# This script fetches sources for ICU and builds custom ICU data files (one big-endian data file
+# and one little-endian data file). Both the sources and the data files are trimmed down for size.
+#
+# This script can be run from any directory inside the source repository. This script
+# expects the ICU third-party directory (src/third_party/icu4c-xx.yy/) to exist and contain a
+# newline-separated language file in source/mongo_sources/languages.txt. This language file must
+# list each locale for which collation data should be packaged as part of the generated custom data
+# file.
+#
+# This script returns a zero exit code on success.
+
+set -euo pipefail
+IFS=$'\n\t'
+
+if [ "$#" -ne 0 ]; then
+ echo "$0: too many arguments" >&2
+ exit 1
+fi
+
+KERNEL="$(uname)"
+if [ "$KERNEL" != Linux ]; then
+ echo "$0: kernel '$KERNEL' not supported" >&2
+ exit 1
+fi
NAME=icu4c
MAJOR_VERSION=57
MINOR_VERSION=1
-VERSION=${MAJOR_VERSION}.${MINOR_VERSION}
+VERSION="${MAJOR_VERSION}.${MINOR_VERSION}"
-TARBALL=$NAME-$MAJOR_VERSION\_$MINOR_VERSION-src.tgz
-TARBALL_DIR=icu
-TARBALL_DEST_DIR=$NAME-$VERSION
-TARBALL_DOWNLOAD_URL=http://download.icu-project.org/files/$NAME/$VERSION/$TARBALL
+TARBALL="${NAME}-${MAJOR_VERSION}_${MINOR_VERSION}-src.tgz"
+TARBALL_DOWNLOAD_URL="http://download.icu-project.org/files/${NAME}/${VERSION}/${TARBALL}"
-TEMP_DIR=/tmp/temp-$NAME-$VERSION
-DEST_DIR=$(git rev-parse --show-toplevel)/src/third_party/$NAME-$VERSION
+ICU_THIRD_PARTY_DIR="$(git rev-parse --show-toplevel)/src/third_party/${NAME}-${VERSION}"
+MONGO_SOURCES_DIR="${ICU_THIRD_PARTY_DIR}/source/mongo_sources"
+LANGUAGE_FILE_IN="${MONGO_SOURCES_DIR}/languages.txt"
+ICU_DATA_FILE_LITTLE_ENDIAN_OUT="${MONGO_SOURCES_DIR}/icudt${MAJOR_VERSION}l.dat"
+ICU_DATA_FILE_BIG_ENDIAN_OUT="${MONGO_SOURCES_DIR}/icudt${MAJOR_VERSION}b.dat"
-# Download and extract tarball.
-if [ ! -f $TARBALL ]; then
- echo "Get tarball"
- wget $TARBALL_DOWNLOAD_URL
-fi
+#
+# Set up temp directory.
+#
-echo $TARBALL
-tar -zxvf $TARBALL
+TEMP_DIR="$(mktemp -d /tmp/icu.XXXXXX)"
+trap "rm -rf $TEMP_DIR" EXIT
-# Move extracted files to a temporary directory.
-rm -rf $TEMP_DIR
-mv $TARBALL_DIR $TEMP_DIR
+TARBALL_DIR="${TEMP_DIR}/tarball"
+INSTALL_DIR="${TEMP_DIR}/install"
+DATA_DIR="${TEMP_DIR}/data"
+mkdir "$TARBALL_DIR" "$INSTALL_DIR" "$DATA_DIR"
-# If the SConscript for building ICU already exists, move it into the temporary directory.
-if [ -f $DEST_DIR/source/SConscript ]; then
- echo "Saving SConscript"
- mv $DEST_DIR/source/SConscript $TEMP_DIR/source
- rm -rf $DEST_DIR
-fi
+#
+# Download and extract tarball into temp directory.
+#
-# Copy all sources into their proper place in the mongo source tree.
-if [ ! -d $DEST_DIR ]; then
- mkdir $DEST_DIR
-fi
+cd "$TEMP_DIR"
+wget "$TARBALL_DOWNLOAD_URL"
+tar --strip-components=1 -C "$TARBALL_DIR" -zxf "$TARBALL"
+
+#
+# Build and install ICU in temp directory, in order to use data packaging tools.
+#
+
+cd "${TARBALL_DIR}/source"
+./runConfigureICU "$KERNEL" --prefix="${TEMP_DIR}/install"
+make -j
+make install
+
+#
+# Generate trimmed-down list of data to include in custom data files.
+#
+
+ORIGINAL_DATA_FILE="${TARBALL_DIR}/source/data/in/icudt${MAJOR_VERSION}l.dat"
+ORIGINAL_DATA_LIST="${DATA_DIR}/icudt${MAJOR_VERSION}l.lst.orig"
+NEW_DATA_LIST="${DATA_DIR}/icudt${MAJOR_VERSION}l.lst"
+
+LD_LIBRARY_PATH= eval $("${INSTALL_DIR}/bin/icu-config" --invoke=icupkg) -l "$ORIGINAL_DATA_FILE" \
+ > "$ORIGINAL_DATA_LIST"
+
+DESIRED_DATA_DIRECTORIES="coll"
+BASE_FILES="root.res
+ucadata.icu"
+for DESIRED_DATA_DIRECTORY in $DESIRED_DATA_DIRECTORIES; do
+ for BASE_FILE in $BASE_FILES; do
+ # Using grep to sanity-check that the file indeed appears in the original data list.
+ grep -E "^${DESIRED_DATA_DIRECTORY}/${BASE_FILE}$" "$ORIGINAL_DATA_LIST" >> "$NEW_DATA_LIST"
+ done
+ for LANGUAGE in $(grep -Ev "^#" "$LANGUAGE_FILE_IN"); do
+ # As above: grep sanity-checks that this locale's file appears in the original data list.
+ grep -E "^${DESIRED_DATA_DIRECTORY}/${LANGUAGE}.res$" "$ORIGINAL_DATA_LIST" \
+ >> "$NEW_DATA_LIST"
+ done
+done
+
+#
+# Extract desired data, and use it to build custom data files.
+#
+
+LD_LIBRARY_PATH= eval $("${INSTALL_DIR}/bin/icu-config" --invoke=icupkg) -d "$DATA_DIR" \
+ -x "$NEW_DATA_LIST" "$ORIGINAL_DATA_FILE"
+LD_LIBRARY_PATH= eval $("${INSTALL_DIR}/bin/icu-config" --invoke=icupkg) -s "$DATA_DIR" \
+ -a "$NEW_DATA_LIST" -tl new "$ICU_DATA_FILE_LITTLE_ENDIAN_OUT"
+LD_LIBRARY_PATH= eval $("${INSTALL_DIR}/bin/icu-config" --invoke=icupkg) -s "$DATA_DIR" \
+ -a "$NEW_DATA_LIST" -tb new "$ICU_DATA_FILE_BIG_ENDIAN_OUT"
+
+#
+# Re-extract pristine sources into final destination, then prune unneeded sources.
+#
-cp -r $TEMP_DIR/* $DEST_DIR || true
-
-# Prune sources.
-rm -f $DEST_DIR/source/*.in # Build system.
-rm -f $DEST_DIR/source/*.m4 # Build system.
-rm -f $DEST_DIR/source/install-sh # Build system.
-rm -f $DEST_DIR/source/mkinstalldirs # Build system.
-rm -f $DEST_DIR/source/runConfigureICU # Build system.
-rm -rf $DEST_DIR/as_is/ # Scripts.
-rm -rf $DEST_DIR/source/allinone/ # Workspace and project files.
-rm -rf $DEST_DIR/source/config* # Build system.
-rm -rf $DEST_DIR/source/data/ # Source data.
-rm -rf $DEST_DIR/source/extra/ # Non-supported API additions.
-rm -rf $DEST_DIR/source/io/ # ICU I/O library.
-rm -rf $DEST_DIR/source/layout/ # ICU complex text layout engine.
-rm -rf $DEST_DIR/source/layoutex/ # ICU paragraph layout engine.
-rm -rf $DEST_DIR/source/samples/ # Sample programs.
-rm -rf $DEST_DIR/source/test/ # Test suites.
-rm -rf $DEST_DIR/source/tools/ # Tools for generating the data files.
-
-echo "Done"
+tar --strip-components=1 -C "$ICU_THIRD_PARTY_DIR" -zxf "${TEMP_DIR}/${TARBALL}"
+rm -f ${ICU_THIRD_PARTY_DIR}/source/*.in # Build system.
+rm -f ${ICU_THIRD_PARTY_DIR}/source/*.m4 # Build system.
+rm -f ${ICU_THIRD_PARTY_DIR}/source/install-sh # Build system.
+rm -f ${ICU_THIRD_PARTY_DIR}/source/mkinstalldirs # Build system.
+rm -f ${ICU_THIRD_PARTY_DIR}/source/runConfigureICU # Build system.
+rm -rf ${ICU_THIRD_PARTY_DIR}/as_is/ # Scripts.
+rm -rf ${ICU_THIRD_PARTY_DIR}/source/allinone/ # Workspace and project files.
+rm -rf ${ICU_THIRD_PARTY_DIR}/source/config* # Build system.
+rm -rf ${ICU_THIRD_PARTY_DIR}/source/data/ # Source data.
+rm -rf ${ICU_THIRD_PARTY_DIR}/source/extra/ # Non-supported API additions.
+rm -rf ${ICU_THIRD_PARTY_DIR}/source/io/ # ICU I/O library.
+rm -rf ${ICU_THIRD_PARTY_DIR}/source/layout/ # ICU complex text layout engine.
+rm -rf ${ICU_THIRD_PARTY_DIR}/source/layoutex/ # ICU paragraph layout engine.
+rm -rf ${ICU_THIRD_PARTY_DIR}/source/samples/ # Sample programs.
+rm -rf ${ICU_THIRD_PARTY_DIR}/source/test/ # Test suites.
+rm -rf ${ICU_THIRD_PARTY_DIR}/source/tools/ # Tools for generating the data files.