diff options
98 files changed, 4253 insertions, 1348 deletions
diff --git a/BUILD/build_mccge.sh b/BUILD/build_mccge.sh index fc0f8181692..cbc2e895815 100755 --- a/BUILD/build_mccge.sh +++ b/BUILD/build_mccge.sh @@ -63,10 +63,12 @@ sysadmin_usage() cat <<EOF This script can be used to build MySQL Cluster Carrier Grade Edition - based on a source code release you received from MySQL. + based on a source code release you received from MySQL. It can also + be used to build many other variants of MySQL, in particular + various performance-optimised versions of MySQL. It is assumed that you are building on a computer which is of the - same type as that on which you intend to run MySQL Cluster. + same type as that on which you intend to run MySQL/MySQL Cluster. The simplest possible way to run this script is to allow it to use the built-in defaults everywhere, invoking it simply as: @@ -75,29 +77,35 @@ cat <<EOF This performs the following operations: 1) Detects the operating system. Currently, Linux, FreeBSD, Solaris - 10/11, and Mac OS X are supported by this script. + 8/9/10/11, and Mac OS X are supported by this script. 2) Detect the type of CPU being used. Currently supported processors are: x86 for all supported operating systems, Itanium for Linux - with GCC, and SPARC for Solaris using the Forte compiler. + with GCC, and x86 + SPARC for Solaris using the Forte compiler and + finally x86 on Linux using the Intel compiler. 3) Invokes the GCC compiler. - 4) Builds a set of MySQL Cluster Carrier Grade Edition binaries; for + 4) Builds a set of MySQL/MySQL Cluster binaries; for more information about these, see --extended-help. + 5) Default compiler is always gcc. The default version assumes that you have a source code tarball from which you are building, and thus autoconf and automake do not need to be run. If you have downloaded a BitKeeper tree then you should read --developer-help. 
- If you are building MySQL Cluster Carrier Grade Edition for commercial + If you are building MySQL/MySQL Cluster for commercial use then you need to set the --commercial flag to ensure that the commercial libraries are compiled in, rather than the GPL-only libraries. The default is to build a GPL version of MySQL Cluster Carrier Grade Edition. - If your building on a Solaris SPARC machine you must set + If you're building on a Solaris SPARC machine and you want to compile + using SunStudio you must set --compiler=forte; if you want to build using the Intel compiler on Linux, you need to set --compiler=icc. + A synonym for forte is SunStudio, so one can also use + --compiler=SunStudio. + If you want to make sure that a 64-bit version is built then you should add the flag --64. This is always set on Solaris machines and when check-cpu is able to discover that a 64-bit CPU is being used. If @@ -133,6 +141,7 @@ Usage: $0 [options] --help Show this help message. --sysadmin-help Show help for system administrators wishing to build MySQL Cluster Carrier Grade Edition + or other MySQL versions. 
--developer-help Show help for developers trying to build MySQL --with-help Show extended help on --with-xxx options to configure @@ -155,10 +164,10 @@ Usage: $0 [options] MySQL use --commercial Use commercial libraries --gpl Use gpl libraries - --compiler=[gcc|icc|forte] Select compiler - --cpu=[x86|x86_64|sparc] Select CPU type - x86 => 32-bit binary - x86_64 => 64 bit binary unless Mac OS X + --compiler=[gcc|icc|forte|SunStudio] Select compiler + --cpu=[x86|x86_64|sparc|itanium] Select CPU type + x86 => x86 and 32-bit binary + x86_64 => x86 and 64 bit binary --warning-mode=[extra|pedantic|normal|no] Set warning mode level --warnings Set warning mode to normal --32 Build a 32-bit binary even if CPU is 64-bit @@ -170,8 +179,9 @@ Usage: $0 [options] --error-inject Enable error injection into MySQL Server and data nodes --valgrind Build with valgrind - --fast Optimise for CPU architecture buildt on + --fast Optimise for CPU architecture built on --static-linking Statically link system libraries into binaries + --use-tcmalloc Link with tcmalloc instead of standard malloc (Linux only) --with-flags * Pass extra --with-xxx options to configure EOF if test "x$1" != "x" ; then @@ -186,13 +196,14 @@ extended_usage() Extended help text for this script: ----------------------------------- This script is intended to make it easier for customers using MySQL - Cluster Carrier Grade Edition to build the product from source on - these platforms/compilers: Linux/x86 (32-bit and 64-bit), - Solaris 10 and 11/x86/gcc, Solaris 9/Sparc/Forte, and MacOSX/x86/gcc. - The script automatically detects CPU type and operating system; in - most cases this also determines which compiler to use, the exception - being Linux/x86 where you can choose between gcc and icc (gcc is the - default). 
+ Cluster Carrier Grade Edition, customers using performance-optimised + MySQL versions and developers to build the product from source on + these platforms/compilers: Linux/x86 (32-bit and 64-bit) (either using + gcc or icc), Linux Itanium, Solaris 8,9,10 and 11 x86 and SPARC using + gcc or SunStudio and MacOSX/x86/gcc. + + The script automatically detects CPU type and operating system. The + default compiler is always gcc. To build on other platforms you can use the --print-only option on a supported platform and edit the output for a proper set of commands on @@ -213,7 +224,7 @@ extended_usage() --package=cge storage engines: - ARCHIVE, BLACKHOLE, CSV, EXAMPLE, FEDERATED, MYISAM, NDB + ARCHIVE, BLACKHOLE, CSV, FEDERATED, MYISAM, NDB (All storage engines except InnoDB) comment: MySQL Cluster Carrier Grade Edition GPL/Commercial version built from source @@ -221,7 +232,7 @@ extended_usage() --package=extended storage engines: - ARCHIVE, BLACKHOLE, CSV, EXAMPLE, FEDERATED, MYISAM, INNODB, NDB + ARCHIVE, BLACKHOLE, CSV, FEDERATED, MYISAM, INNODB, NDB (All storage engines) comment: MySQL Cluster Carrier Grade Extended Edition GPL/Commercial version built from source @@ -229,7 +240,7 @@ extended_usage() --package=pro storage engines: - ARCHIVE, BLACKHOLE, CSV, EXAMPLE, FEDERATED, INNODB, MYISAM + ARCHIVE, BLACKHOLE, CSV, FEDERATED, INNODB, MYISAM (All storage engines except NDB) comment: MySQL Pro GPL/Commercial version built from source @@ -296,6 +307,10 @@ extended_usage() --with-pic: Build all binaries using position independent assembler to avoid problems with dynamic linkers (cannot be overridden). + --without-example-storage-engine: Ensure that the example engine isn't built, + it cannot do any useful things, it's merely intended as documentation. + (cannot be overridden) + --with-csv-storage-engine: Ensure that the CSV storage engine is included in all builds. Since CSV is required for log tables in MySQL 5.1, this option cannot be overridden. 
@@ -314,10 +329,6 @@ extended_usage() In addition there are some configure options that are specific to Linux operating systems: - --with-fast-mutexes - Include an alternative implementation of mutexes that is faster on - Linux systems - --enable-assembler Include assembler code optimisations for a number of mostly string methods. Used for x86 processors only. @@ -364,18 +375,25 @@ extended_usage() --with-mysqld-libs=-lmtmalloc Used on Solaris to ensure that the proper malloc library is used. + Investigations have shown mtmalloc to be the best choice on Solaris, + also umem has good performance on Solaris but better debugging + capabilities. Compiler options: ----------------- This section describes the compiler options for each of the different - platforms supported by thisscript. + platforms supported by this script. The --fast option adds -mtune=cpu_arg to the C/C++ flags (provides support for Nocona, K8, and other processors). Use of the --debug option adds -g to the C/C++ flags. + In all cases it is possible to override the definition of CC and CXX + by calling the script as follows: + CC="/usr/local/bin/gcc" CXX="/usr/local/bin/gcc" BUILD/build_mccge.sh + FreeBSD/x86/gcc --------------- No flags are used. Instead, configure determines the proper flags to @@ -383,8 +401,7 @@ extended_usage() Linux/x86+Itanium/gcc ------------- - No flags are used. Instead the configure script determines the - proper flags to use for both normal and debug builds. Discovery of a + For debug builds -O is used and otherwise -O3 is used. Discovery of a Nocona or Core 2 Duo CPU causes a 64-bit binary to be built; otherwise, the binary is 32-bit. To build a 64-bit binary, -m64 is added to the C/C++ flags. 
(To build a 32-bit binary on a 64-bit CPU, @@ -393,11 +410,11 @@ extended_usage() Linux/x86+Itanium/icc ------------- Flags used: - CC = icc -static-libgcc -static-libcxa -i-static - C++ = icpc -static-libgcc -static-libcxa -i-static + CC = icc -static-libgcc -static-intel + C++ = icpc -static-libgcc -static-intel C/C++ flags = -mp -restrict - On Itanium we also add -no-ftz and -no-prefetch to CC and C++ flags. + On Itanium we also add -no-ftz to CC and C++ flags. The non-debug versions also add the following: C/C++ flags += -O3 unroll2 -ip @@ -411,20 +428,60 @@ extended_usage() Solaris/x86/gcc --------------- - All builds on Solaris are 64-bit, so -m64 is always used in the - C/C++ flags. LDFLAGS is set to -m64 -static-libgcc -O/-O2. + All builds on Solaris are by default 64-bit, so -m64 is always used in + the C/C++ flags. LDFLAGS is set to -m64 -O/-O2/-O3. If for + some reason a 32-bit Solaris is used it is necessary to add the flag + --32 to the script invocation. Due to bugs in compiling with -O3 on + Solaris only -O2 is used by default, when --fast flag is used -O3 will + be used instead. + + Sets -m64 (default) or -m32 (if specifically set) in LDFLAGS and + C/C++ flags. Solaris/Sparc/Forte ------------------- - Uses cc-5.0 as CC - Sets ASFLAGS=LDFLAGS=xarch=v9, so that we compile Sparc v9 binaries - C flags = -Xa -strconst -xc99=none + Uses cc as CC and CC as CXX + Note that SunStudio uses different binaries for C and C++ compilers. + + Set -m64 (default) or -m32 (if specifically set) in ASFLAGS, + LDFLAGS and C/C++ flags. + + Sets ASFLAGS=LDFLAGS=compiler flags=xarch=sparc, so that we compile + Sparc v9 binaries, also -mt is set in all those since we're always + building a multithreaded program. 
+ + C flags = -xstrconst C++ flags = -noex - C/C++ flags = -mt -D_FORTEC -xarch=v9 - For non-debug builds, the following flags are also used: + Set the following C/C++ flags: + -fsimple=1 + -ftrap=%none + -nofstore This flag is set only on x86 + -xbuiltin=%all + -xlibmil + -xlibmopt + + Set the C++ flag: + -noex + + When compiling with fast we set: + C/C++ flags: -xtarget=native -xunroll=3 -xipo + LDFLAGS: -xipo + + When not compiling with fast we always set -xtarget=generic - C/C++ flags = -xO3 + When compiling with fast on SPARC we also set: + C/C++ flags: -xbinopt=prepare + LDFLAGS: -xbinopt=prepare + + When compiling with fast on x86 we also set: + C/C++ flags: -xregs=frameptr + + The optimisation level is + -xO Debug builds + -xO2 Production build on SPARC + -xO3 Production build on x86 + -xO4 Fast builds on SPARC/x86 MacOSX/x86/gcc -------------- @@ -433,6 +490,10 @@ extended_usage() Non-debug versions also add -Os -felide-constructors, where "-Os" means the build is space-optimised as long as the space optimisations do not negatively affect performance. Debug versions use -O. + + Mac OS X builds will always be 32-bit by default, when --64 is added + the build will be 64 bit instead. Thus the flag -m64 is added only + when specifically given as an option. 
EOF } with_usage() @@ -537,7 +598,7 @@ parse_cpu_type() case "$cpu_type" in x86 ) cpu_type="x86" - m32="yes" + m64="no" ;; x86_64 ) cpu_type="x86" @@ -572,6 +633,9 @@ parse_compiler() forte ) compiler="forte" ;; + SunStudio | sunstudio ) + compiler="forte" + ;; *) echo "Unknown compiler '$compiler'" exit 1 @@ -601,6 +665,9 @@ parse_options() fast_flag="generic" fi ;; + --use-tcmalloc) + use_tcmalloc="yes" + ;; --with-debug) with_debug_flag="yes" fast_flag="no" @@ -640,10 +707,10 @@ parse_options() echo "Cannot set both --32 and --64" exit 1 fi - m32="yes" + m64="no" ;; --64) - if test "x$m32" != "x" ; then + if test "x$m64" != "x" ; then echo "Cannot set both --32 and --64" exit 1 fi @@ -750,7 +817,7 @@ set_cpu_base() if test "x$cpu_type" = "x" ; then if test "x$cpu_arg" = "x" ; then usage "CPU type not discovered, cannot proceed" - return 1 + exit 1 fi case "$cpu_arg" in core2 | nocona | prescott | pentium* | i*86 ) @@ -775,14 +842,18 @@ set_cpu_base() check_cpu_cflags="" fi if test "x$os" = "xMacOSX" ; then - m64="no" + if test "x$m64" = "x" ; then + m64="no" + fi elif test "x$os" = "xSolaris" ; then - m64="yes" - elif test "x$m32" = "x" ; then + if test "x$m64" = "x" ; then + m64="yes" + fi + elif test "x$m64" = "x" ; then if test "x$cpu_arg" = "xnocona" || test "x$cpu_arg" = "xcore2" || \ test "x$cpu_arg" = "xathlon64" || test "x$cpu_arg" = "xopteron" ; then m64="yes" - elif test "x$m64" != "xyes" ; then + else m64="no" fi else @@ -806,18 +877,15 @@ init_configure_commands() cxxflags="$cxx_warnings $base_cxxflags $compiler_flags" configure="./configure $base_configs $with_flags" - commands="$commands - CC=\"$CC\" CFLAGS=\"$cflags\" CXX=\"$CXX\" CXXFLAGS=\"$cxxflags\"" + flags="CC=\"$CC\" CFLAGS=\"$cflags\" CXX=\"$CXX\" CXXFLAGS=\"$cxxflags\"" if test "x$LDFLAGS" != "x" ; then - commands="$commands - LDFLAGS=\"$LDFLAGS\"" + flags="$flags LDFLAGS=\"$LDFLAGS\"" fi if test "x$ASFLAGS" != "x" ; then - commands="$commands - ASFLAGS=\"$ASFLAGS\"" + flags="$flags 
ASFLAGS=\"$ASFLAGS\"" fi commands="$commands - $configure" + $flags $configure" } # @@ -920,7 +988,7 @@ set_libtoolize_version() # We do not use ccache when gcov is used. Also only when # gcc is used. # -set_up_ccache() +set_ccache_usage() { if test "x$compiler" = "xgcc" ; then if ccache -V > /dev/null 2>&1 && test "$USING_GCOV" != "1" @@ -993,7 +1061,7 @@ set_with_debug_flags() if test "x$with_debug_flag" = "xyes" ; then if test "x$developer_flag" = "xyes" ; then loc_debug_flags="-DUNIV_MUST_NOT_INLINE -DEXTRA_DEBUG -DFORCE_INIT_OF_VARS " - loc_debug_flags="$loc_debug_cflags -DSAFEMALLOC -DPEDANTIC_SAFEMALLOC" + loc_debug_flags="$loc_debug_flags -DSAFEMALLOC -DPEDANTIC_SAFEMALLOC" compiler_flags="$compiler_flags $loc_debug_flags" fi fi @@ -1046,7 +1114,7 @@ set_base_configs() base_configs="$base_configs --enable-local-infile" base_configs="$base_configs --enable-thread-safe-client" base_configs="$base_configs --with-big-tables" - base_configs="$base_configs --with-extra-charsets=all" + base_configs="$base_configs --with-extra-charsets=complex" base_configs="$base_configs --with-ssl" base_configs="$base_configs --with-pic" base_configs="$base_configs --with-csv-storage-engine" @@ -1059,17 +1127,27 @@ set_base_configs() # set_base_engines() { - engine_configs="$engine_configs --with-archive-storage-engine" + engine_configs="--with-archive-storage-engine" engine_configs="$engine_configs --with-blackhole-storage-engine" - engine_configs="$engine_configs --with-example-storage-engine" + engine_configs="$engine_configs --without-example-storage-engine" engine_configs="$engine_configs --with-federated-storage-engine" engine_configs="$engine_configs --with-partition" + base_configs="$base_configs $engine_configs" } -set_pro_package() +set_innodb_engine() { - base_configs="$base_configs $engine_configs" base_configs="$base_configs --with-innodb" +} + +set_ndb_engine() +{ + base_configs="$base_configs --with-ndbcluster" + base_configs="$base_configs --without-ndb-debug" +} 
+ +set_pro_package() +{ base_configs="$base_configs --with-comment=\"MySQL Pro $version_text built from source\"" if test "x$with_debug_flag" = "xyes" ; then base_configs="$base_configs --with-server-suffix=\"-debug\"" @@ -1081,10 +1159,6 @@ set_cge_extended_package() if test "x$gpl" = "xno" ; then echo "Cannot build Extended Carrier Grade Edition as Commercial version" fi - base_configs="$base_configs --with-ndbcluster" - base_configs="$base_configs --without-ndb-debug" - base_configs="$base_configs $engine_configs" - base_configs="$base_configs --with-innodb" base_configs="$base_configs --with-comment=\"MySQL Cluster Carrier Grade Extended Edition $version_text built from source\"" if test "x$with_debug_flag" = "xyes" ; then base_configs="$base_configs --with-server-suffix=\"-cge-extended-debug\"" @@ -1095,9 +1169,6 @@ set_cge_extended_package() set_cge_package() { - base_configs="$base_configs --with-ndbcluster" - base_configs="$base_configs --without-ndb-debug" - base_configs="$base_configs $engine_configs" base_configs="$base_configs --with-comment=\"MySQL Cluster Carrier Grade Edition $version_text built from source\"" if test "x$with_debug_flag" = "xyes" ; then base_configs="$base_configs --with-server-suffix=\"-cge-debug\"" @@ -1139,6 +1210,36 @@ set_gcc_special_options() fi } +set_cc_and_cxx_for_gcc() +{ + if test "x$CC" = "x" ; then + CC="gcc -static-libgcc" + fi + if test "x$CXX" = "x" ; then + CXX="gcc -static-libgcc" + fi +} + +set_cc_and_cxx_for_icc() +{ + if test "x$CC" = "x" ; then + CC="icc -static-intel -static-libgcc" + fi + if test "x$CXX" = "x" ; then + CXX="icpc -static-intel -static-libgcc" + fi +} + +set_cc_and_cxx_for_forte() +{ + if test "x$CC" = "x" ; then + CC="cc" + fi + if test "x$CXX" = "x" ; then + CXX="CC" + fi +} + # # If we discover a Core 2 Duo architecture and we have enabled the fast # flag, we enable a compile especially optimised for Core 2 Duo. 
This @@ -1166,8 +1267,12 @@ set_bsd_configs() exit 1 fi base_configs="$base_configs --enable-assembler" - CC="gcc" - CXX="gcc" + if test "x$fast_flag" != "xno" ; then + compiler_flags="$compiler_flags -O3" + else + compiler_flags="$compiler_flags -O" + fi + set_cc_and_cxx_for_gcc } # @@ -1177,24 +1282,31 @@ set_linux_configs() { if test "x$cpu_base_type" != "xx86" && \ test "x$cpu_base_type" != "xitanium" ; then - usage "Only x86 and Itanium CPUs supported for 32-bit Linux" + usage "Only x86 and Itanium CPUs supported for Linux" exit 1 fi - base_configs="$base_configs --with-fast-mutexes" + if test "x$use_tcmalloc" = "xyes" ; then + base_configs="$base_configs --with-mysqld-libs=-ltcmalloc_minimal" + fi if test "x$cpu_base_type" = "xx86" ; then base_configs="$base_configs --enable-assembler" fi if test "x$compiler" = "xgcc" ; then - CC="gcc" - CXX="gcc" + set_cc_and_cxx_for_gcc if test "x$m64" = "xyes" ; then compiler_flags="$compiler_flags -m64" + else + compiler_flags="$compiler_flags -m32" + fi + if test "x$fast_flag" != "xno" ; then + compiler_flags="$compiler_flags -O2" + else + compiler_flags="$compiler_flags -O" fi # configure will set proper compiler flags for gcc on Linux elif test "x$compiler" = "xicc" ; then compiler_flags="$compiler_flags -mp -restrict" - CC="icc -static-intel" - CXX="icpc -static-intel" + set_cc_and_cxx_for_icc if test "x$cpu_base_type" = "xitanium" ; then compiler_flags="$compiler_flags -no-ftz" fi @@ -1215,53 +1327,99 @@ set_linux_configs() # set_solaris_configs() { +# Use mtmalloc as malloc, see Tim Cook blog base_configs="$base_configs --with-mysqld-libs=-lmtmalloc" + base_configs="$base_configs --with-named-curses=-lcurses" case "`uname -a`" in - *5.10*|*5.11*) + *5.8* | *5.9* | *5.10* | *5.11*) + ;; *) - die "Only versions 10 and 11 supported for Solaris" + usage "Only versions 8,9, 10 and 11 supported for Solaris" + exit 1 esac if test "x$cpu_base_type" != "xx86" && \ test "x$cpu_base_type" != "xsparc" ; then usage "Only x86 and 
Sparc CPUs supported for Solaris" exit 1 fi + if test "x$compiler" != "xgcc" && \ + test "x$compiler" != "xforte" ; then + usage "Only gcc and Forte compilers supported for Solaris" + exit 1 + fi + if test "x$m64" = "xyes" ; then + compiler_flags="$compiler_flags -m64" + LDFLAGS="-m64" + ASFLAGS="$ASFLAGS -m64" + else + compiler_flags="$compiler_flags -m32" + LDFLAGS="-m32" + ASFLAGS="$ASFLAGS -m32" + fi if test "x$compiler" = "xgcc" ; then - CC="gcc" - CXX="gcc" + set_cc_and_cxx_for_gcc if test "x$cpu_base_type" != "xx86" ; then - usage "Only gcc supported for Solaris 10/11 on SPARC" + usage "gcc currently not supported for Solaris on SPARC" + exit 1 fi - compiler_flags="$compiler_flags -m64 -DMY_ATOMIC_MODE_RWLOCKS" - LDFLAGS="-m64 -static-libgcc" - if test "x$fast_flag" != "xno" ; then - LDFLAGS="$LDFLAGS -O2" - compiler_flags="$compiler_flags -O2" + if test "x$fast_flag" = "xyes" ; then + LDFLAGS="$LDFLAGS -O3" + compiler_flags="$compiler_flags -O3" else - LDFLAGS="$LDFLAGS -O" - compiler_flags="$compiler_flags -O" - fi - elif test "x$compiler" = "xforte" ; then - if test "x$cpu_base_type" = "xx86" ; then - usage "Only gcc supported for Solaris/x86" - fi - if test "x$cpu_base_type" != "xsparc" ; then - usage "Forte compiler supported for Solaris 9/SPARC only" + if test "x$fast_flag" = "xgeneric" ; then + LDFLAGS="$LDFLAGS -O2" + compiler_flags="$compiler_flags -O2" + else + LDFLAGS="$LDFLAGS -O" + compiler_flags="$compiler_flags -O" + fi fi - CC="cc-5.0" - CXX=CC - ASFLAGS="xarch=v9" - LDFLAGS="xarch=v9" - base_cflags="$base_cflags -Xa -xstrconst -xc99=none" + else +#Using Forte compiler (SunStudio) + set_cc_and_cxx_for_forte + base_cflags="$base_cflags -xstrconst" + compiler_flags="$compiler_flags -mt" + LD_FLAGS="$LD_FLAGS -mt" + compiler_flags="$compiler_flags -fsimple=1" + compiler_flags="$compiler_flags -ftrap=%none" + compiler_flags="$compiler_flags -xbuiltin=%all" + compiler_flags="$compiler_flags -xlibmil" + compiler_flags="$compiler_flags -xlibmopt" 
base_cxxflags="$base_cxxflags -noex" - compiler_flags="$compiler_flags -mt -D_FORTEC -xarch=v9" - if test "x$fast_flag" != "xno" ; then - compiler_flags="$compiler_flags -xO3" + if test "x$fast_flag" = "xyes" ; then + compiler_flags="$compiler_flags -xtarget=native" + compiler_flags="$compiler_flags -xipo" + compiler_flags="$compiler_flags -xunroll=3" + LD_FLAGS="$LD_FLAGS -xipo" + else + compiler_flags="$compiler_flags -xtarget=generic" + fi + if test "x$cpu_base_type" = "xx86" ; then + compiler_flags="$compiler_flags -nofstore" + if test "x$fast_flag" = "xyes" ; then + compiler_flags="$compiler_flags -xregs=frameptr" + compiler_flags="$compiler_flags -xO4" + elif test "x$fast_flag" = "xgeneric" ; then + compiler_flags="$compiler_flags -xO2" + else + compiler_flags="$compiler_flags -xO" + fi + else +#Using SPARC cpu with SunStudio (Forte) compiler + ASFLAGS="$ASFLAGS xarch=sparc" + LDFLAGS="$LDFLAGS xarch=sparc" + compiler_flags="$compiler_flags -xarch=sparc" + if test "x$fast_flag" = "xyes" ; then + compiler_flags="$compiler_flags -xbinopt=prepare" + LDFLAGS="$LDFLAGS -xbinopt=prepare" + compiler_flags="$compiler_flags -xO4" + elif test "x$fast_flag" = "xgeneric" ; then + compiler_flags="$compiler_flags -xO2" + else + compiler_flags="$compiler_flags -xO" + fi fi - else - usage "Only gcc and Forte compilers supported for Solaris" - exit 1 fi } @@ -1270,10 +1428,7 @@ set_solaris_configs() # set_macosx_configs() { - base_cxxflags="$base_cxxflags -fno-common" - if test "x$cpu_base_type" = "xx86" && test "x$compiler" = "xgcc" ; then - compiler_flags="$compiler_flags -arch i386" - else + if test "x$cpu_base_type" != "xx86" || test "x$compiler" != "xgcc" ; then usage "Only gcc/x86 supported for Mac OS X" exit 1 fi @@ -1281,14 +1436,21 @@ set_macosx_configs() # Optimize for space as long as it doesn't affect performance, use some # optimisations also when not in fast mode. 
# + base_cxxflags="$base_cxxflags -felide-constructors" + base_cxxflags="$base_cxxflags -fno-common" + if test "x$m64" = "xyes" ; then + compiler_flags="$compiler_flags -m64" + compiler_flags="$compiler_flags -arch x86_64" + else + compiler_flags="$compiler_flags -m32" + compiler_flags="$compiler_flags -arch i386" + fi if test "x$fast_flag" != "xno" ; then compiler_flags="$compiler_flags -Os" - base_cxxflags="$base_cxxflags -felide-constructors" else compiler_flags="$compiler_flags -O" fi - CC="gcc" - CXX="gcc" + set_cc_and_cxx_for_gcc } # @@ -1397,11 +1559,14 @@ base_cxxflags= base_configs= debug_flags= cxxflags= -m32= m64= datadir= commands= use_autotools= +engine_configs= +ASFLAGS= +LDFLAGS= +use_tcmalloc= set_defaults_based_on_environment @@ -1418,7 +1583,14 @@ set -e # This call sets the cpu_arg and check_cpu_args parameters # path=`dirname $0` +if test "x$compiler" = "xgcc" ; then + compiler= +fi . "$path/check-cpu" +if test "x$compiler" = "x" ; then + compiler="gcc" +fi +check_os set_cpu_base if test "x$?" = "x1" ; then exit 1 @@ -1446,17 +1618,23 @@ set_icc_special_options # including all storage engines except InnoDB, and to use GPL libraries. # set_base_configs -set_base_engines if test "x$gpl" = "xyes" ; then version_text="GPL version" else version_text="Commercial version" fi if test "x$package" = "xpro" ; then + set_base_engines + set_innodb_engine set_pro_package elif test "x$package" = "xextended" ; then + set_base_engines + set_ndb_engine + set_innodb_engine set_cge_extended_package elif test "x$package" = "xcge" ; then + set_base_engines + set_ndb_engine set_cge_package elif test "x$package" = "xclassic" ; then set_classic_package @@ -1472,7 +1650,6 @@ set_error_inject_configs # operating systems, and processors. # -check_os if test "x$os" = "xlinux" ; then set_linux_configs elif test "x$os" = "xSolaris" ; then @@ -1490,7 +1667,7 @@ fi # proper libtoolize versions, and to determine whether to use ccache. 
# set_make_version -set_up_ccache +set_ccache_usage # # Set up commands variable from variables prepared for base diff --git a/BUILD/check-cpu b/BUILD/check-cpu index 33bf857b845..27e0acf69a0 100755 --- a/BUILD/check-cpu +++ b/BUILD/check-cpu @@ -16,6 +16,9 @@ check_cpu () { # on Linux (and others?) we can get detailed CPU information out of /proc cpuinfo="cat $CPUINFO" + # detect CPU architecture + cpu_arch=`$cpuinfo | grep 'arch' | cut -d ':' -f 2 | cut -d ' ' -f 2 | head -1` + # detect CPU family cpu_family=`$cpuinfo | grep 'family' | cut -d ':' -f 2 | cut -d ' ' -f 2 | head -1` if test -z "$cpu_family" ; then @@ -51,8 +54,8 @@ check_cpu () { model_name=`machine` ;; *) - cpu_family=`uname -m`; - model_name=`uname -p`; + cpu_family=`uname -p`; + model_name=`uname -m`; ;; esac fi @@ -60,7 +63,7 @@ check_cpu () { # detect CPU shortname as used by gcc options # this list is not complete, feel free to add further entries cpu_arg="" - case "$cpu_family--$model_name" in + case "$cpu_family--$model_name--$spu_arch" in # DEC Alpha Alpha*EV6*) cpu_arg="ev6"; @@ -137,8 +140,11 @@ check_cpu () { *Itanium*) cpu_arg="itanium" ;; + *IA-64*) + cpu_arg="itanium" + ;; # Solaris Sparc - *sparc*sun4u*) + *sparc*sun4[uv]*) cpu_arg="sparc" ;; # Power PC @@ -175,67 +181,69 @@ check_cpu () { cc=$CC fi - cc_ver=`$cc --version | sed 1q` - cc_verno=`echo $cc_ver | sed -e 's/^.*(GCC)//g; s/[^0-9. ]//g; s/^ *//g; s/ .*//g'` - set -- `echo $cc_verno | tr '.' ' '` - cc_major=$1 - cc_minor=$2 - cc_patch=$3 - cc_comp=`expr $cc_major '*' 100 '+' $cc_minor` + if test "x$compiler" = "x" ; then + cc_ver=`$cc --version | sed 1q` + cc_verno=`echo $cc_ver | sed -e 's/^.*(GCC)//g; s/[^0-9. ]//g; s/^ *//g; s/ .*//g'` + set -- `echo $cc_verno | tr '.' 
' '` + cc_major=$1 + cc_minor=$2 + cc_patch=$3 + cc_comp=`expr $cc_major '*' 100 '+' $cc_minor` - case "$cc_ver--$cc_verno" in - *GCC*) - # different gcc backends (and versions) have different CPU flags - case `gcc -dumpmachine` in - i?86-* | x86_64-*) - if test "$cc_comp" -lt 304 ; then - check_cpu_cflags="-mcpu=${cpu_arg}" - elif test "$cc_comp" -ge 402 ; then - check_cpu_cflags="-mtune=native" - else - check_cpu_cflags="-mtune=${cpu_arg}" - fi - ;; - ppc-*) - check_cpu_cflags="-mcpu=${cpu_arg} -mtune=${cpu_arg}" - ;; - *) - check_cpu_cflags="" - return - ;; - esac - ;; - 2.95.*) - # GCC 2.95 doesn't expose its name in --version output - check_cpu_cflags="-m${cpu_arg}" - ;; - *) - check_cpu_cflags="" - return - ;; - esac + case "$cc_ver--$cc_verno" in + *GCC*) + # different gcc backends (and versions) have different CPU flags + case `gcc -dumpmachine` in + i?86-* | x86_64-*) + if test "$cc_comp" -lt 304 ; then + check_cpu_cflags="-mcpu=${cpu_arg}" + elif test "$cc_comp" -ge 402 ; then + check_cpu_cflags="-mtune=native" + else + check_cpu_cflags="-mtune=${cpu_arg}" + fi + ;; + ppc-*) + check_cpu_cflags="-mcpu=${cpu_arg} -mtune=${cpu_arg}" + ;; + *) + check_cpu_cflags="" + return + ;; + esac + ;; + 2.95.*) + # GCC 2.95 doesn't expose its name in --version output + check_cpu_cflags="-m${cpu_arg}" + ;; + *) + check_cpu_cflags="" + return + ;; + esac + # now we check whether the compiler really understands the cpu type + touch __test.c - # now we check whether the compiler really understands the cpu type - touch __test.c + while [ "$cpu_arg" ] ; do + printf "testing $cpu_arg ... " >&2 + + # compile check + eval "$cc -c $check_cpu_cflags __test.c" 2>/dev/null + if test "x$?" = "x0" ; then + echo ok >&2 + break; + fi + echo failed >&2 + check_cpu_cflags="" + break; + done + rm __test.* + fi if test "x$core2" = "xyes" ; then cpu_arg="core2" fi - while [ "$cpu_arg" ] ; do - printf "testing $cpu_arg ... 
" >&2 - - # compile check - eval "$cc -c $check_cpu_cflags __test.c" 2>/dev/null - if test "x$?" = "x0" ; then - echo ok >&2 - break; - fi - - echo failed >&2 - check_cpu_cflags="" - break; - done - rm __test.* + return 0 } check_cpu diff --git a/CMakeLists.txt b/CMakeLists.txt index 0657013089b..1bb50cbade3 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -252,7 +252,6 @@ IF(WITH_INNOBASE_STORAGE_ENGINE) ADD_SUBDIRECTORY(storage/innobase) ENDIF(WITH_INNOBASE_STORAGE_ENGINE) ADD_SUBDIRECTORY(sql) -ADD_SUBDIRECTORY(server-tools/instance-manager) ADD_SUBDIRECTORY(libmysql) ADD_SUBDIRECTORY(tests) IF(WITH_EMBEDDED_SERVER) diff --git a/Makefile.am b/Makefile.am index adab4dc41f4..0435489456b 100644 --- a/Makefile.am +++ b/Makefile.am @@ -27,7 +27,7 @@ SUBDIRS = . include @docs_dirs@ @zlib_dir@ \ @sql_union_dirs@ unittest storage plugin \ @sql_server@ @man_dirs@ tests \ netware @libmysqld_dirs@ \ - mysql-test support-files sql-bench @tools_dirs@ \ + mysql-test support-files sql-bench \ win DIST_SUBDIRS = . include Docs zlib \ @@ -36,7 +36,7 @@ DIST_SUBDIRS = . 
include Docs zlib \ strings mysys dbug extra regex libmysql libmysql_r client unittest storage plugin \ vio sql man tests \ netware libmysqld \ - mysql-test support-files sql-bench server-tools \ + mysql-test support-files sql-bench \ win \ BUILD DISTCLEANFILES = ac_available_languages_fragment diff --git a/config/ac-macros/dtrace.m4 b/config/ac-macros/dtrace.m4 new file mode 100644 index 00000000000..e2c8de61a19 --- /dev/null +++ b/config/ac-macros/dtrace.m4 @@ -0,0 +1,38 @@ +dnl --------------------------------------------------------------------------- +dnl Macro: DTRACE_TEST +dnl --------------------------------------------------------------------------- +AC_ARG_ENABLE(dtrace, + AC_HELP_STRING([--enable-dtrace],[Build with support for the DTRACE.]), + [ + ENABLE_DTRACE="$enable_dtrace" + ], + [ + ENABLE_DTRACE="yes" + ] +) +DTRACEFLAGS="" +HAVE_DTRACE="" +HAVE_DTRACE_DASH_G="" +if test "$ENABLE_DTRACE" = "yes"; then + AC_CHECK_PROGS(DTRACE, dtrace, [not found]) + if test "$DTRACE" = "not found"; then + ENABLE_DTRACE="no" + else + AC_DEFINE([HAVE_DTRACE], [1], [Defined to 1 if DTrace support is enabled]) + case "$target_os" in + *solaris*) + HAVE_DTRACE_DASH_G="yes" + ;; + *) + HAVE_DTRACE_DASH_G="no" + ;; + esac + fi +fi +AC_SUBST(DTRACEFLAGS) +AC_SUBST(HAVE_DTRACE) +AM_CONDITIONAL([HAVE_DTRACE], [ test "$ENABLE_DTRACE" = "yes" ]) +AM_CONDITIONAL([HAVE_DTRACE_DASH_G], [ test "$HAVE_DTRACE_DASH_G" = "yes" ]) +dnl --------------------------------------------------------------------------- +dnl End Macro: DTRACE_TEST +dnl --------------------------------------------------------------------------- diff --git a/configure.in b/configure.in index bdeb0f24c40..2a56c6554cb 100644 --- a/configure.in +++ b/configure.in @@ -10,7 +10,7 @@ AC_CANONICAL_SYSTEM # # When changing major version number please also check switch statement # in mysqlbinlog::check_master_version(). 
-AM_INIT_AUTOMAKE(mysql, 5.1.33) +AM_INIT_AUTOMAKE(mysql, 5.1.33-pv-0.2.1) AM_CONFIG_HEADER([include/config.h:config.h.in]) PROTOCOL_VERSION=10 @@ -50,6 +50,7 @@ sinclude(config/ac-macros/check_cpu.m4) sinclude(config/ac-macros/character_sets.m4) sinclude(config/ac-macros/compiler_flag.m4) sinclude(config/ac-macros/plugins.m4) +sinclude(config/ac-macros/dtrace.m4) sinclude(config/ac-macros/ha_ndbcluster.m4) sinclude(config/ac-macros/large_file.m4) sinclude(config/ac-macros/misc.m4) @@ -861,6 +862,8 @@ then AC_CHECK_DECLS(SHM_HUGETLB, AC_DEFINE([HAVE_LARGE_PAGES], [1], [Define if you have large pages support]) + AC_DEFINE([HAVE_LARGE_PAGE_OPTION], [1], + [Define if you have large page option]) AC_DEFINE([HUGETLB_USE_PROC_MEMINFO], [1], [Define if /proc/meminfo shows the huge page size (Linux only)]) , , @@ -868,6 +871,20 @@ then #include <sys/shm.h> ] ) +else +# For large pages support on Solaris +AC_CHECK_DECLS(MHA_MAPSIZE_VA, + AC_DEFINE([HAVE_SOLARIS_LARGE_PAGES], [1], + [Define to 1 if you have large pages support]) + AC_DEFINE([HAVE_LARGE_PAGE_OPTION], [1], + [Define if you have large page option]) + , , + [ +#include <sys/mman.h> + ] +) + + fi #-------------------------------------------------------------------- @@ -1762,6 +1779,32 @@ if test "x$mysql_cv_gcc_atomic_builtins" = xyes; then [Define to 1 if compiler provides atomic builtins.]) fi +AC_CACHE_CHECK([whether the OS provides atomic_* functions like Solaris], + [mysql_cv_solaris_atomic], [AC_TRY_RUN([ +#include <atomic.h> +int +main() +{ + int foo = -10; int bar = 10; + if (atomic_add_int_nv((uint_t *)&foo, bar) || foo) + return -1; + bar = atomic_swap_uint((uint_t *)&foo, (uint_t)bar); + if (bar || foo != 10) + return -1; + bar = atomic_cas_uint((uint_t *)&bar, (uint_t)foo, 15); + if (bar) + return -1; + return 0; +} +], [mysql_cv_solaris_atomic=yes], + [mysql_cv_solaris_atomic=no], + [mysql_cv_solaris_atomic=no])]) + +if test "x$mysql_cv_solaris_atomic" = xyes; then + AC_DEFINE(HAVE_SOLARIS_ATOMIC, 1, 
+ [Define to 1 if OS provides atomic_* functions like Solaris.]) +fi + # Force static compilation to avoid linking problems/get more speed AC_ARG_WITH(mysqld-ldflags, [ --with-mysqld-ldflags Extra linking arguments for mysqld], @@ -2321,6 +2364,42 @@ fi fi #---END: +#Check for x86 PAUSE instruction +AC_MSG_CHECKING("for x86 PAUSE instruction") +# We have to actually try running the test program, because of a bug +# in Solaris on x86_64, where it wrongly reports that PAUSE is not +# supported when trying to run an application. See +# http://bugs.opensolaris.org/bugdatabase/printableBug.do?bug_id=6478684 +AC_TRY_RUN([ + int main() { + __asm__ __volatile__ ("pause"); + return 0; + } + ], + [x86_pause_exists=yes], + [x86_pause_exists=no], + [x86_pause_exists=no] # Cross-compile, assume no PAUSE instruction +) +AC_TRY_RUN([ + int main() { + __asm__ __volatile__ ("rep; nop"); + return 0; + } + ], + [x86_fake_pause_exists=yes], + [x86_fake_pause_exists=no], + [x86_fake_pause_exists=no] # Cross-compile, assume no x86 NOP instruction +) +if test "$x86_pause_exists" = "yes" +then + AC_DEFINE([HAVE_PAUSE_INSTRUCTION], [1], [Does x86 PAUSE instruction exist]) +else + if test "$x86_fake_pause_exists" = "yes" + then + AC_DEFINE([HAVE_FAKE_PAUSE_INSTRUCTION], [1], [Does x86 NOP instruction exist]) + fi +fi + # Check if pthread_attr_setscope() exists AC_CACHE_CHECK("for pthread_attr_setscope", mysql_cv_pthread_attr_setscope, AC_TRY_LINK( @@ -2462,19 +2541,6 @@ then [Access checks in embedded library]) fi -tools_dirs="" - -AC_ARG_WITH([mysqlmanager], - AC_HELP_STRING([--with-mysqlmanager], [Build the mysqlmanager binary: yes/no (default: build if server is built.)]),,) - -if test "$with_mysqlmanager" = "yes" -o \ - '(' "$with_mysqlmanager:$with_server" = ":yes" -a \ - -d "$srcdir/server-tools" ')' ; then - tools_dirs="server-tools" -fi - -AC_SUBST(tools_dirs) - #MYSQL_CHECK_CPU libmysqld_dirs= @@ -2804,7 +2870,6 @@ AC_CONFIG_FILES(Makefile extra/Makefile mysys/Makefile dnl 
dbug/Makefile scripts/Makefile include/Makefile dnl tests/Makefile Docs/Makefile support-files/Makefile dnl support-files/MacOSX/Makefile support-files/RHEL4-SElinux/Makefile dnl - server-tools/Makefile server-tools/instance-manager/Makefile dnl cmd-line-utils/Makefile cmd-line-utils/libedit/Makefile dnl libmysqld/Makefile libmysqld/examples/Makefile dnl mysql-test/Makefile mysql-test/lib/My/SafeProcess/Makefile dnl diff --git a/include/Makefile.am b/include/Makefile.am index 64f73af8606..9326901bd6e 100644 --- a/include/Makefile.am +++ b/include/Makefile.am @@ -39,7 +39,9 @@ noinst_HEADERS = config-win.h config-netware.h my_bit.h \ my_handler.h my_time.h \ my_vle.h my_user.h my_atomic.h atomic/nolock.h \ atomic/rwlock.h atomic/x86-gcc.h atomic/x86-msvc.h \ - atomic/gcc_builtins.h my_libwrap.h my_stacktrace.h + atomic/solaris.h \ + atomic/gcc_builtins.h my_libwrap.h my_stacktrace.h \ + probes_mysql.h probes_mysql_nodtrace.h EXTRA_DIST = mysql.h.pp mysql/plugin.h.pp @@ -67,5 +69,17 @@ my_config.h: config.h dist-hook: $(RM) -f $(distdir)/mysql_version.h $(distdir)/my_config.h +if HAVE_DTRACE +BUILT_SOURCES += probes_mysql_dtrace.h +CLEANFILES += probes_mysql_dtrace.h +DTRACEPROVIDER = $(abs_top_srcdir)/sql/probes_mysql.d + +probes_mysql_dtrace.h: $(DTRACEPROVIDER) + $(DTRACE) $(DTRACEFLAGS) -h -s $< -o $@ +endif + +probes_mysql_nodtrace.h: $(DTRACEPROVIDER) + $(abs_top_srcdir)/scripts/dheadgen.pl -f $^ > $@ + # Don't update the files from bitkeeper %::SCCS/s.% diff --git a/include/atomic/nolock.h b/include/atomic/nolock.h index 10ac17884b6..0e6c8089b84 100644 --- a/include/atomic/nolock.h +++ b/include/atomic/nolock.h @@ -28,11 +28,28 @@ #elif defined(_MSC_VER) #include "x86-msvc.h" #endif -#endif -#ifdef make_atomic_cas_body +#elif defined(HAVE_SOLARIS_ATOMIC) + +#include "solaris.h" + +#endif /* __i386__ || _M_IX86 || HAVE_GCC_ATOMIC_BUILTINS */ + +#if defined(make_atomic_cas_body) || defined(MY_ATOMICS_MADE) +/* + * We have atomics that require no locking + */ 
+#define MY_ATOMIC_NOLOCK +#ifdef __SUNPRO_C +/* + * Sun Studio 12 (and likely earlier) does not accept a typedef struct {} + */ +typedef char my_atomic_rwlock_t; +#else typedef struct { } my_atomic_rwlock_t; +#endif + #define my_atomic_rwlock_destroy(name) #define my_atomic_rwlock_init(name) #define my_atomic_rwlock_rdlock(name) diff --git a/include/atomic/solaris.h b/include/atomic/solaris.h new file mode 100644 index 00000000000..4c51253d2d5 --- /dev/null +++ b/include/atomic/solaris.h @@ -0,0 +1,210 @@ +/* Copyright (C) 2008 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include <atomic.h> + +#define MY_ATOMIC_MODE "solaris-atomic" + +/* + * This is defined to indicate we fully define the my_atomic_* (inline) + * functions here, so there is no need to "make" them in my_atomic.h + * using make_atomic_* and make_atomic_*_body. + */ +#define MY_ATOMICS_MADE + +STATIC_INLINE int +my_atomic_cas8(int8 volatile *a, int8 *cmp, int8 set) +{ + int ret; + int8 sav; + sav = (int8) atomic_cas_8((volatile uint8_t *)a, (uint8_t)*cmp, + (uint8_t)set); + if (! (ret = (sav == *cmp))) + *cmp = sav; + return ret; +} + +STATIC_INLINE int +my_atomic_cas16(int16 volatile *a, int16 *cmp, int16 set) +{ + int ret; + int16 sav; + sav = (int16) atomic_cas_16((volatile uint16_t *)a, (uint16_t)*cmp, + (uint16_t)set); + if (! 
(ret = (sav == *cmp))) + *cmp = sav; + return ret; +} + +STATIC_INLINE int +my_atomic_cas32(int32 volatile *a, int32 *cmp, int32 set) +{ + int ret; + int32 sav; + sav = (int32) atomic_cas_32((volatile uint32_t *)a, (uint32_t)*cmp, + (uint32_t)set); + if (! (ret = (sav == *cmp))) + *cmp = sav; + return ret; +} + +STATIC_INLINE int +my_atomic_casptr(void * volatile *a, void **cmp, void *set) +{ + int ret; + void *sav; + sav = atomic_cas_ptr(a, *cmp, set); + if (! (ret = (sav == *cmp))) + *cmp = sav; + return ret; +} + +/* ------------------------------------------------------------------------ */ + +STATIC_INLINE int8 +my_atomic_add8(int8 volatile *a, int8 v) +{ + int8 nv; + nv = atomic_add_8_nv((volatile uint8_t *)a, v); + return (nv - v); +} + +STATIC_INLINE int16 +my_atomic_add16(int16 volatile *a, int16 v) +{ + int16 nv; + nv = atomic_add_16_nv((volatile uint16_t *)a, v); + return (nv - v); +} + +STATIC_INLINE int32 +my_atomic_add32(int32 volatile *a, int32 v) +{ + int32 nv; + nv = atomic_add_32_nv((volatile uint32_t *)a, v); + return (nv - v); +} + +/* ------------------------------------------------------------------------ */ + +#ifdef MY_ATOMIC_MODE_DUMMY + +STATIC_INLINE int8 +my_atomic_load8(int8 volatile *a) { return (*a); } + +STATIC_INLINE int16 +my_atomic_load16(int16 volatile *a) { return (*a); } + +STATIC_INLINE int32 +my_atomic_load32(int32 volatile *a) { return (*a); } + +STATIC_INLINE void * +my_atomic_loadptr(void * volatile *a) { return (*a); } + +/* ------------------------------------------------------------------------ */ + +STATIC_INLINE void +my_atomic_store8(int8 volatile *a, int8 v) { *a = v; } + +STATIC_INLINE void +my_atomic_store16(int16 volatile *a, int16 v) { *a = v; } + +STATIC_INLINE void +my_atomic_store32(int32 volatile *a, int32 v) { *a = v; } + +STATIC_INLINE void +my_atomic_storeptr(void * volatile *a, void *v) { *a = v; } + +/* ------------------------------------------------------------------------ */ + +#else /* 
MY_ATOMIC_MODE_DUMMY */ + +STATIC_INLINE int8 +my_atomic_load8(int8 volatile *a) +{ + return ((int8) atomic_or_8_nv((volatile uint8_t *)a, 0)); +} + +STATIC_INLINE int16 +my_atomic_load16(int16 volatile *a) +{ + return ((int16) atomic_or_16_nv((volatile uint16_t *)a, 0)); +} + +STATIC_INLINE int32 +my_atomic_load32(int32 volatile *a) +{ + return ((int32) atomic_or_32_nv((volatile uint32_t *)a, 0)); +} + +STATIC_INLINE void * +my_atomic_loadptr(void * volatile *a) +{ + return ((void *) atomic_or_ulong_nv((volatile ulong_t *)a, 0)); +} + +/* ------------------------------------------------------------------------ */ + +STATIC_INLINE void +my_atomic_store8(int8 volatile *a, int8 v) +{ + (void) atomic_swap_8((volatile uint8_t *)a, (uint8_t)v); +} + +STATIC_INLINE void +my_atomic_store16(int16 volatile *a, int16 v) +{ + (void) atomic_swap_16((volatile uint16_t *)a, (uint16_t)v); +} + +STATIC_INLINE void +my_atomic_store32(int32 volatile *a, int32 v) +{ + (void) atomic_swap_32((volatile uint32_t *)a, (uint32_t)v); +} + +STATIC_INLINE void +my_atomic_storeptr(void * volatile *a, void *v) +{ + (void) atomic_swap_ptr(a, v); +} + +#endif + +/* ------------------------------------------------------------------------ */ + +STATIC_INLINE int8 +my_atomic_swap8(int8 volatile *a, int8 v) +{ + return ((int8) atomic_swap_8((volatile uint8_t *)a, (uint8_t)v)); +} + +STATIC_INLINE int16 +my_atomic_swap16(int16 volatile *a, int16 v) +{ + return ((int16) atomic_swap_16((volatile uint16_t *)a, (uint16_t)v)); +} + +STATIC_INLINE int32 +my_atomic_swap32(int32 volatile *a, int32 v) +{ + return ((int32) atomic_swap_32((volatile uint32_t *)a, (uint32_t)v)); +} + +STATIC_INLINE void * +my_atomic_swapptr(void * volatile *a, void *v) +{ + return (atomic_swap_ptr(a, v)); +} diff --git a/include/my_atomic.h b/include/my_atomic.h index ed439e5fe87..03f7d081981 100644 --- a/include/my_atomic.h +++ b/include/my_atomic.h @@ -18,13 +18,21 @@ #define intptr void * #ifndef MY_ATOMIC_MODE_RWLOCKS +/* + * 
Attempt to do atomic ops without locks + */ #include "atomic/nolock.h" #endif -#ifndef make_atomic_cas_body +#ifndef MY_ATOMIC_NOLOCK +/* + * Have to use rw-locks for atomic ops + */ #include "atomic/rwlock.h" #endif +#ifndef MY_ATOMICS_MADE + #ifndef make_atomic_add_body #define make_atomic_add_body(S) \ int ## S tmp=*a; \ @@ -91,7 +99,7 @@ extern int ## S my_atomic_load ## S(int ## S volatile *a); #define make_atomic_store(S) \ extern void my_atomic_store ## S(int ## S volatile *a, int ## S v); -#endif +#endif /* HAVE_INLINE */ make_atomic_cas( 8) make_atomic_cas(16) @@ -129,6 +137,8 @@ make_atomic_swap(ptr) #undef make_atomic_swap_body #undef intptr +#endif /* MY_ATOMICS_MADE */ + #ifdef _atomic_h_cleanup_ #include _atomic_h_cleanup_ #undef _atomic_h_cleanup_ diff --git a/include/probes_mysql.h b/include/probes_mysql.h new file mode 100644 index 00000000000..13491c661fd --- /dev/null +++ b/include/probes_mysql.h @@ -0,0 +1,13 @@ +#ifndef PROBES_MYSQL_H + +#define PROBES_MYSQL_H + +#include <my_global.h> + +#if defined(HAVE_DTRACE) && !defined(DISABLE_DTRACE) +#include "probes_mysql_dtrace.h" +#else +#include "probes_mysql_nodtrace.h" +#endif + +#endif /* PROBES_MYSQL_H */ diff --git a/include/probes_mysql_nodtrace.h b/include/probes_mysql_nodtrace.h new file mode 100644 index 00000000000..bc3b65a00e5 --- /dev/null +++ b/include/probes_mysql_nodtrace.h @@ -0,0 +1,129 @@ +/* + * Generated by dheadgen(1). 
+ */ + +#ifndef _PROBES_MYSQL_D +#define _PROBES_MYSQL_D + +#ifdef __cplusplus +extern "C" { +#endif + +#define MYSQL_CONNECTION_START(arg0, arg1, arg2) +#define MYSQL_CONNECTION_START_ENABLED() (0) +#define MYSQL_CONNECTION_DONE(arg0, arg1) +#define MYSQL_CONNECTION_DONE_ENABLED() (0) +#define MYSQL_COMMAND_START(arg0, arg1, arg2, arg3) +#define MYSQL_COMMAND_START_ENABLED() (0) +#define MYSQL_COMMAND_DONE(arg0) +#define MYSQL_COMMAND_DONE_ENABLED() (0) +#define MYSQL_QUERY_START(arg0, arg1, arg2, arg3, arg4) +#define MYSQL_QUERY_START_ENABLED() (0) +#define MYSQL_QUERY_DONE(arg0) +#define MYSQL_QUERY_DONE_ENABLED() (0) +#define MYSQL_QUERY_PARSE_START(arg0) +#define MYSQL_QUERY_PARSE_START_ENABLED() (0) +#define MYSQL_QUERY_PARSE_DONE(arg0) +#define MYSQL_QUERY_PARSE_DONE_ENABLED() (0) +#define MYSQL_QUERY_CACHE_HIT(arg0, arg1) +#define MYSQL_QUERY_CACHE_HIT_ENABLED() (0) +#define MYSQL_QUERY_CACHE_MISS(arg0) +#define MYSQL_QUERY_CACHE_MISS_ENABLED() (0) +#define MYSQL_QUERY_EXEC_START(arg0, arg1, arg2, arg3, arg4, arg5) +#define MYSQL_QUERY_EXEC_START_ENABLED() (0) +#define MYSQL_QUERY_EXEC_DONE(arg0) +#define MYSQL_QUERY_EXEC_DONE_ENABLED() (0) +#define MYSQL_INSERT_ROW_START(arg0, arg1) +#define MYSQL_INSERT_ROW_START_ENABLED() (0) +#define MYSQL_INSERT_ROW_DONE(arg0) +#define MYSQL_INSERT_ROW_DONE_ENABLED() (0) +#define MYSQL_UPDATE_ROW_START(arg0, arg1) +#define MYSQL_UPDATE_ROW_START_ENABLED() (0) +#define MYSQL_UPDATE_ROW_DONE(arg0) +#define MYSQL_UPDATE_ROW_DONE_ENABLED() (0) +#define MYSQL_DELETE_ROW_START(arg0, arg1) +#define MYSQL_DELETE_ROW_START_ENABLED() (0) +#define MYSQL_DELETE_ROW_DONE(arg0) +#define MYSQL_DELETE_ROW_DONE_ENABLED() (0) +#define MYSQL_READ_ROW_START(arg0, arg1, arg2) +#define MYSQL_READ_ROW_START_ENABLED() (0) +#define MYSQL_READ_ROW_DONE(arg0) +#define MYSQL_READ_ROW_DONE_ENABLED() (0) +#define MYSQL_INDEX_READ_ROW_START(arg0, arg1) +#define MYSQL_INDEX_READ_ROW_START_ENABLED() (0) +#define MYSQL_INDEX_READ_ROW_DONE(arg0) 
+#define MYSQL_INDEX_READ_ROW_DONE_ENABLED() (0) +#define MYSQL_HANDLER_RDLOCK_START(arg0, arg1) +#define MYSQL_HANDLER_RDLOCK_START_ENABLED() (0) +#define MYSQL_HANDLER_WRLOCK_START(arg0, arg1) +#define MYSQL_HANDLER_WRLOCK_START_ENABLED() (0) +#define MYSQL_HANDLER_UNLOCK_START(arg0, arg1) +#define MYSQL_HANDLER_UNLOCK_START_ENABLED() (0) +#define MYSQL_HANDLER_RDLOCK_DONE(arg0) +#define MYSQL_HANDLER_RDLOCK_DONE_ENABLED() (0) +#define MYSQL_HANDLER_WRLOCK_DONE(arg0) +#define MYSQL_HANDLER_WRLOCK_DONE_ENABLED() (0) +#define MYSQL_HANDLER_UNLOCK_DONE(arg0) +#define MYSQL_HANDLER_UNLOCK_DONE_ENABLED() (0) +#define MYSQL_FILESORT_START(arg0, arg1) +#define MYSQL_FILESORT_START_ENABLED() (0) +#define MYSQL_FILESORT_DONE(arg0, arg1) +#define MYSQL_FILESORT_DONE_ENABLED() (0) +#define MYSQL_SELECT_START(arg0) +#define MYSQL_SELECT_START_ENABLED() (0) +#define MYSQL_SELECT_DONE(arg0, arg1) +#define MYSQL_SELECT_DONE_ENABLED() (0) +#define MYSQL_INSERT_START(arg0) +#define MYSQL_INSERT_START_ENABLED() (0) +#define MYSQL_INSERT_DONE(arg0, arg1) +#define MYSQL_INSERT_DONE_ENABLED() (0) +#define MYSQL_INSERT_SELECT_START(arg0) +#define MYSQL_INSERT_SELECT_START_ENABLED() (0) +#define MYSQL_INSERT_SELECT_DONE(arg0, arg1) +#define MYSQL_INSERT_SELECT_DONE_ENABLED() (0) +#define MYSQL_UPDATE_START(arg0) +#define MYSQL_UPDATE_START_ENABLED() (0) +#define MYSQL_UPDATE_DONE(arg0, arg1, arg2) +#define MYSQL_UPDATE_DONE_ENABLED() (0) +#define MYSQL_MULTI_UPDATE_START(arg0) +#define MYSQL_MULTI_UPDATE_START_ENABLED() (0) +#define MYSQL_MULTI_UPDATE_DONE(arg0, arg1, arg2) +#define MYSQL_MULTI_UPDATE_DONE_ENABLED() (0) +#define MYSQL_DELETE_START(arg0) +#define MYSQL_DELETE_START_ENABLED() (0) +#define MYSQL_DELETE_DONE(arg0, arg1) +#define MYSQL_DELETE_DONE_ENABLED() (0) +#define MYSQL_MULTI_DELETE_START(arg0) +#define MYSQL_MULTI_DELETE_START_ENABLED() (0) +#define MYSQL_MULTI_DELETE_DONE(arg0, arg1) +#define MYSQL_MULTI_DELETE_DONE_ENABLED() (0) +#define MYSQL_NET_READ_START() 
+#define MYSQL_NET_READ_START_ENABLED() (0) +#define MYSQL_NET_READ_DONE(arg0, arg1) +#define MYSQL_NET_READ_DONE_ENABLED() (0) +#define MYSQL_NET_WRITE_START(arg0) +#define MYSQL_NET_WRITE_START_ENABLED() (0) +#define MYSQL_NET_WRITE_DONE(arg0) +#define MYSQL_NET_WRITE_DONE_ENABLED() (0) +#define MYSQL_KEYCACHE_READ_START(arg0, arg1, arg2, arg3) +#define MYSQL_KEYCACHE_READ_START_ENABLED() (0) +#define MYSQL_KEYCACHE_READ_BLOCK(arg0) +#define MYSQL_KEYCACHE_READ_BLOCK_ENABLED() (0) +#define MYSQL_KEYCACHE_READ_HIT() +#define MYSQL_KEYCACHE_READ_HIT_ENABLED() (0) +#define MYSQL_KEYCACHE_READ_MISS() +#define MYSQL_KEYCACHE_READ_MISS_ENABLED() (0) +#define MYSQL_KEYCACHE_READ_DONE(arg0, arg1) +#define MYSQL_KEYCACHE_READ_DONE_ENABLED() (0) +#define MYSQL_KEYCACHE_WRITE_START(arg0, arg1, arg2, arg3) +#define MYSQL_KEYCACHE_WRITE_START_ENABLED() (0) +#define MYSQL_KEYCACHE_WRITE_BLOCK(arg0) +#define MYSQL_KEYCACHE_WRITE_BLOCK_ENABLED() (0) +#define MYSQL_KEYCACHE_WRITE_DONE(arg0, arg1) +#define MYSQL_KEYCACHE_WRITE_DONE_ENABLED() (0) + +#ifdef __cplusplus +} +#endif + +#endif /* _PROBES_MYSQL_D */ diff --git a/libmysql/Makefile.shared b/libmysql/Makefile.shared index 43e86ea31e8..c66c1de2f22 100644 --- a/libmysql/Makefile.shared +++ b/libmysql/Makefile.shared @@ -89,7 +89,8 @@ DEFS = -DDEFAULT_CHARSET_HOME="\"$(MYSQLBASEdir)\"" \ -DDEFAULT_HOME_ENV=MYSQL_HOME \ -DDEFAULT_GROUP_SUFFIX_ENV=MYSQL_GROUP_SUFFIX \ -DDEFAULT_SYSCONFDIR="\"$(sysconfdir)\"" \ - -DSHAREDIR="\"$(MYSQLSHAREdir)\"" $(target_defs) + -DSHAREDIR="\"$(MYSQLSHAREdir)\"" -DDISABLE_DTRACE \ + $(target_defs) if HAVE_YASSL yassl_las = $(top_builddir)/extra/yassl/src/libyassl.la \ diff --git a/libmysqld/Makefile.am b/libmysqld/Makefile.am index ef2fab2da6d..1657afc47fd 100644 --- a/libmysqld/Makefile.am +++ b/libmysqld/Makefile.am @@ -28,7 +28,8 @@ DEFS = -DEMBEDDED_LIBRARY -DMYSQL_SERVER \ -DDEFAULT_MYSQL_HOME="\"$(MYSQLBASEdir)\"" \ -DDATADIR="\"$(MYSQLDATAdir)\"" \ -DSHAREDIR="\"$(MYSQLSHAREdir)\"" \ - 
-DPLUGINDIR="\"$(pkgplugindir)\"" + -DPLUGINDIR="\"$(pkgplugindir)\"" \ + -DDISABLE_DTRACE INCLUDES= -I$(top_builddir)/include -I$(top_srcdir)/include \ -I$(top_builddir)/sql -I$(top_srcdir)/sql \ -I$(top_srcdir)/sql/examples \ diff --git a/mysql-test/r/innodb.result b/mysql-test/r/innodb.result index 2ce9587ef1a..56c8fe2fda8 100644 --- a/mysql-test/r/innodb.result +++ b/mysql-test/r/innodb.result @@ -1738,7 +1738,7 @@ count(*) drop table t1; show status like "Innodb_buffer_pool_pages_total"; Variable_name Value -Innodb_buffer_pool_pages_total 512 +Innodb_buffer_pool_pages_total 65536 show status like "Innodb_page_size"; Variable_name Value Innodb_page_size 16384 @@ -1784,7 +1784,7 @@ innodb_sync_spin_loops 20 SET @old_innodb_thread_concurrency= @@global.innodb_thread_concurrency; show variables like "innodb_thread_concurrency"; Variable_name Value -innodb_thread_concurrency 8 +innodb_thread_concurrency 0 set global innodb_thread_concurrency=1001; Warnings: Warning 1292 Truncated incorrect thread_concurrency value: '1001' diff --git a/mysql-test/r/partition_innodb.result b/mysql-test/r/partition_innodb.result index ad4d08e89ff..af277e5ce40 100644 --- a/mysql-test/r/partition_innodb.result +++ b/mysql-test/r/partition_innodb.result @@ -11,13 +11,7 @@ SET @old_tx_isolation := @@session.tx_isolation; SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED; SET autocommit = 0; UPDATE t1 SET DATA = data*2 WHERE id = 3; -SHOW ENGINE InnoDB STATUS; -Type Name Status -InnoDB 2 lock struct(s) 1 row lock(s) UPDATE t1 SET data = data*2 WHERE data = 2; -SHOW ENGINE InnoDB STATUS; -Type Name Status -InnoDB 6 lock struct(s) 2 row lock(s) SET @@session.tx_isolation = @old_tx_isolation; DROP TABLE t1; # Bug#37721, test of ORDER BY on PK and WHERE on INDEX diff --git a/mysql-test/suite/sys_vars/r/innodb_autoextend_increment_basic.result b/mysql-test/suite/sys_vars/r/innodb_autoextend_increment_basic.result index 69a8b6cc430..ea87526c42a 100644 --- 
a/mysql-test/suite/sys_vars/r/innodb_autoextend_increment_basic.result +++ b/mysql-test/suite/sys_vars/r/innodb_autoextend_increment_basic.result @@ -6,13 +6,13 @@ Warning 1292 Truncated incorrect autoextend_increment value: '0' SET @@global.innodb_autoextend_increment = DEFAULT; SELECT @@global.innodb_autoextend_increment ; @@global.innodb_autoextend_increment -8 +64 '#---------------------FN_DYNVARS_046_02-------------------------#' SET innodb_autoextend_increment = 1; ERROR HY000: Variable 'innodb_autoextend_increment' is a GLOBAL variable and should be set with SET GLOBAL SELECT @@innodb_autoextend_increment ; @@innodb_autoextend_increment -8 +64 SELECT local.innodb_autoextend_increment ; ERROR 42S02: Unknown table 'local' in field list SET global innodb_autoextend_increment = 0; diff --git a/mysql-test/suite/sys_vars/r/innodb_file_io_threads_basic.result b/mysql-test/suite/sys_vars/r/innodb_file_io_threads_basic.result index 4c1c3ae8d54..5ef7e1099e0 100644 --- a/mysql-test/suite/sys_vars/r/innodb_file_io_threads_basic.result +++ b/mysql-test/suite/sys_vars/r/innodb_file_io_threads_basic.result @@ -1,53 +1,101 @@ '#---------------------BS_STVARS_027_01----------------------#' -SELECT COUNT(@@GLOBAL.innodb_file_io_threads); -COUNT(@@GLOBAL.innodb_file_io_threads) +SELECT COUNT(@@GLOBAL.innodb_read_io_threads); +COUNT(@@GLOBAL.innodb_read_io_threads) +1 +1 Expected +SELECT COUNT(@@GLOBAL.innodb_write_io_threads); +COUNT(@@GLOBAL.innodb_write_io_threads) 1 1 Expected '#---------------------BS_STVARS_027_02----------------------#' -SET @@GLOBAL.innodb_file_io_threads=1; -ERROR HY000: Variable 'innodb_file_io_threads' is a read only variable +SET @@GLOBAL.innodb_read_io_threads=1; +ERROR HY000: Variable 'innodb_read_io_threads' is a read only variable +Expected error 'Read only variable' +SELECT COUNT(@@GLOBAL.innodb_read_io_threads); +COUNT(@@GLOBAL.innodb_read_io_threads) +1 +1 Expected +SET @@GLOBAL.innodb_write_io_threads=1; +ERROR HY000: Variable 
'innodb_write_io_threads' is a read only variable Expected error 'Read only variable' -SELECT COUNT(@@GLOBAL.innodb_file_io_threads); -COUNT(@@GLOBAL.innodb_file_io_threads) +SELECT COUNT(@@GLOBAL.innodb_write_io_threads); +COUNT(@@GLOBAL.innodb_write_io_threads) 1 1 Expected '#---------------------BS_STVARS_027_03----------------------#' -SELECT @@GLOBAL.innodb_file_io_threads = VARIABLE_VALUE +SELECT @@GLOBAL.innodb_read_io_threads = VARIABLE_VALUE +FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES +WHERE VARIABLE_NAME='innodb_read_io_threads'; +@@GLOBAL.innodb_read_io_threads = VARIABLE_VALUE +1 +1 Expected +SELECT COUNT(@@GLOBAL.innodb_read_io_threads); +COUNT(@@GLOBAL.innodb_read_io_threads) +1 +1 Expected +SELECT COUNT(VARIABLE_VALUE) +FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES +WHERE VARIABLE_NAME='innodb_read_io_threads'; +COUNT(VARIABLE_VALUE) +1 +1 Expected +SELECT @@GLOBAL.innodb_write_io_threads = VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES -WHERE VARIABLE_NAME='innodb_file_io_threads'; -@@GLOBAL.innodb_file_io_threads = VARIABLE_VALUE +WHERE VARIABLE_NAME='innodb_write_io_threads'; +@@GLOBAL.innodb_write_io_threads = VARIABLE_VALUE 1 1 Expected -SELECT COUNT(@@GLOBAL.innodb_file_io_threads); -COUNT(@@GLOBAL.innodb_file_io_threads) +SELECT COUNT(@@GLOBAL.innodb_write_io_threads); +COUNT(@@GLOBAL.innodb_write_io_threads) 1 1 Expected SELECT COUNT(VARIABLE_VALUE) FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES -WHERE VARIABLE_NAME='innodb_file_io_threads'; +WHERE VARIABLE_NAME='innodb_write_io_threads'; COUNT(VARIABLE_VALUE) 1 1 Expected '#---------------------BS_STVARS_027_04----------------------#' -SELECT @@innodb_file_io_threads = @@GLOBAL.innodb_file_io_threads; -@@innodb_file_io_threads = @@GLOBAL.innodb_file_io_threads +SELECT @@innodb_read_io_threads = @@GLOBAL.innodb_read_io_threads; +@@innodb_read_io_threads = @@GLOBAL.innodb_read_io_threads +1 +1 Expected +SELECT @@innodb_write_io_threads = @@GLOBAL.innodb_write_io_threads; +@@innodb_write_io_threads 
= @@GLOBAL.innodb_write_io_threads 1 1 Expected '#---------------------BS_STVARS_027_05----------------------#' -SELECT COUNT(@@innodb_file_io_threads); -COUNT(@@innodb_file_io_threads) +SELECT COUNT(@@innodb_read_io_threads); +COUNT(@@innodb_read_io_threads) +1 +1 Expected +SELECT COUNT(@@local.innodb_read_io_threads); +ERROR HY000: Variable 'innodb_read_io_threads' is a GLOBAL variable +Expected error 'Variable is a GLOBAL variable' +SELECT COUNT(@@SESSION.innodb_read_io_threads); +ERROR HY000: Variable 'innodb_read_io_threads' is a GLOBAL variable +Expected error 'Variable is a GLOBAL variable' +SELECT COUNT(@@GLOBAL.innodb_read_io_threads); +COUNT(@@GLOBAL.innodb_read_io_threads) +1 +1 Expected +SELECT innodb_read_io_threads = @@SESSION.innodb_read_io_threads; +ERROR 42S22: Unknown column 'innodb_read_io_threads' in 'field list' +Expected error 'Readonly variable' +SELECT COUNT(@@innodb_write_io_threads); +COUNT(@@innodb_write_io_threads) 1 1 Expected -SELECT COUNT(@@local.innodb_file_io_threads); -ERROR HY000: Variable 'innodb_file_io_threads' is a GLOBAL variable +SELECT COUNT(@@local.innodb_write_io_threads); +ERROR HY000: Variable 'innodb_write_io_threads' is a GLOBAL variable Expected error 'Variable is a GLOBAL variable' -SELECT COUNT(@@SESSION.innodb_file_io_threads); -ERROR HY000: Variable 'innodb_file_io_threads' is a GLOBAL variable +SELECT COUNT(@@SESSION.innodb_write_io_threads); +ERROR HY000: Variable 'innodb_write_io_threads' is a GLOBAL variable Expected error 'Variable is a GLOBAL variable' -SELECT COUNT(@@GLOBAL.innodb_file_io_threads); -COUNT(@@GLOBAL.innodb_file_io_threads) +SELECT COUNT(@@GLOBAL.innodb_write_io_threads); +COUNT(@@GLOBAL.innodb_write_io_threads) 1 1 Expected -SELECT innodb_file_io_threads = @@SESSION.innodb_file_io_threads; -ERROR 42S22: Unknown column 'innodb_file_io_threads' in 'field list' +SELECT innodb_write_io_threads = @@SESSION.innodb_write_io_threads; +ERROR 42S22: Unknown column 'innodb_write_io_threads' in 'field 
list' Expected error 'Readonly variable' diff --git a/mysql-test/suite/sys_vars/r/innodb_max_dirty_pages_pct_basic.result b/mysql-test/suite/sys_vars/r/innodb_max_dirty_pages_pct_basic.result index 6c52f79f9ad..8e48957258f 100644 --- a/mysql-test/suite/sys_vars/r/innodb_max_dirty_pages_pct_basic.result +++ b/mysql-test/suite/sys_vars/r/innodb_max_dirty_pages_pct_basic.result @@ -1,19 +1,19 @@ SET @global_start_value = @@global.innodb_max_dirty_pages_pct; SELECT @global_start_value; @global_start_value -90 +75 '#--------------------FN_DYNVARS_046_01------------------------#' SET @@global.innodb_max_dirty_pages_pct = 0; SET @@global.innodb_max_dirty_pages_pct = DEFAULT; SELECT @@global.innodb_max_dirty_pages_pct; @@global.innodb_max_dirty_pages_pct -90 +75 '#---------------------FN_DYNVARS_046_02-------------------------#' SET innodb_max_dirty_pages_pct = 1; ERROR HY000: Variable 'innodb_max_dirty_pages_pct' is a GLOBAL variable and should be set with SET GLOBAL SELECT @@innodb_max_dirty_pages_pct; @@innodb_max_dirty_pages_pct -90 +75 SELECT local.innodb_max_dirty_pages_pct; ERROR 42S02: Unknown table 'local' in field list SET global innodb_max_dirty_pages_pct = 0; @@ -29,33 +29,33 @@ SET @@global.innodb_max_dirty_pages_pct = 1; SELECT @@global.innodb_max_dirty_pages_pct; @@global.innodb_max_dirty_pages_pct 1 -SET @@global.innodb_max_dirty_pages_pct = 100; +SET @@global.innodb_max_dirty_pages_pct = 99; SELECT @@global.innodb_max_dirty_pages_pct; @@global.innodb_max_dirty_pages_pct -100 +99 '#--------------------FN_DYNVARS_046_04-------------------------#' SET @@global.innodb_max_dirty_pages_pct = -1; Warnings: Warning 1292 Truncated incorrect max_dirty_pages_pct value: '18446744073709551615' SELECT @@global.innodb_max_dirty_pages_pct; @@global.innodb_max_dirty_pages_pct -100 +99 SET @@global.innodb_max_dirty_pages_pct = "T"; ERROR 42000: Incorrect argument type to variable 'innodb_max_dirty_pages_pct' SELECT @@global.innodb_max_dirty_pages_pct; 
@@global.innodb_max_dirty_pages_pct -100 +99 SET @@global.innodb_max_dirty_pages_pct = "Y"; ERROR 42000: Incorrect argument type to variable 'innodb_max_dirty_pages_pct' SELECT @@global.innodb_max_dirty_pages_pct; @@global.innodb_max_dirty_pages_pct -100 +99 SET @@global.innodb_max_dirty_pages_pct = 1001; Warnings: Warning 1292 Truncated incorrect max_dirty_pages_pct value: '1001' SELECT @@global.innodb_max_dirty_pages_pct; @@global.innodb_max_dirty_pages_pct -100 +99 '#----------------------FN_DYNVARS_046_05------------------------#' SELECT @@global.innodb_max_dirty_pages_pct = VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES @@ -65,22 +65,22 @@ VARIABLE_VALUE 1 SELECT @@global.innodb_max_dirty_pages_pct; @@global.innodb_max_dirty_pages_pct -100 +99 SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_max_dirty_pages_pct'; VARIABLE_VALUE -100 +99 '#---------------------FN_DYNVARS_046_06-------------------------#' SET @@global.innodb_max_dirty_pages_pct = OFF; ERROR 42000: Incorrect argument type to variable 'innodb_max_dirty_pages_pct' SELECT @@global.innodb_max_dirty_pages_pct; @@global.innodb_max_dirty_pages_pct -100 +99 SET @@global.innodb_max_dirty_pages_pct = ON; ERROR 42000: Incorrect argument type to variable 'innodb_max_dirty_pages_pct' SELECT @@global.innodb_max_dirty_pages_pct; @@global.innodb_max_dirty_pages_pct -100 +99 '#---------------------FN_DYNVARS_046_07----------------------#' SET @@global.innodb_max_dirty_pages_pct = TRUE; SELECT @@global.innodb_max_dirty_pages_pct; @@ -93,4 +93,4 @@ SELECT @@global.innodb_max_dirty_pages_pct; SET @@global.innodb_max_dirty_pages_pct = @global_start_value; SELECT @@global.innodb_max_dirty_pages_pct; @@global.innodb_max_dirty_pages_pct -90 +75 diff --git a/mysql-test/suite/sys_vars/r/innodb_thread_concurrency_basic.result b/mysql-test/suite/sys_vars/r/innodb_thread_concurrency_basic.result index 65a79fecd97..b27bbce5b3c 100644 --- 
a/mysql-test/suite/sys_vars/r/innodb_thread_concurrency_basic.result +++ b/mysql-test/suite/sys_vars/r/innodb_thread_concurrency_basic.result @@ -1,19 +1,19 @@ SET @global_start_value = @@global.innodb_thread_concurrency; SELECT @global_start_value; @global_start_value -8 +0 '#--------------------FN_DYNVARS_046_01------------------------#' SET @@global.innodb_thread_concurrency = 0; SET @@global.innodb_thread_concurrency = DEFAULT; SELECT @@global.innodb_thread_concurrency; @@global.innodb_thread_concurrency -8 +0 '#---------------------FN_DYNVARS_046_02-------------------------#' SET innodb_thread_concurrency = 1; ERROR HY000: Variable 'innodb_thread_concurrency' is a GLOBAL variable and should be set with SET GLOBAL SELECT @@innodb_thread_concurrency; @@innodb_thread_concurrency -8 +0 SELECT local.innodb_thread_concurrency; ERROR 42S02: Unknown table 'local' in field list SET global innodb_thread_concurrency = 0; @@ -93,4 +93,4 @@ SELECT @@global.innodb_thread_concurrency; SET @@global.innodb_thread_concurrency = @global_start_value; SELECT @@global.innodb_thread_concurrency; @@global.innodb_thread_concurrency -8 +0 diff --git a/mysql-test/suite/sys_vars/r/table_definition_cache_basic.result b/mysql-test/suite/sys_vars/r/table_definition_cache_basic.result index 5f0e1960358..13055d00600 100644 --- a/mysql-test/suite/sys_vars/r/table_definition_cache_basic.result +++ b/mysql-test/suite/sys_vars/r/table_definition_cache_basic.result @@ -1,7 +1,7 @@ SET @start_value = @@global.table_definition_cache; SELECT @start_value; @start_value -256 +400 '#--------------------FN_DYNVARS_019_01------------------------#' SET @@global.table_definition_cache = 100; Warnings: @@ -9,12 +9,12 @@ Warning 1292 Truncated incorrect table_definition_cache value: '100' SET @@global.table_definition_cache = DEFAULT; SELECT @@global.table_definition_cache; @@global.table_definition_cache -256 +400 '#---------------------FN_DYNVARS_019_02-------------------------#' SET 
@@global.table_definition_cache = DEFAULT; -SELECT @@global.table_definition_cache = 128; -@@global.table_definition_cache = 128 -0 +SELECT @@global.table_definition_cache = 400; +@@global.table_definition_cache = 400 +1 '#--------------------FN_DYNVARS_019_03------------------------#' SET @@global.table_definition_cache = 1; Warnings: @@ -109,4 +109,4 @@ ERROR 42S22: Unknown column 'table_definition_cache' in 'field list' SET @@global.table_definition_cache = @start_value; SELECT @@global.table_definition_cache; @@global.table_definition_cache -256 +400 diff --git a/mysql-test/suite/sys_vars/r/table_open_cache_basic.result b/mysql-test/suite/sys_vars/r/table_open_cache_basic.result index ca02d32386f..b6891956489 100644 --- a/mysql-test/suite/sys_vars/r/table_open_cache_basic.result +++ b/mysql-test/suite/sys_vars/r/table_open_cache_basic.result @@ -1,17 +1,17 @@ SET @start_value = @@global.table_open_cache ; SELECT @start_value; @start_value -64 +400 '#--------------------FN_DYNVARS_001_01------------------------#' SET @@global.table_open_cache = 99; SET @@global.table_open_cache = DeFAULT; SELECT @@global.table_open_cache; @@global.table_open_cache -64 +400 '#---------------------FN_DYNVARS_001_02-------------------------#' SET @@global.table_open_cache = Default; -SELECT @@global.table_open_cache = 64; -@@global.table_open_cache = 64 +SELECT @@global.table_open_cache = 400; +@@global.table_open_cache = 400 1 '#--------------------FN_DYNVARS_001_03------------------------#' SET @@global.table_open_cache = 8; @@ -105,4 +105,4 @@ ERROR 42S22: Unknown column 'table_open_cache' in 'field list' SET @@global.table_open_cache = @start_value; SELECT @@global.table_open_cache ; @@global.table_open_cache -64 +400 diff --git a/mysql-test/suite/sys_vars/t/innodb_file_io_threads_basic.test b/mysql-test/suite/sys_vars/t/innodb_file_io_threads_basic.test index 8e09c75ded6..32cdd0beac4 100644 --- a/mysql-test/suite/sys_vars/t/innodb_file_io_threads_basic.test +++ 
b/mysql-test/suite/sys_vars/t/innodb_file_io_threads_basic.test @@ -28,7 +28,9 @@ #################################################################### # Displaying default value # #################################################################### -SELECT COUNT(@@GLOBAL.innodb_file_io_threads); +SELECT COUNT(@@GLOBAL.innodb_read_io_threads); +--echo 1 Expected +SELECT COUNT(@@GLOBAL.innodb_write_io_threads); --echo 1 Expected @@ -38,10 +40,17 @@ SELECT COUNT(@@GLOBAL.innodb_file_io_threads); #################################################################### --error ER_INCORRECT_GLOBAL_LOCAL_VAR -SET @@GLOBAL.innodb_file_io_threads=1; +SET @@GLOBAL.innodb_read_io_threads=1; --echo Expected error 'Read only variable' -SELECT COUNT(@@GLOBAL.innodb_file_io_threads); +SELECT COUNT(@@GLOBAL.innodb_read_io_threads); +--echo 1 Expected + +--error ER_INCORRECT_GLOBAL_LOCAL_VAR +SET @@GLOBAL.innodb_write_io_threads=1; +--echo Expected error 'Read only variable' + +SELECT COUNT(@@GLOBAL.innodb_write_io_threads); --echo 1 Expected @@ -52,51 +61,84 @@ SELECT COUNT(@@GLOBAL.innodb_file_io_threads); # Check if the value in GLOBAL Table matches value in variable # ################################################################# -SELECT @@GLOBAL.innodb_file_io_threads = VARIABLE_VALUE +SELECT @@GLOBAL.innodb_read_io_threads = VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES -WHERE VARIABLE_NAME='innodb_file_io_threads'; +WHERE VARIABLE_NAME='innodb_read_io_threads'; --echo 1 Expected -SELECT COUNT(@@GLOBAL.innodb_file_io_threads); +SELECT COUNT(@@GLOBAL.innodb_read_io_threads); --echo 1 Expected SELECT COUNT(VARIABLE_VALUE) FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES -WHERE VARIABLE_NAME='innodb_file_io_threads'; +WHERE VARIABLE_NAME='innodb_read_io_threads'; +--echo 1 Expected + +SELECT @@GLOBAL.innodb_write_io_threads = VARIABLE_VALUE +FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES +WHERE VARIABLE_NAME='innodb_write_io_threads'; +--echo 1 Expected + +SELECT 
COUNT(@@GLOBAL.innodb_write_io_threads); --echo 1 Expected +SELECT COUNT(VARIABLE_VALUE) +FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES +WHERE VARIABLE_NAME='innodb_write_io_threads'; +--echo 1 Expected --echo '#---------------------BS_STVARS_027_04----------------------#' ################################################################################ # Check if accessing variable with and without GLOBAL point to same variable # ################################################################################ -SELECT @@innodb_file_io_threads = @@GLOBAL.innodb_file_io_threads; +SELECT @@innodb_read_io_threads = @@GLOBAL.innodb_read_io_threads; --echo 1 Expected +SELECT @@innodb_write_io_threads = @@GLOBAL.innodb_write_io_threads; +--echo 1 Expected --echo '#---------------------BS_STVARS_027_05----------------------#' ################################################################################ -# Check if innodb_file_io_threads can be accessed with and without @@ sign # +# Check if innodb_read_io_threads can be accessed with and without @@ sign # +# Check if innodb_write_io_threads can be accessed with and without @@ sign # ################################################################################ -SELECT COUNT(@@innodb_file_io_threads); +SELECT COUNT(@@innodb_read_io_threads); +--echo 1 Expected + +--Error ER_INCORRECT_GLOBAL_LOCAL_VAR +SELECT COUNT(@@local.innodb_read_io_threads); +--echo Expected error 'Variable is a GLOBAL variable' + +--Error ER_INCORRECT_GLOBAL_LOCAL_VAR +SELECT COUNT(@@SESSION.innodb_read_io_threads); +--echo Expected error 'Variable is a GLOBAL variable' + +SELECT COUNT(@@GLOBAL.innodb_read_io_threads); +--echo 1 Expected + +--Error ER_BAD_FIELD_ERROR +SELECT innodb_read_io_threads = @@SESSION.innodb_read_io_threads; +--echo Expected error 'Readonly variable' + +SELECT COUNT(@@innodb_write_io_threads); --echo 1 Expected --Error ER_INCORRECT_GLOBAL_LOCAL_VAR -SELECT COUNT(@@local.innodb_file_io_threads); +SELECT 
COUNT(@@local.innodb_write_io_threads); --echo Expected error 'Variable is a GLOBAL variable' --Error ER_INCORRECT_GLOBAL_LOCAL_VAR -SELECT COUNT(@@SESSION.innodb_file_io_threads); +SELECT COUNT(@@SESSION.innodb_write_io_threads); --echo Expected error 'Variable is a GLOBAL variable' -SELECT COUNT(@@GLOBAL.innodb_file_io_threads); +SELECT COUNT(@@GLOBAL.innodb_write_io_threads); --echo 1 Expected --Error ER_BAD_FIELD_ERROR -SELECT innodb_file_io_threads = @@SESSION.innodb_file_io_threads; +SELECT innodb_write_io_threads = @@SESSION.innodb_write_io_threads; --echo Expected error 'Readonly variable' diff --git a/mysql-test/suite/sys_vars/t/innodb_max_dirty_pages_pct_basic.test b/mysql-test/suite/sys_vars/t/innodb_max_dirty_pages_pct_basic.test index 830617866ca..38c3acd92a2 100644 --- a/mysql-test/suite/sys_vars/t/innodb_max_dirty_pages_pct_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_max_dirty_pages_pct_basic.test @@ -72,7 +72,7 @@ SELECT @@global.innodb_max_dirty_pages_pct; SET @@global.innodb_max_dirty_pages_pct = 1; SELECT @@global.innodb_max_dirty_pages_pct; -SET @@global.innodb_max_dirty_pages_pct = 100; +SET @@global.innodb_max_dirty_pages_pct = 99; SELECT @@global.innodb_max_dirty_pages_pct; --echo '#--------------------FN_DYNVARS_046_04-------------------------#' diff --git a/mysql-test/suite/sys_vars/t/innodb_thread_concurrency_basic.test b/mysql-test/suite/sys_vars/t/innodb_thread_concurrency_basic.test index 40ce09ce4db..d30ec214f4a 100644 --- a/mysql-test/suite/sys_vars/t/innodb_thread_concurrency_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_thread_concurrency_basic.test @@ -4,7 +4,7 @@ # Scope: GLOBAL # # Access Type: Dynamic # # Data Type: Numeric # -# Default Value: 8 # +# Default Value: 0 # # Range: 0-1000 # # # # # diff --git a/mysql-test/suite/sys_vars/t/table_definition_cache_basic.test b/mysql-test/suite/sys_vars/t/table_definition_cache_basic.test index 63296590511..741079091bc 100644 --- 
a/mysql-test/suite/sys_vars/t/table_definition_cache_basic.test +++ b/mysql-test/suite/sys_vars/t/table_definition_cache_basic.test @@ -4,7 +4,7 @@ # Scope: GLOBAL # # Access Type: Dynamic # # Data Type: Numeric # -# Default Value: 128 # +# Default Value: 400 # # Range: 1 - 524288 # # # # # @@ -52,7 +52,7 @@ SELECT @@global.table_definition_cache; ############################################### SET @@global.table_definition_cache = DEFAULT; -SELECT @@global.table_definition_cache = 128; +SELECT @@global.table_definition_cache = 400; --echo '#--------------------FN_DYNVARS_019_03------------------------#' diff --git a/mysql-test/suite/sys_vars/t/table_open_cache_basic.test b/mysql-test/suite/sys_vars/t/table_open_cache_basic.test index 23f60bde00a..c32463ca030 100644 --- a/mysql-test/suite/sys_vars/t/table_open_cache_basic.test +++ b/mysql-test/suite/sys_vars/t/table_open_cache_basic.test @@ -4,8 +4,8 @@ # Scope: GLOBAL # # Access Type: Dynamic # # Data Type: numeric # -# Default Value: 64 # -# Range: 1-524288 # +# Default Value: 400 # +# Range: 64-524288 # # # # # # Creation Date: 2008-02-13 # @@ -54,7 +54,7 @@ SELECT @@global.table_open_cache; ############################################### SET @@global.table_open_cache = Default; -SELECT @@global.table_open_cache = 64; +SELECT @@global.table_open_cache = 400; --echo '#--------------------FN_DYNVARS_001_03------------------------#' ######################################################################## diff --git a/mysql-test/t/partition_innodb.test b/mysql-test/t/partition_innodb.test index 2abbceffbb0..c6bf0af0b6f 100644 --- a/mysql-test/t/partition_innodb.test +++ b/mysql-test/t/partition_innodb.test @@ -25,17 +25,21 @@ SET autocommit = 0; UPDATE t1 SET DATA = data*2 WHERE id = 3; +# SHOW ENGINE InnoDB STATUS does not show transaction info in +# PERFORMANCE-VERSION # grouping/referencing in replace_regex is very slow on long strings, # removing all before/after the interesting row before grouping/referencing 
---replace_regex /.*---TRANSACTION [0-9]+ [0-9]+, .*, OS thread id [0-9]+// /MySQL thread id [0-9]+, query id [0-9]+ .*// /.*([0-9]+ lock struct\(s\)), heap size [0-9]+, ([0-9]+ row lock\(s\)).*/\1 \2/ -SHOW ENGINE InnoDB STATUS; +#--replace_regex /.*---TRANSACTION [0-9]+ [0-9]+, .*, OS thread id [0-9]+// /MySQL thread id [0-9]+, query id [0-9]+ .*// /.*([0-9]+) lock struct\(s\), heap size [0-9]+, ([0-9]+) row lock\(s\).*/\1 lock struct(s) \2 row lock(s)/ +#SHOW ENGINE InnoDB STATUS; UPDATE t1 SET data = data*2 WHERE data = 2; +# SHOW ENGINE InnoDB STATUS does not show transaction info in +# PERFORMANCE-VERSION # grouping/referencing in replace_regex is very slow on long strings, # removing all before/after the interesting row before grouping/referencing ---replace_regex /.*---TRANSACTION [0-9]+ [0-9]+, .*, OS thread id [0-9]+// /MySQL thread id [0-9]+, query id [0-9]+ .*// /.*([0-9]+ lock struct\(s\)), heap size [0-9]+, ([0-9]+ row lock\(s\)).*/\1 \2/ -SHOW ENGINE InnoDB STATUS; +#--replace_regex /.*---TRANSACTION [0-9]+ [0-9]+, .*, OS thread id [0-9]+// /MySQL thread id [0-9]+, query id [0-9]+ .*// /.*([0-9]+ lock struct\(s\)), heap size [0-9]+, ([0-9]+ row lock\(s\)).*/\1 \2/ +#SHOW ENGINE InnoDB STATUS; SET @@session.tx_isolation = @old_tx_isolation; diff --git a/mysys/Makefile.am b/mysys/Makefile.am index 3312c692c09..045c80f32e4 100644 --- a/mysys/Makefile.am +++ b/mysys/Makefile.am @@ -126,5 +126,20 @@ test_base64$(EXEEXT): base64.c $(LIBRARIES) $(LINK) $(FLAGS) -DMAIN ./test_base64.c $(LDADD) $(LIBS) $(RM) -f ./test_base64.c +if HAVE_DTRACE_DASH_G +libmysys_a_LIBADD += probes_mysql.o +libmysys_a_DEPENDENCIES += probes_mysql.o dtrace_files dtrace_providers +CLEANFILES = probes_mysql.o dtrace_files dtrace_providers +DTRACEFILES = mf_keycache.o +DTRACEPROVIDER = $(abs_top_srcdir)/sql/probes_mysql.d + +dtrace_files: + echo $(DTRACEFILES) > $@ +dtrace_providers: + echo $(DTRACEPROVIDER) > $@ +probes_mysql.o: $(DTRACEPROVIDER) $(DTRACEFILES) + $(DTRACE) 
$(DTRACEFLAGS) -G -s $< $(DTRACEFILES) -o $@ +endif + # Don't update the files from bitkeeper %::SCCS/s.% diff --git a/mysys/mf_keycache.c b/mysys/mf_keycache.c index 7cbd35580f1..5274509a1d5 100644 --- a/mysys/mf_keycache.c +++ b/mysys/mf_keycache.c @@ -109,6 +109,7 @@ #include <my_bit.h> #include <errno.h> #include <stdarg.h> +#include "probes_mysql.h" /* Some compilation flags have been added specifically for this module @@ -2537,6 +2538,15 @@ uchar *key_cache_read(KEY_CACHE *keycache, uint status; int page_st; + if (MYSQL_KEYCACHE_READ_START_ENABLED()) + { + MYSQL_KEYCACHE_READ_START(my_filename(file), length, + (ulong) (keycache->blocks_used * + keycache->key_cache_block_size), + (ulong) (keycache->blocks_unused * + keycache->key_cache_block_size)); + } + /* When the key cache is once initialized, we use the cache_lock to reliably distinguish the cases of normal operation, resizing, and @@ -2586,6 +2596,9 @@ uchar *key_cache_read(KEY_CACHE *keycache, /* Request the cache block that matches file/pos. */ keycache->global_cache_r_requests++; + + MYSQL_KEYCACHE_READ_BLOCK(keycache->key_cache_block_size); + block=find_key_block(keycache, file, filepos, level, 0, &page_st); if (!block) { @@ -2605,6 +2618,7 @@ uchar *key_cache_read(KEY_CACHE *keycache, { if (page_st != PAGE_READ) { + MYSQL_KEYCACHE_READ_MISS(); /* The requested page is to be read into the block buffer */ read_block(keycache, block, keycache->key_cache_block_size, read_length+offset, @@ -2629,6 +2643,10 @@ uchar *key_cache_read(KEY_CACHE *keycache, my_errno= -1; block->status|= BLOCK_ERROR; } + else + { + MYSQL_KEYCACHE_READ_HIT(); + } } /* block status may have added BLOCK_ERROR in the above 'if'. 
*/ @@ -2673,7 +2691,16 @@ uchar *key_cache_read(KEY_CACHE *keycache, #ifndef THREAD /* This is only true if we where able to read everything in one block */ if (return_buffer) + { + if (MYSQL_KEYCACHE_READ_DONE_ENABLED()) + { + MYSQL_KEYCACHE_READ_DONE((ulong) (keycache->blocks_used * + keycache->key_cache_block_size), + (ulong) (keycache->blocks_unused * + keycache->key_cache_block_size)); + } DBUG_RETURN(block->buffer); + } #endif next_block: buff+= read_length; @@ -2681,6 +2708,13 @@ uchar *key_cache_read(KEY_CACHE *keycache, offset= 0; } while ((length-= read_length)); + if (MYSQL_KEYCACHE_READ_DONE_ENABLED()) + { + MYSQL_KEYCACHE_READ_DONE((ulong) (keycache->blocks_used * + keycache->key_cache_block_size), + (ulong) (keycache->blocks_unused * + keycache->key_cache_block_size)); + } goto end; } @@ -3009,6 +3043,15 @@ int key_cache_write(KEY_CACHE *keycache, uint offset; int page_st; + if (MYSQL_KEYCACHE_WRITE_START_ENABLED()) + { + MYSQL_KEYCACHE_WRITE_START(my_filename(file), length, + (ulong) (keycache->blocks_used * + keycache->key_cache_block_size), + (ulong) (keycache->blocks_unused * + keycache->key_cache_block_size)); + } + /* When the key cache is once initialized, we use the cache_lock to reliably distinguish the cases of normal operation, resizing, and @@ -3044,6 +3087,8 @@ int key_cache_write(KEY_CACHE *keycache, /* Cache could be disabled in a later iteration. */ if (!keycache->can_be_used) goto no_key_cache; + + MYSQL_KEYCACHE_WRITE_BLOCK(keycache->key_cache_block_size); /* Start writing at the beginning of the cache block. */ filepos-= offset; /* Do not write beyond the end of the cache block. 
*/ @@ -3247,6 +3292,15 @@ end: dec_counter_for_resize_op(keycache); keycache_pthread_mutex_unlock(&keycache->cache_lock); } + + if (MYSQL_KEYCACHE_WRITE_DONE_ENABLED()) + { + MYSQL_KEYCACHE_WRITE_DONE((ulong) (keycache->blocks_used * + keycache->key_cache_block_size), + (ulong) (keycache->blocks_unused * + keycache->key_cache_block_size)); + } + #if !defined(DBUG_OFF) && defined(EXTRA_DEBUG) DBUG_EXECUTE("exec", test_key_cache(keycache, "end of key_cache_write", 1);); diff --git a/scripts/Makefile.am b/scripts/Makefile.am index 0292617c7a5..e493c68827c 100644 --- a/scripts/Makefile.am +++ b/scripts/Makefile.am @@ -37,7 +37,8 @@ bin_SCRIPTS = @server_scripts@ \ mysqld_multi noinst_SCRIPTS = make_binary_distribution \ - make_sharedlib_distribution + make_sharedlib_distribution \ + dheadgen.pl EXTRA_SCRIPTS = make_binary_distribution.sh \ make_sharedlib_distribution.sh \ @@ -59,7 +60,8 @@ EXTRA_SCRIPTS = make_binary_distribution.sh \ mysqlhotcopy.sh \ mysqldumpslow.sh \ mysqld_multi.sh \ - mysqld_safe.sh + mysqld_safe.sh \ + dheadgen.pl EXTRA_DIST = $(EXTRA_SCRIPTS) \ mysqlaccess.conf \ @@ -110,7 +112,7 @@ mysqlbug: ${top_builddir}/config.status mysqlbug.sh mysql_fix_privilege_tables.sql: mysql_system_tables.sql \ mysql_system_tables_fix.sql @echo "Building $@"; - @cat mysql_system_tables.sql mysql_system_tables_fix.sql > $@ + @cat $(srcdir)/mysql_system_tables.sql $(srcdir)/mysql_system_tables_fix.sql > $@ # # Build mysql_fix_privilege_tables_sql.c from @@ -123,7 +125,7 @@ mysql_fix_privilege_tables_sql.c: comp_sql.c mysql_fix_privilege_tables.sql sleep 2 $(top_builddir)/scripts/comp_sql$(EXEEXT) \ mysql_fix_privilege_tables \ - $(top_srcdir)/scripts/mysql_fix_privilege_tables.sql $@ + $(top_builddir)/scripts/mysql_fix_privilege_tables.sql $@ SUFFIXES = .sh diff --git a/scripts/dheadgen.pl b/scripts/dheadgen.pl new file mode 100755 index 00000000000..5ead0f90a31 --- /dev/null +++ b/scripts/dheadgen.pl @@ -0,0 +1,338 @@ +#!/usr/bin/perl -w + +# +# Copyright 2007 Sun 
Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of the above-listed copyright holders nor the names +# of its contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +# IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# ident "@(#)dheadgen.pl 1.4 07/06/24 SMI" + +# +# DTrace Header Generator +# ----------------------- +# +# This script is meant to mimic the output of dtrace(1M) with the -h +# (headergen) flag on system that lack native support for DTrace. 
This script +# is intended to be integrated into projects that use DTrace's static tracing +# facilities (USDT), and invoked as part of the build process to have a +# common build process on all target systems. To facilitate this, this script +# is licensed under a BSD license. On system with native DTrace support, the +# dtrace(1M) command will be invoked to create the full header file; on other +# systems, this script will generated a stub header file. +# +# Normally, generated macros take the form PROVIDER_PROBENAME(). It may be +# desirable to customize the output of this script and of dtrace(1M) to +# tailor the precise macro name. To do this, edit the emit_dtrace() subroutine +# to pattern match for the lines you want to customize. +# + +use strict; + +my @lines; +my @tokens = (); +my $lineno = 0; +my $newline = 1; +my $eof = 0; +my $infile; +my $outfile; +my $force = 0; + +sub emit_dtrace { + my ($line) = @_; + + # + # Insert customization here. For example, if you want to change the + # name of the macros you may do something like this: + # + # $line =~ s/(\s)[A-Z]+_/\1TRACE_MOZILLA_/; + # + + print $line; +} + +# +# The remaining code deals with parsing D provider definitions and emitting +# the stub header file. There should be no need to edit this absent a bug. +# + +# +# Emit the two relevant macros for each probe in the given provider: +# PROVIDER_PROBENAME(<args>) +# PROVIDER_PROBENAME_ENABLED() (0) +# +sub emit_provider { + my ($provname, @probes) = @_; + + $provname = uc($provname); + + foreach my $probe (@probes) { + my $probename = uc($$probe{'name'}); + my $argc = $$probe{'argc'}; + my $line; + + $probename =~ s/__/_/g; + + $line = "#define\t${provname}_${probename}("; + for (my $i = 0; $i < $argc; $i++) { + $line .= ($i == 0 ? 
'' : ', '); + $line .= "arg$i"; + } + $line .= ")\n"; + emit_dtrace($line); + + $line = "#define\t${provname}_${probename}_ENABLED() (0)\n"; + emit_dtrace($line); + } + + emit_dtrace("\n"); +} + +sub emit_prologue { + my ($filename) = @_; + + $filename =~ s/.*\///g; + $filename = uc($filename); + $filename =~ s/\./_/g; + + emit_dtrace <<"EOF"; +/* + * Generated by dheadgen(1). + */ + +#ifndef\t_${filename} +#define\t_${filename} + +#ifdef\t__cplusplus +extern "C" { +#endif + +EOF +} + +sub emit_epilogue { + my ($filename) = @_; + + $filename =~ s/.*\///g; + $filename = uc($filename); + $filename =~ s/\./_/g; + + emit_dtrace <<"EOF"; +#ifdef __cplusplus +} +#endif + +#endif /* _$filename */ +EOF +} + +# +# Get the next token from the file keeping track of the line number. +# +sub get_token { + my ($eof_ok) = @_; + my $tok; + + while (1) { + while (scalar(@tokens) == 0) { + if (scalar(@lines) == 0) { + $eof = 1; + return if ($eof_ok); + die "expected more data at line $lineno"; + } + + $lineno++; + push(@tokens, split(/(\s+|\n|[(){},#;]|\/\*|\*\/)/, + shift(@lines))); + } + + $tok = shift(@tokens); + next if ($tok eq ''); + next if ($tok =~ /^[ \t]+$/); + + return ($tok); + } +} + +# +# Ignore newlines, comments and typedefs +# +sub next_token { + my ($eof_ok) = @_; + my $tok; + + while (1) { + $tok = get_token($eof_ok); + return if ($eof_ok && $eof); + if ($tok eq "typedef" or $tok =~ /^#/) { + while (1) { + $tok = get_token(0); + last if ($tok eq "\n"); + } + next; + } elsif ($tok eq '/*') { + while (get_token(0) ne '*/') { + next; + } + next; + } elsif ($tok eq "\n") { + next; + } + + last; + } + + return ($tok); +} + +sub expect_token { + my ($t) = @_; + my $tok; + + while (($tok = next_token(0)) eq "\n") { + next; + } + + die "expected '$t' at line $lineno rather than '$tok'" if ($t ne $tok); +} + +sub get_args { + expect_token('('); + + my $tok = next_token(0); + my @args = (); + + return (@args) if ($tok eq ')'); + + if ($tok eq 'void') { + expect_token(')'); 
+ return (@args); + } + + my $arg = $tok; + + while (1) { + $tok = next_token(0); + if ($tok eq ',' || $tok eq ')') { + push(@args, $arg); + $arg = ''; + last if ($tok eq ')'); + } else { + $arg = "$arg $tok"; + } + } + + return (@args); +} + +sub usage { + die "usage: $0 [-f] <filename.d>\n"; +} + +usage() if (scalar(@ARGV) < 1); +if ($ARGV[0] eq '-f') { + usage() if (scalar(@ARGV < 2)); + $force = 1; + shift; +} +$infile = $ARGV[0]; +usage() if ($infile !~ /(.+)\.d$/); + +# +# If the system has native support for DTrace, we'll use that binary instead. +# +if (-x '/usr/sbin/dtrace' && !$force) { + open(my $dt, '-|', "/usr/sbin/dtrace -C -h -s $infile -o /dev/stdout") + or die "can't invoke dtrace(1M)"; + + while (<$dt>) { + emit_dtrace($_); + } + + close($dt); + + exit(0); +} + +emit_prologue($infile); + +open(my $d, '<', $infile) or die "couldn't open $infile"; +@lines = <$d>; +close($d); + +while (1) { + my $nl = 0; + my $tok = next_token(1); + last if $eof; + + if ($newline && $tok eq '#') { + while (1) { + $tok = get_token(0); + + last if ($tok eq "\n"); + } + $nl = 1; + } elsif ($tok eq "\n") { + $nl = 1; + } elsif ($tok eq 'provider') { + my $provname = next_token(0); + my @probes = (); + expect_token('{'); + + while (1) { + $tok = next_token(0); + if ($tok eq 'probe') { + my $probename = next_token(0); + my @args = get_args(); + + next while (next_token(0) ne ';'); + + push(@probes, { + 'name' => $probename, + 'argc' => scalar(@args) + }); + + } elsif ($tok eq '}') { + expect_token(';'); + + emit_provider($provname, @probes); + + last; + } + } + + } else { + die "syntax error at line $lineno near '$tok'\n"; + } + + $newline = $nl; +} + +emit_epilogue($infile); + +exit(0); diff --git a/scripts/make_binary_distribution.sh b/scripts/make_binary_distribution.sh index ee7c36b097d..f73de69f5b3 100644 --- a/scripts/make_binary_distribution.sh +++ b/scripts/make_binary_distribution.sh @@ -337,7 +337,6 @@ BIN_FILES="extra/comp_err$BS extra/replace$BS extra/perror$BS 
\ storage/myisam/myisamlog$BS storage/myisam/myisam_ftdump$BS \ sql/mysqld$BS sql/mysqld-debug$BS \ sql/mysql_tzinfo_to_sql$BS \ - server-tools/instance-manager/mysqlmanager$BS \ client/mysql$BS client/mysqlshow$BS client/mysqladmin$BS \ client/mysqlslap$BS \ client/mysqldump$BS client/mysqlimport$BS \ diff --git a/scripts/make_win_bin_dist b/scripts/make_win_bin_dist index 5eb5a5643f1..7419083f262 100755 --- a/scripts/make_win_bin_dist +++ b/scripts/make_win_bin_dist @@ -146,9 +146,7 @@ mkdir $DESTDIR/bin cp client/$TARGET/*.exe $DESTDIR/bin/ cp extra/$TARGET/*.exe $DESTDIR/bin/ cp storage/myisam/$TARGET/*.exe $DESTDIR/bin/ -cp server-tools/instance-manager/$TARGET/*.{exe,map} $DESTDIR/bin/ if [ x"$TARGET" != x"release" ] ; then - cp server-tools/instance-manager/$TARGET/*.pdb $DESTDIR/bin/ cp client/$TARGET/mysql.pdb $DESTDIR/bin/ cp client/$TARGET/mysqladmin.pdb $DESTDIR/bin/ cp client/$TARGET/mysqlbinlog.pdb $DESTDIR/bin/ diff --git a/sql/Makefile.am b/sql/Makefile.am index e477a6123ec..d05e0efbcba 100644 --- a/sql/Makefile.am +++ b/sql/Makefile.am @@ -28,6 +28,23 @@ SUBDIRS = share libexec_PROGRAMS = mysqld EXTRA_PROGRAMS = gen_lex_hash bin_PROGRAMS = mysql_tzinfo_to_sql +DTRACEFILES = filesort.o \ + .libs/libndb_la-ha_ndbcluster.o \ + handler.o \ + mysqld.o \ + net_serv.o \ + scheduler.o \ + sp_head.o \ + sql_cache.o \ + sql_connect.o \ + sql_cursor.o \ + sql_delete.o \ + sql_insert.o \ + sql_parse.o \ + sql_prepare.o \ + sql_select.o \ + sql_update.o + noinst_LTLIBRARIES= libndb.la \ udf_example.la @@ -148,6 +165,7 @@ BUILT_SOURCES = $(BUILT_MAINT_SRC) lex_hash.h link_sources EXTRA_DIST = udf_example.c udf_example.def $(BUILT_MAINT_SRC) \ nt_servc.cc nt_servc.h \ message.mc message.h message.rc MSG00001.bin \ + probes_mysql.d \ CMakeLists.txt CLEANFILES = lex_hash.h sql_yacc.output link_sources @@ -188,5 +206,31 @@ install-exec-hook: test ! -f mysqld-debug.sym.gz || $(INSTALL_DATA) mysqld-debug.sym.gz $(DESTDIR)$(pkglibdir) test ! 
-f mysqld.sym.gz || $(INSTALL_DATA) mysqld.sym.gz $(DESTDIR)$(pkglibdir) +if HAVE_DTRACE_DASH_G +libndb_la_LIBADD = probes_libndb.o +libndb_la_DEPENDENCIES = dtrace_files dtrace_providers probes_libndb.o +mysqld_LDADD += probes_all.o +mysqld_DEPENDENCIES += dtrace_files dtrace_providers probes_all.o +CLEANFILES += dtrace_files dtrace_providers probes_all.o +DTRACEPROVIDER = $(abs_top_srcdir)/sql/probes_mysql.d + +dtrace_files: + echo $(DTRACEFILES) > $@ +dtrace_providers: + echo $(DTRACEPROVIDER) > $@ + +DTRACEDIRS = . ../mysys $(patsubst %,$(top_builddir)/storage/%,@mysql_se_dirs@) + +probes_all.o: probes_mysql.d $(DTRACEFILES) + providers=`(for i in $(DTRACEDIRS); do cat $$i/dtrace_providers 2>/dev/null; done) | tr " " "\n" | sort | uniq | sed -e '/^$$/d' -e 's/^/-s /'`; \ + objects=`for i in $(DTRACEDIRS); do f=\`cat $$i/dtrace_files 2>/dev/null\`; for j in $$f; do test -f $$i/$$j && echo "$$i/$$j "; done; done`; \ + $(DTRACE) $(DTRACEFLAGS) -G $$providers $$objects -o $@ + +# Can't depend directly on .libs/*.o, because there is no generated rule for +# that in the Makefile; it is a byproduct of *.lo +probes_libndb.o: probes_mysql.d libndb_la-ha_ndbcluster.lo + $(DTRACE) $(DTRACEFLAGS) -G -s probes_mysql.d .libs/libndb_la-ha_ndbcluster.o -o $@ +endif + # Don't update the files from bitkeeper %::SCCS/s.% diff --git a/sql/filesort.cc b/sql/filesort.cc index 5d8b4e869c8..8f18471b378 100644 --- a/sql/filesort.cc +++ b/sql/filesort.cc @@ -27,6 +27,7 @@ #endif #include <m_ctype.h> #include "sql_sort.h" +#include "probes_mysql.h" #ifndef THREAD #define SKIP_DBUG_IN_FILESORT @@ -121,6 +122,8 @@ ha_rows filesort(THD *thd, TABLE *table, SORT_FIELD *sortorder, uint s_length, TABLE_LIST *tab= table->pos_in_table_list; Item_subselect *subselect= tab ? tab->containing_subselect() : 0; + MYSQL_FILESORT_START(table->s->db.str, table->s->table_name.str); + /* Release InnoDB's adaptive hash index latch (if holding) before running a sort. 
@@ -331,6 +334,7 @@ ha_rows filesort(THD *thd, TABLE *table, SORT_FIELD *sortorder, uint s_length, #endif memcpy(&table->sort, &table_sort, sizeof(FILESORT_INFO)); DBUG_PRINT("exit",("records: %ld", (long) records)); + MYSQL_FILESORT_DONE(error, records); DBUG_RETURN(error ? HA_POS_ERROR : records); } /* filesort */ diff --git a/sql/ha_ndbcluster.cc b/sql/ha_ndbcluster.cc index 3454f3558e8..c804b84ca87 100644 --- a/sql/ha_ndbcluster.cc +++ b/sql/ha_ndbcluster.cc @@ -41,6 +41,7 @@ #include "ha_ndbcluster_tables.h" #include <mysql/plugin.h> +#include "probes_mysql.h" #ifdef ndb_dynamite #undef assert @@ -128,6 +129,13 @@ static uint ndbcluster_alter_table_flags(uint flags) DBUG_RETURN(ndb_to_mysql_error(&tmp)); \ } +#define ERR_RETURN_PREPARE(rc, err) \ +{ \ + const NdbError& tmp= err; \ + set_ndb_err(current_thd, tmp); \ + rc= ndb_to_mysql_error(&tmp); \ +} + #define ERR_BREAK(err, code) \ { \ const NdbError& tmp= err; \ @@ -3601,9 +3609,11 @@ int ha_ndbcluster::index_read(uchar *buf, { key_range start_key; bool descending= FALSE; + int rc; DBUG_ENTER("ha_ndbcluster::index_read"); DBUG_PRINT("enter", ("active_index: %u, key_len: %u, find_flag: %d", active_index, key_len, find_flag)); + MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str); start_key.key= key; start_key.length= key_len; @@ -3619,43 +3629,61 @@ int ha_ndbcluster::index_read(uchar *buf, default: break; } - DBUG_RETURN(read_range_first_to_buf(&start_key, 0, descending, - m_sorted, buf)); + rc= read_range_first_to_buf(&start_key, 0, descending, + m_sorted, buf); + MYSQL_INDEX_READ_ROW_DONE(rc); + DBUG_RETURN(rc); } int ha_ndbcluster::index_next(uchar *buf) { + int rc; DBUG_ENTER("ha_ndbcluster::index_next"); + MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str); ha_statistic_increment(&SSV::ha_read_next_count); - DBUG_RETURN(next_result(buf)); + rc= next_result(buf); + MYSQL_INDEX_READ_ROW_DONE(rc); + DBUG_RETURN(rc); } int ha_ndbcluster::index_prev(uchar 
*buf) { + int rc; DBUG_ENTER("ha_ndbcluster::index_prev"); + MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str); ha_statistic_increment(&SSV::ha_read_prev_count); - DBUG_RETURN(next_result(buf)); + rc= next_result(buf); + MYSQL_INDEX_READ_ROW_DONE(rc); + DBUG_RETURN(rc); } int ha_ndbcluster::index_first(uchar *buf) { + int rc; DBUG_ENTER("ha_ndbcluster::index_first"); + MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str); ha_statistic_increment(&SSV::ha_read_first_count); // Start the ordered index scan and fetch the first row // Only HA_READ_ORDER indexes get called by index_first - DBUG_RETURN(ordered_index_scan(0, 0, TRUE, FALSE, buf, NULL)); + rc= ordered_index_scan(0, 0, TRUE, FALSE, buf, NULL); + MYSQL_INDEX_READ_ROW_DONE(rc); + DBUG_RETURN(rc); } int ha_ndbcluster::index_last(uchar *buf) { + int rc; DBUG_ENTER("ha_ndbcluster::index_last"); + MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str); ha_statistic_increment(&SSV::ha_read_last_count); - DBUG_RETURN(ordered_index_scan(0, 0, TRUE, TRUE, buf, NULL)); + rc= ordered_index_scan(0, 0, TRUE, TRUE, buf, NULL); + MYSQL_INDEX_READ_ROW_DONE(rc); + DBUG_RETURN(rc); } int ha_ndbcluster::index_read_last(uchar * buf, const uchar * key, uint key_len) @@ -3747,16 +3775,24 @@ int ha_ndbcluster::read_range_first(const key_range *start_key, const key_range *end_key, bool eq_r, bool sorted) { + int rc; uchar* buf= table->record[0]; DBUG_ENTER("ha_ndbcluster::read_range_first"); - DBUG_RETURN(read_range_first_to_buf(start_key, end_key, FALSE, - sorted, buf)); + MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str); + rc= read_range_first_to_buf(start_key, end_key, FALSE, + sorted, buf); + MYSQL_INDEX_READ_ROW_DONE(rc); + DBUG_RETURN(rc); } int ha_ndbcluster::read_range_next() { + int rc; DBUG_ENTER("ha_ndbcluster::read_range_next"); - DBUG_RETURN(next_result(table->record[0])); + 
MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str); + rc= next_result(table->record[0]); + MYSQL_INDEX_READ_ROW_DONE(rc); + DBUG_RETURN(rc); } @@ -3839,12 +3875,18 @@ int ha_ndbcluster::rnd_end() int ha_ndbcluster::rnd_next(uchar *buf) { + int rc; DBUG_ENTER("rnd_next"); + MYSQL_READ_ROW_START(table_share->db.str, table_share->table_name.str, + TRUE); ha_statistic_increment(&SSV::ha_read_rnd_next_count); if (!m_active_cursor) - DBUG_RETURN(full_table_scan(buf)); - DBUG_RETURN(next_result(buf)); + rc= full_table_scan(buf); + else + rc= next_result(buf); + MYSQL_READ_ROW_DONE(rc); + DBUG_RETURN(rc); } @@ -3856,7 +3898,10 @@ int ha_ndbcluster::rnd_next(uchar *buf) int ha_ndbcluster::rnd_pos(uchar *buf, uchar *pos) { + int rc; DBUG_ENTER("rnd_pos"); + MYSQL_READ_ROW_START(table_share->db.str, table_share->table_name.str, + FALSE); ha_statistic_increment(&SSV::ha_read_rnd_count); // The primary key for the record is stored in pos // Perform a pk_read using primary key "index" @@ -3889,7 +3934,9 @@ int ha_ndbcluster::rnd_pos(uchar *buf, uchar *pos) DBUG_PRINT("info", ("partition id %u", part_spec.start_part)); } DBUG_DUMP("key", pos, key_length); - DBUG_RETURN(pk_read(pos, key_length, buf, part_spec.start_part)); + rc= pk_read(pos, key_length, buf, part_spec.start_part); + MYSQL_READ_ROW_DONE(rc); + DBUG_RETURN(rc); } } @@ -8727,6 +8774,7 @@ ha_ndbcluster::read_multi_range_first(KEY_MULTI_RANGE **found_range_p, sorted, buffer)); } + MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str); thd_ndb->query_state|= NDB_QUERY_MULTI_READ_RANGE; m_disable_multi_read= FALSE; @@ -8808,7 +8856,13 @@ ha_ndbcluster::read_multi_range_first(KEY_MULTI_RANGE **found_range_p, (op->setPartitionId(part_spec.start_part), TRUE))) curr += reclength; else - ERR_RETURN(op ? op->getNdbError() : m_active_trans->getNdbError()); + { + ERR_RETURN_PREPARE(res, + op ? 
op->getNdbError() : + m_active_trans->getNdbError()) + MYSQL_INDEX_READ_ROW_DONE(res); + DBUG_RETURN(res); + } break; } break; @@ -8828,7 +8882,13 @@ ha_ndbcluster::read_multi_range_first(KEY_MULTI_RANGE **found_range_p, !define_read_attrs(curr, op)) curr += reclength; else - ERR_RETURN(op ? op->getNdbError() : m_active_trans->getNdbError()); + { + ERR_RETURN_PREPARE(res, + op ? op->getNdbError() : + m_active_trans->getNdbError()); + MYSQL_INDEX_READ_ROW_DONE(res); + DBUG_RETURN(res); + } break; } case ORDERED_INDEX: { @@ -8843,7 +8903,11 @@ ha_ndbcluster::read_multi_range_first(KEY_MULTI_RANGE **found_range_p, DBUG_ASSERT(scanOp->getLockMode() == (NdbOperation::LockMode)get_ndb_lock_type(m_lock.type)); if (scanOp->reset_bounds(m_force_send)) - DBUG_RETURN(ndb_err(m_active_trans)); + { + res= ndb_err(m_active_trans); + MYSQL_INDEX_READ_ROW_DONE(res); + DBUG_RETURN(res); + } end_of_buffer -= reclength; } @@ -8858,8 +8922,11 @@ ha_ndbcluster::read_multi_range_first(KEY_MULTI_RANGE **found_range_p, } else { - ERR_RETURN(scanOp ? scanOp->getNdbError() : - m_active_trans->getNdbError()); + ERR_RETURN_PREPARE(res, + scanOp ? 
scanOp->getNdbError() : + m_active_trans->getNdbError()); + MYSQL_INDEX_READ_ROW_DONE(res); + DBUG_RETURN(res); } } @@ -8867,11 +8934,15 @@ ha_ndbcluster::read_multi_range_first(KEY_MULTI_RANGE **found_range_p, &multi_range_curr->end_key }; if ((res= set_bounds(scanOp, active_index, FALSE, keys, multi_range_curr-ranges))) + { + MYSQL_INDEX_READ_ROW_DONE(res); DBUG_RETURN(res); + } break; } case UNDEFINED_INDEX: DBUG_ASSERT(FALSE); + MYSQL_INDEX_READ_ROW_DONE(1); DBUG_RETURN(1); break; } @@ -8902,9 +8973,13 @@ ha_ndbcluster::read_multi_range_first(KEY_MULTI_RANGE **found_range_p, m_multi_range_defined= multi_range_curr; multi_range_curr= ranges; m_multi_range_result_ptr= (uchar*)buffer->buffer; - DBUG_RETURN(read_multi_range_next(found_range_p)); + res= loc_read_multi_range_next(found_range_p); + MYSQL_INDEX_READ_ROW_DONE(res); + DBUG_RETURN(res); } - ERR_RETURN(m_active_trans->getNdbError()); + ERR_RETURN_PREPARE(res, m_active_trans->getNdbError()); + MYSQL_INDEX_READ_ROW_DONE(res); + DBUG_RETURN(res); } #if 0 @@ -8916,17 +8991,28 @@ ha_ndbcluster::read_multi_range_first(KEY_MULTI_RANGE **found_range_p, int ha_ndbcluster::read_multi_range_next(KEY_MULTI_RANGE ** multi_range_found_p) { + int rc; DBUG_ENTER("ha_ndbcluster::read_multi_range_next"); if (m_disable_multi_read) { DBUG_MULTI_RANGE(11); DBUG_RETURN(handler::read_multi_range_next(multi_range_found_p)); } - + MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str); + rc= loc_read_multi_range_next(multi_range_found_p); + MYSQL_INDEX_READ_ROW_DONE(rc); + DBUG_RETURN(rc); +} + +int ha_ndbcluster::loc_read_multi_range_next( + KEY_MULTI_RANGE **multi_range_found_p) +{ int res; int range_no; ulong reclength= table_share->reclength; const NdbOperation* op= m_current_multi_operation; + DBUG_ENTER("ha_ndbcluster::loc_read_multi_range_next"); + for (;multi_range_curr < m_multi_range_defined; multi_range_curr++) { DBUG_MULTI_RANGE(12); @@ -9033,6 +9119,7 @@ close_scan: /* * Read remaining ranges */ 
+ MYSQL_INDEX_READ_ROW_DONE(1); DBUG_RETURN(read_multi_range_first(multi_range_found_p, multi_range_curr, multi_range_end - multi_range_curr, diff --git a/sql/ha_ndbcluster.h b/sql/ha_ndbcluster.h index a17323d3fd6..9106fd60731 100644 --- a/sql/ha_ndbcluster.h +++ b/sql/ha_ndbcluster.h @@ -385,6 +385,7 @@ static void set_tabname(const char *pathname, char *tabname); uint table_changes); private: + int loc_read_multi_range_next(KEY_MULTI_RANGE **found_range_p); friend int ndbcluster_drop_database_impl(const char *path); friend int ndb_handle_schema_change(THD *thd, Ndb *ndb, NdbEventOperation *pOp, diff --git a/sql/handler.cc b/sql/handler.cc index 948cb08b13f..2145d7c84eb 100644 --- a/sql/handler.cc +++ b/sql/handler.cc @@ -27,6 +27,7 @@ #include "rpl_filter.h" #include <myisampack.h> #include <errno.h> +#include "probes_mysql.h" #ifdef WITH_PARTITION_STORAGE_ENGINE #include "ha_partition.h" @@ -4530,11 +4531,51 @@ int handler::ha_external_lock(THD *thd, int lock_type) */ DBUG_ASSERT(next_insert_id == 0); + if (MYSQL_HANDLER_RDLOCK_START_ENABLED() || + MYSQL_HANDLER_WRLOCK_START_ENABLED() || + MYSQL_HANDLER_UNLOCK_START_ENABLED()) + { + if (lock_type == F_RDLCK) + { + MYSQL_HANDLER_RDLOCK_START(table_share->db.str, + table_share->table_name.str); + } + else if (lock_type == F_WRLCK) + { + MYSQL_HANDLER_WRLOCK_START(table_share->db.str, + table_share->table_name.str); + } + else if (lock_type == F_UNLCK) + { + MYSQL_HANDLER_UNLOCK_START(table_share->db.str, + table_share->table_name.str); + } + } + /* We cache the table flags if the locking succeeded. Otherwise, we keep them as they were when they were fetched in ha_open(). 
*/ int error= external_lock(thd, lock_type); + + if (MYSQL_HANDLER_RDLOCK_DONE_ENABLED() || + MYSQL_HANDLER_WRLOCK_DONE_ENABLED() || + MYSQL_HANDLER_UNLOCK_DONE_ENABLED()) + { + if (lock_type == F_RDLCK) + { + MYSQL_HANDLER_RDLOCK_DONE(error); + } + else if (lock_type == F_WRLCK) + { + MYSQL_HANDLER_WRLOCK_DONE(error); + } + else if (lock_type == F_UNLCK) + { + MYSQL_HANDLER_UNLOCK_DONE(error); + } + } + if (error == 0) cached_table_flags= table_flags(); DBUG_RETURN(error); @@ -4569,10 +4610,14 @@ int handler::ha_write_row(uchar *buf) Log_func *log_func= Write_rows_log_event::binlog_row_logging_function; DBUG_ENTER("handler::ha_write_row"); + MYSQL_INSERT_ROW_START(table_share->db.str, table_share->table_name.str); mark_trx_read_write(); - if (unlikely(error= write_row(buf))) + error= write_row(buf); + MYSQL_INSERT_ROW_DONE(error); + if (unlikely(error)) DBUG_RETURN(error); + if (unlikely(error= binlog_log_row(table, 0, buf, log_func))) DBUG_RETURN(error); /* purecov: inspected */ DBUG_RETURN(0); @@ -4590,9 +4635,12 @@ int handler::ha_update_row(const uchar *old_data, uchar *new_data) */ DBUG_ASSERT(new_data == table->record[0]); + MYSQL_UPDATE_ROW_START(table_share->db.str, table_share->table_name.str); mark_trx_read_write(); - if (unlikely(error= update_row(old_data, new_data))) + error= update_row(old_data, new_data); + MYSQL_UPDATE_ROW_DONE(error); + if (unlikely(error)) return error; if (unlikely(error= binlog_log_row(table, old_data, new_data, log_func))) return error; @@ -4604,9 +4652,12 @@ int handler::ha_delete_row(const uchar *buf) int error; Log_func *log_func= Delete_rows_log_event::binlog_row_logging_function; + MYSQL_DELETE_ROW_START(table_share->db.str, table_share->table_name.str); mark_trx_read_write(); - if (unlikely(error= delete_row(buf))) + error= delete_row(buf); + MYSQL_DELETE_ROW_DONE(error); + if (unlikely(error)) return error; if (unlikely(error= binlog_log_row(table, buf, 0, log_func))) return error; diff --git a/sql/mysql_priv.h 
b/sql/mysql_priv.h index 015128cda1f..8671f2189bd 100644 --- a/sql/mysql_priv.h +++ b/sql/mysql_priv.h @@ -265,8 +265,8 @@ protected: #define MAX_FIELDS_BEFORE_HASH 32 #define USER_VARS_HASH_SIZE 16 #define TABLE_OPEN_CACHE_MIN 64 -#define TABLE_OPEN_CACHE_DEFAULT 64 -#define TABLE_DEF_CACHE_DEFAULT 256 +#define TABLE_OPEN_CACHE_DEFAULT 400 +#define TABLE_DEF_CACHE_DEFAULT 400 /** We must have room for at least 256 table definitions in the table cache, since otherwise there is no chance prepared @@ -1227,12 +1227,14 @@ bool mysql_prepare_update(THD *thd, TABLE_LIST *table_list, int mysql_update(THD *thd,TABLE_LIST *tables,List<Item> &fields, List<Item> &values,COND *conds, uint order_num, ORDER *order, ha_rows limit, - enum enum_duplicates handle_duplicates, bool ignore); + enum enum_duplicates handle_duplicates, bool ignore, + ha_rows *found_return, ha_rows *updated_return); bool mysql_multi_update(THD *thd, TABLE_LIST *table_list, List<Item> *fields, List<Item> *values, COND *conds, ulonglong options, enum enum_duplicates handle_duplicates, bool ignore, - SELECT_LEX_UNIT *unit, SELECT_LEX *select_lex); + SELECT_LEX_UNIT *unit, SELECT_LEX *select_lex, + multi_update **result); bool mysql_prepare_insert(THD *thd, TABLE_LIST *table_list, TABLE *table, List<Item> &fields, List_item *values, List<Item> &update_fields, diff --git a/sql/mysqld.cc b/sql/mysqld.cc index 0213eea889b..553179d68f3 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -26,6 +26,7 @@ #include "mysqld_suffix.h" #include "mysys_err.h" #include "events.h" +#include "probes_mysql.h" #include "../storage/myisam/ha_myisam.h" @@ -129,6 +130,16 @@ extern "C" { // Because of SCO 3.2V4.2 #define SIGNAL_FMT "signal %d" #endif +#ifdef HAVE_SOLARIS_LARGE_PAGES +#include <sys/mman.h> +#if defined(__sun__) && defined(__GNUC__) && defined(__cplusplus) \ + && defined(_XOPEN_SOURCE) +extern int getpagesizes(size_t *, int); +extern int getpagesizes2(size_t *, int); +extern int memcntl(caddr_t, size_t, int, caddr_t, 
int, int); +#endif /* __sun__ ... */ +#endif /* HAVE_SOLARIS_LARGE_PAGES */ + #ifdef __NETWARE__ #define zVOLSTATE_ACTIVE 6 #define zVOLSTATE_DEACTIVE 2 @@ -443,7 +454,8 @@ char* opt_secure_file_priv= 0; my_bool opt_log_slow_admin_statements= 0; my_bool opt_log_slow_slave_statements= 0; my_bool lower_case_file_system= 0; -my_bool opt_large_pages= 0; +my_bool opt_large_pages= 1; +my_bool opt_super_large_pages= 0; my_bool opt_myisam_use_mmap= 0; uint opt_large_page_size= 0; my_bool opt_old_style_user_limits= 0, trust_function_creators= 0; @@ -1752,6 +1764,7 @@ void close_connection(THD *thd, uint errcode, bool lock) net_send_error(thd, errcode, ER(errcode)); /* purecov: inspected */ vio_close(vio); /* vio is freed in delete thd */ } + MYSQL_CONNECTION_DONE((int) errcode, thd->thread_id); if (lock) (void) pthread_mutex_unlock(&LOCK_thread_count); DBUG_VOID_RETURN; @@ -3233,6 +3246,57 @@ static int init_common_variables(const char *conf_file_name, int argc, my_large_page_size= opt_large_page_size; } #endif /* HAVE_LARGE_PAGES */ +#ifdef HAVE_SOLARIS_LARGE_PAGES +#define LARGE_PAGESIZE (4*1024*1024) /* 4MB */ +#define SUPER_LARGE_PAGESIZE (256*1024*1024) /* 256MB */ + if (opt_large_pages) + { + /* + Tell the kernel that we want to use 4 MByte or 256 MByte pages for heap + storage and also for the stack. We use 4 MByte by default; if the + super-large-pages option is set we increase it to 256 MByte. 256 MByte + is for server installations with GBytes of RAM where + the MySQL Server will have page caches and other memory regions + measured in a number of GBytes. + We use the largest page size the system supports that is not bigger + than the desired page size above. 
+ */ + int nelem; + /* Upper bound on the page size requested from the kernel. */ + size_t max_desired_page_size; + if (opt_super_large_pages) + max_desired_page_size= SUPER_LARGE_PAGESIZE; + else + max_desired_page_size= LARGE_PAGESIZE; + nelem = getpagesizes(NULL, 0); + if (nelem > 0) + { + size_t *pagesize = (size_t *) malloc(sizeof(size_t) * nelem); + if (pagesize != NULL && getpagesizes(pagesize, nelem) > 0) + { + size_t i, max_page_size= 0; + for (i= 0; i < nelem; i++) + { + if (pagesize[i] > max_page_size && + pagesize[i] <= max_desired_page_size) + max_page_size= pagesize[i]; + } + free(pagesize); + if (max_page_size > 0) + { + struct memcntl_mha mpss; + + mpss.mha_cmd= MHA_MAPSIZE_BSSBRK; + mpss.mha_pagesize= max_page_size; + mpss.mha_flags= 0; + memcntl(NULL, 0, MC_HAT_ADVISE, (caddr_t)&mpss, 0, 0); + mpss.mha_cmd= MHA_MAPSIZE_STACK; + memcntl(NULL, 0, MC_HAT_ADVISE, (caddr_t)&mpss, 0, 0); + } + } + } + } +#endif /* HAVE_SOLARIS_LARGE_PAGES */ /* connections and databases needs lots of files */ { @@ -3462,6 +3526,19 @@ You should consider changing lower_case_table_names to 1 or 2", files_charset_info : &my_charset_bin); + + /* + If we explicitly turn off query cache from the command line query cache will + be disabled for the remainder of the server lifetime. This is because we + want to avoid locking the QC specific mutex if query cache isn't going to + be used. 
+ */ + if (global_system_variables.query_cache_type == 0) + { + have_query_cache= SHOW_OPTION_NO; + query_cache.disable_query_cache(); + } + return 0; } @@ -5565,6 +5642,7 @@ enum options_mysqld OPT_MAX_SP_RECURSION_DEPTH, OPT_AUTO_INCREMENT, OPT_AUTO_INCREMENT_OFFSET, OPT_ENABLE_LARGE_PAGES, + OPT_ENABLE_SUPER_LARGE_PAGES, OPT_TIMED_MUTEXES, OPT_OLD_STYLE_USER_LIMITS, OPT_LOG_SLOW_ADMIN_STATEMENTS, @@ -5786,11 +5864,15 @@ struct my_option my_long_options[] = {"general_log", OPT_GENERAL_LOG, "Enable|disable general log", (uchar**) &opt_log, (uchar**) &opt_log, 0, GET_BOOL, OPT_ARG, 0, 0, 0, 0, 0, 0}, -#ifdef HAVE_LARGE_PAGES +#ifdef HAVE_LARGE_PAGE_OPTION {"large-pages", OPT_ENABLE_LARGE_PAGES, "Enable support for large pages. \ Disable with --skip-large-pages.", (uchar**) &opt_large_pages, (uchar**) &opt_large_pages, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"super-large-pages", OPT_ENABLE_SUPER_LARGE_PAGES, "Enable support for super large pages. \ +Disable with --skip-super-large-pages.", + (uchar**) &opt_super_large_pages, (uchar**) &opt_super_large_pages, 0, GET_BOOL, NO_ARG, 0, 0, 0, + 0, 0, 0}, #endif {"ignore-builtin-innodb", OPT_IGNORE_BUILTIN_INNODB , "Disable initialization of builtin InnoDB plugin", @@ -6742,12 +6824,10 @@ The minimum value for this variable is 4096.", (uchar**) &query_cache_min_res_unit, (uchar**) &query_cache_min_res_unit, 0, GET_ULONG, REQUIRED_ARG, QUERY_CACHE_MIN_RESULT_DATA_SIZE, 0, ULONG_MAX, 0, 1, 0}, -#endif /*HAVE_QUERY_CACHE*/ {"query_cache_size", OPT_QUERY_CACHE_SIZE, "The memory allocated to store results from old queries.", (uchar**) &query_cache_size, (uchar**) &query_cache_size, 0, GET_ULONG, REQUIRED_ARG, 0, 0, (longlong) ULONG_MAX, 0, 1024, 0}, -#ifdef HAVE_QUERY_CACHE {"query_cache_type", OPT_QUERY_CACHE_TYPE, "0 = OFF = Don't cache or retrieve results. 1 = ON = Cache all results except SELECT SQL_NO_CACHE ... queries. 2 = DEMAND = Cache only SELECT SQL_CACHE ... 
queries.", (uchar**) &global_system_variables.query_cache_type, @@ -7520,7 +7600,8 @@ static int mysql_init_variables(void) mysqld_unix_port= opt_mysql_tmpdir= my_bind_addr_str= NullS; bzero((uchar*) &mysql_tmpdir_list, sizeof(mysql_tmpdir_list)); bzero((char *) &global_status_var, sizeof(global_status_var)); - opt_large_pages= 0; + opt_large_pages= 1; + opt_super_large_pages= 0; key_map_full.set_all(); /* Character sets */ diff --git a/sql/net_serv.cc b/sql/net_serv.cc index 1098e8e6832..ed47e4b9c7b 100644 --- a/sql/net_serv.cc +++ b/sql/net_serv.cc @@ -48,6 +48,7 @@ #include <violite.h> #include <signal.h> #include <errno.h> +#include "probes_mysql.h" #ifdef __NETWARE__ #include <sys/select.h> #endif @@ -368,8 +369,13 @@ my_bool my_net_write(NET *net,const uchar *packet,size_t len) { uchar buff[NET_HEADER_SIZE]; + int rc; + if (unlikely(!net->vio)) /* nowhere to write */ return 0; + + MYSQL_NET_WRITE_START(len); + /* Big packets are handled by splitting them in packets of MAX_PACKET_LENGTH length. The last packet is always a packet that is < MAX_PACKET_LENGTH. 
@@ -382,7 +388,10 @@ my_net_write(NET *net,const uchar *packet,size_t len) buff[3]= (uchar) net->pkt_nr++; if (net_write_buff(net, buff, NET_HEADER_SIZE) || net_write_buff(net, packet, z_size)) + { + MYSQL_NET_WRITE_DONE(1); return 1; + } packet += z_size; len-= z_size; } @@ -390,11 +399,16 @@ my_net_write(NET *net,const uchar *packet,size_t len) int3store(buff,len); buff[3]= (uchar) net->pkt_nr++; if (net_write_buff(net, buff, NET_HEADER_SIZE)) + { + MYSQL_NET_WRITE_DONE(1); return 1; + } #ifndef DEBUG_DATA_PACKETS DBUG_DUMP("packet_header", buff, NET_HEADER_SIZE); #endif - return test(net_write_buff(net,packet,len)); + rc= test(net_write_buff(net,packet,len)); + MYSQL_NET_WRITE_DONE(rc); + return rc; } /** @@ -432,9 +446,12 @@ net_write_command(NET *net,uchar command, ulong length=len+1+head_len; /* 1 extra byte for command */ uchar buff[NET_HEADER_SIZE+1]; uint header_size=NET_HEADER_SIZE+1; + int rc; DBUG_ENTER("net_write_command"); DBUG_PRINT("enter",("length: %lu", (ulong) len)); + MYSQL_NET_WRITE_START(length); + buff[4]=command; /* For first packet */ if (length >= MAX_PACKET_LENGTH) @@ -448,7 +465,10 @@ net_write_command(NET *net,uchar command, if (net_write_buff(net, buff, header_size) || net_write_buff(net, header, head_len) || net_write_buff(net, packet, len)) + { + MYSQL_NET_WRITE_DONE(1); DBUG_RETURN(1); + } packet+= len; length-= MAX_PACKET_LENGTH; len= MAX_PACKET_LENGTH; @@ -459,9 +479,11 @@ net_write_command(NET *net,uchar command, } int3store(buff,length); buff[3]= (uchar) net->pkt_nr++; - DBUG_RETURN(test(net_write_buff(net, buff, header_size) || - (head_len && net_write_buff(net, header, head_len)) || - net_write_buff(net, packet, len) || net_flush(net))); + rc= test(net_write_buff(net, buff, header_size) || + (head_len && net_write_buff(net, header, head_len)) || + net_write_buff(net, packet, len) || net_flush(net)); + MYSQL_NET_WRITE_DONE(rc); + DBUG_RETURN(rc); } /** @@ -989,6 +1011,8 @@ my_net_read(NET *net) { size_t len, complen; + 
MYSQL_NET_READ_START(); + #ifdef HAVE_COMPRESS if (!net->compress) { @@ -1012,6 +1036,7 @@ my_net_read(NET *net) net->read_pos = net->buff + net->where_b; if (len != packet_error) net->read_pos[len]=0; /* Safeguard for mysql_use_result */ + MYSQL_NET_READ_DONE(0, len); return len; #ifdef HAVE_COMPRESS } @@ -1095,7 +1120,10 @@ my_net_read(NET *net) net->where_b=buf_length; if ((packet_len = my_real_read(net,&complen)) == packet_error) + { + MYSQL_NET_READ_DONE(1, 0); return packet_error; + } if (my_uncompress(net->buff + net->where_b, packet_len, &complen)) { @@ -1104,6 +1132,7 @@ my_net_read(NET *net) #ifdef MYSQL_SERVER my_error(ER_NET_UNCOMPRESS_ERROR, MYF(0)); #endif + MYSQL_NET_READ_DONE(1, 0); return packet_error; } buf_length+= complen; @@ -1118,6 +1147,7 @@ my_net_read(NET *net) net->read_pos[len]=0; /* Safeguard for mysql_use_result */ } #endif /* HAVE_COMPRESS */ + MYSQL_NET_READ_DONE(0, len); return len; } diff --git a/sql/probes_mysql.d b/sql/probes_mysql.d new file mode 100644 index 00000000000..958e3042dde --- /dev/null +++ b/sql/probes_mysql.d @@ -0,0 +1,176 @@ +/* Copyright (C) 2008 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + The actual probe names in DTrace scripts will replace '__' by '-'. Thus + insert__row__start will be insert-row-start. 
+ + Recommendations for adding new probes: + + - each probe should have the minimal set of arguments required to + unambiguously identify the context in which the probe fires. Redundant + arguments (i.e. the ones that can be obtained in user scripts from previous + probes' arguments or otherwise) may be added for convenience. + + - try to avoid computationally expensive probe arguments. If impossible, + use *_ENABLED() macros to check if the probe is activated before + performing expensive calculations for a probe argument. + + - all *-done probes should have a status argument wherever applicable to make + it possible for user scripts to figure out whether the completed operation + was successful or not. + + - for all status arguments, a non-zero value should be returned on error or + failure, 0 should be returned on success. +*/ + +provider mysql { + + /* The following ones fire when creating or closing a client connection */ + probe connection__start(unsigned long conn_id, char *user, char *host); + probe connection__done(int status, unsigned long conn_id); + + /* + Fire at the start/end of any client command processing (including SQL + queries). + */ + probe command__start(unsigned long conn_id, int command, + char *user, char *host); + probe command__done(int status); + + /* + The following probes fire at the start/end of any SQL query processing, + respectively. + + query_start() has a lot of parameters that can be used to pick up + parameters for a lot of other probes here. For simplicity reasons we also + add the query string to most other DTrace probes as well. Hostname is + either the hostname or the IP address of the MySQL Client. 
+ */ + probe query__start(char *query, + unsigned long conn_id, + char *db_name, + char *user, + char *host); + probe query__done(int status); + + /* Fire at the start/end of SQL query parsing */ + probe query__parse__start(char *query); + probe query__parse__done(int status); + + /* Track whether the query hits the query cache or not */ + probe query__cache__hit(char *query, unsigned long rows); + probe query__cache__miss(char *query); + + /* + This probe fires when the actual query execution starts, i.e. after + parsing and checking the query cache, but before privilege checks, + optimizing, etc. + + Query means also all independent queries of a stored procedure and prepared + statements. Also the stored procedure itself is a query. + + exec_type is: + 0: Executed query from sql_parse, top-level query (sql_parse.cc) + 1: Executed prepared statement (sql_prepare.cc) + 2: Executed cursor statement (sql_cursor.cc) + 3: Executed query in stored procedure (sp_head.cc) + */ + probe query__exec__start(char *query, + unsigned long connid, + char *db_name, + char *user, + char *host, + int exec_type); + probe query__exec__done(int status); + + /* These probes fire when performing row operations towards any handler */ + probe insert__row__start(char *db, char *table); + probe insert__row__done(int status); + probe update__row__start(char *db, char *table); + probe update__row__done(int status); + probe delete__row__start(char *db, char *table); + probe delete__row__done(int status); + probe read__row__start(char *db, char *table, int scan_flag); + probe read__row__done(int status); + probe index__read__row__start(char *db, char *table); + probe index__read__row__done(int status); + + /* + These probes fire when calling external_lock for any handler + depending on the lock type being acquired or released. 
+ */ + probe handler__rdlock__start(char *db, char *table); + probe handler__wrlock__start(char *db, char *table); + probe handler__unlock__start(char *db, char *table); + probe handler__rdlock__done(int status); + probe handler__wrlock__done(int status); + probe handler__unlock__done(int status); + + /* + These probes fire when a filesort activity happens in a query. + */ + probe filesort__start(char *db, char *table); + probe filesort__done(int status, unsigned long rows); + /* + The query types SELECT, INSERT, INSERT AS SELECT, UPDATE, UPDATE with + multiple tables, DELETE, DELETE with multiple tables are all probed. + The start probe always contains the query text. + */ + probe select__start(char *query); + probe select__done(int status, unsigned long rows); + probe insert__start(char *query); + probe insert__done(int status, unsigned long rows); + probe insert__select__start(char *query); + probe insert__select__done(int status, unsigned long rows); + probe update__start(char *query); + probe update__done(int status, + unsigned long rowsmatches, unsigned long rowschanged); + probe multi__update__start(char *query); + probe multi__update__done(int status, + unsigned long rowsmatches, + unsigned long rowschanged); + probe delete__start(char *query); + probe delete__done(int status, unsigned long rows); + probe multi__delete__start(char *query); + probe multi__delete__done(int status, unsigned long rows); + + /* + These probes can be used to measure the time waiting for network traffic + or identify network-related problems. 
+ */ + probe net__read__start(); + probe net__read__done(int status, unsigned long bytes); + probe net__write__start(unsigned long bytes); + probe net__write__done(int status); + + /* MyISAM Key cache probes */ + probe keycache__read__start(char *filepath, unsigned long bytes, + unsigned long mem_used, unsigned long mem_free); + probe keycache__read__block(unsigned long bytes); + probe keycache__read__hit(); + probe keycache__read__miss(); + probe keycache__read__done(unsigned long mem_used, unsigned long mem_free); + probe keycache__write__start(char *filepath, unsigned long bytes, + unsigned long mem_used, unsigned long mem_free); + probe keycache__write__block(unsigned long bytes); + probe keycache__write__done(unsigned long mem_used, unsigned long mem_free); +}; + +#pragma D attributes Evolving/Evolving/Common provider mysql provider +#pragma D attributes Evolving/Evolving/Common provider mysql module +#pragma D attributes Evolving/Evolving/Common provider mysql function +#pragma D attributes Evolving/Evolving/Common provider mysql name +#pragma D attributes Evolving/Evolving/Common provider mysql args diff --git a/sql/sp_head.cc b/sql/sp_head.cc index ef6cb556f4c..34b6c483637 100644 --- a/sql/sp_head.cc +++ b/sql/sp_head.cc @@ -14,6 +14,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include "mysql_priv.h" +#include "probes_mysql.h" #ifdef USE_PRAGMA_IMPLEMENTATION #pragma implementation #endif @@ -2896,7 +2897,14 @@ sp_instr_stmt::print(String *str) int sp_instr_stmt::exec_core(THD *thd, uint *nextp) { + MYSQL_QUERY_EXEC_START(thd->query, + thd->thread_id, + (char *) (thd->db ? 
thd->db : ""), + thd->security_ctx->priv_user, + (char *)thd->security_ctx->host_or_ip, + 3); int res= mysql_execute_command(thd); + MYSQL_QUERY_EXEC_DONE(res); *nextp= m_ip+1; return res; } diff --git a/sql/sql_cache.cc b/sql/sql_cache.cc index 7c97ee4cf32..fcea4153eb0 100644 --- a/sql/sql_cache.cc +++ b/sql/sql_cache.cc @@ -333,6 +333,7 @@ TODO list: #include <hash.h> #include "../storage/myisammrg/ha_myisammrg.h" #include "../storage/myisammrg/myrg_def.h" +#include "probes_mysql.h" #ifdef EMBEDDED_LIBRARY #include "emb_qcache.h" @@ -670,7 +671,7 @@ void query_cache_insert(NET *net, const char *packet, ulong length) DBUG_ENTER("query_cache_insert"); /* See the comment on double-check locking usage above. */ - if (net->query_cache_query == 0) + if (query_cache.is_disabled() || net->query_cache_query == 0) DBUG_VOID_RETURN; DBUG_EXECUTE_IF("wait_in_query_cache_insert", @@ -778,7 +779,7 @@ void query_cache_end_of_result(THD *thd) DBUG_ENTER("query_cache_end_of_result"); /* See the comment on double-check locking usage above. */ - if (thd->net.query_cache_query == 0) + if (query_cache.is_disabled() || thd->net.query_cache_query == 0) DBUG_VOID_RETURN; /* Ensure that only complete results are cached. */ @@ -890,6 +891,7 @@ Query_cache::Query_cache(ulong query_cache_limit_arg, query_cache_limit(query_cache_limit_arg), queries_in_cache(0), hits(0), inserts(0), refused(0), total_blocks(0), lowmem_prunes(0), + m_query_cache_is_disabled(FALSE), min_allocation_unit(ALIGN_SIZE(min_allocation_unit_arg)), min_result_data_size(ALIGN_SIZE(min_result_data_size_arg)), def_query_hash_size(ALIGN_SIZE(def_query_hash_size_arg)), @@ -981,7 +983,7 @@ void Query_cache::store_query(THD *thd, TABLE_LIST *tables_used) See also a note on double-check locking usage above. 
*/ - if (thd->locked_tables || query_cache_size == 0) + if (m_query_cache_is_disabled || thd->locked_tables || query_cache_size == 0) DBUG_VOID_RETURN; uint8 tables_type= 0; @@ -1169,14 +1171,18 @@ end: Check if the query is in the cache. If it was cached, send it to the user. - RESULTS - 1 Query was not cached. - 0 The query was cached and user was sent the result. - -1 The query was cached but we didn't have rights to use it. - No error is sent to the client yet. + @param thd Pointer to the thread handler + @param sql A pointer to the sql statement * + @param query_length Length of the statement in characters + + @return status code + @retval 1 Query was not cached. + @retval 0 The query was cached and user was sent the result. + @retval -1 The query was cached but we didn't have rights to use it. + + In case of -1, no error is sent to the client. - NOTE - This method requires that sql points to allocated memory of size: + *) The buffer must be allocated memory of size: tot_length= query_length + thd->db_length + 1 + QUERY_CACHE_FLAGS_SIZE; */ @@ -1191,6 +1197,9 @@ Query_cache::send_result_to_client(THD *thd, char *sql, uint query_length) Query_cache_query_flags flags; DBUG_ENTER("Query_cache::send_result_to_client"); + if (m_query_cache_is_disabled) + DBUG_RETURN(0); + /* Testing 'query_cache_size' without a lock here is safe: the thing we may loose is that the query won't be served from cache, but we @@ -1480,11 +1489,13 @@ def_week_frmt: %lu, in_trans: %d, autocommit: %d", thd->main_da.disable_status(); BLOCK_UNLOCK_RD(query_block); + MYSQL_QUERY_CACHE_HIT(thd->query, (ulong) thd->limit_found_rows); DBUG_RETURN(1); // Result sent to client err_unlock: STRUCT_UNLOCK(&structure_guard_mutex); err: + MYSQL_QUERY_CACHE_MISS(thd->query); DBUG_RETURN(0); // Query was not cached } @@ -2541,7 +2552,17 @@ void Query_cache::invalidate_table(THD *thd, TABLE *table) void Query_cache::invalidate_table(THD *thd, uchar * key, uint32 key_length) { bool interrupt; + + if 
(m_query_cache_is_disabled) + return; + STRUCT_LOCK(&structure_guard_mutex); + if (query_cache_size == 0) + { + STRUCT_UNLOCK(&structure_guard_mutex); + return; + } + wait_while_table_flush_is_in_progress(&interrupt); if (interrupt) { diff --git a/sql/sql_cache.h b/sql/sql_cache.h index f2c33eff614..15e97238335 100644 --- a/sql/sql_cache.h +++ b/sql/sql_cache.h @@ -279,6 +279,8 @@ private: Cache_status m_cache_status; + bool m_query_cache_is_disabled; + void free_query_internal(Query_cache_block *point); void invalidate_table_internal(THD *thd, uchar *key, uint32 key_length); @@ -437,6 +439,14 @@ protected: /* register query in cache */ void store_query(THD *thd, TABLE_LIST *used_tables); + /** + At startup the user has an option to disable the query cache + to avoid locking the structure_guard_mutex. + This option is enabled by explicitly setting query_cache_type=OFF + in the command line. + */ + void disable_query_cache(void) { m_query_cache_is_disabled= TRUE; } + /* Check if the query is in the cache and if this is true send the data to client. 
@@ -469,6 +479,8 @@ protected: friend void query_cache_end_of_result(THD *thd); friend void query_cache_abort(NET *net); + bool is_disabled(void) { return m_query_cache_is_disabled; } + bool is_flushing(void) { return (m_cache_status != Query_cache::NO_FLUSH_IN_PROGRESS); diff --git a/sql/sql_class.h b/sql/sql_class.h index 3439e5b4f74..413e2f5be86 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -2824,6 +2824,10 @@ public: void send_error(uint errcode,const char *err); int do_deletes(); bool send_eof(); + inline ha_rows num_deleted() + { + return deleted; + } virtual void abort(); }; @@ -2867,6 +2871,14 @@ public: void send_error(uint errcode,const char *err); int do_updates(); bool send_eof(); + inline ha_rows num_found() + { + return found; + } + inline ha_rows num_updated() + { + return updated; + } virtual void abort(); }; diff --git a/sql/sql_connect.cc b/sql/sql_connect.cc index d9fb586eb77..02c8297d019 100644 --- a/sql/sql_connect.cc +++ b/sql/sql_connect.cc @@ -19,6 +19,7 @@ */ #include "mysql_priv.h" +#include "probes_mysql.h" #ifdef HAVE_OPENSSL /* @@ -1107,6 +1108,9 @@ pthread_handler_t handle_one_connection(void *arg) if (login_connection(thd)) goto end_thread; + MYSQL_CONNECTION_START(thd->thread_id, thd->security_ctx->priv_user, + (char *) thd->security_ctx->host_or_ip); + prepare_new_connection_state(thd); while (!net->error && net->vio != 0 && diff --git a/sql/sql_cursor.cc b/sql/sql_cursor.cc index 6f61dc40f66..553342f155b 100644 --- a/sql/sql_cursor.cc +++ b/sql/sql_cursor.cc @@ -19,6 +19,7 @@ #include "mysql_priv.h" #include "sql_cursor.h" #include "sql_select.h" +#include "probes_mysql.h" /**************************************************************************** Declarations. @@ -168,8 +169,14 @@ int mysql_open_cursor(THD *thd, uint flags, select_result *result, thd->lock_id= sensitive_cursor->get_lock_id(); thd->cursor= sensitive_cursor; } - + MYSQL_QUERY_EXEC_START(thd->query, + thd->thread_id, + (char *) (thd->db ? 
thd->db : ""), + thd->security_ctx->priv_user, + (char *) thd->security_ctx->host_or_ip, + 2); rc= mysql_execute_command(thd); + MYSQL_QUERY_EXEC_DONE(rc); lex->result= save_result; thd->lock_id= &thd->main_lock_id; diff --git a/sql/sql_insert.cc b/sql/sql_insert.cc index fcf86edeaa9..322391b63e8 100644 --- a/sql/sql_insert.cc +++ b/sql/sql_insert.cc @@ -3089,6 +3089,9 @@ bool select_insert::send_data(List<Item> &values) DBUG_RETURN(1); } } + + // Release latches in case bulk insert takes a long time + ha_release_temporary_latches(thd); error= write_record(thd, table, &info); table->auto_increment_field_not_null= FALSE; diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc index 592dbe9f43b..68bb7d81e6b 100644 --- a/sql/sql_parse.cc +++ b/sql/sql_parse.cc @@ -27,6 +27,7 @@ #include "sp_cache.h" #include "events.h" #include "sql_trigger.h" +#include "probes_mysql.h" /** @defgroup Runtime_Environment Runtime Environment @@ -961,6 +962,10 @@ bool dispatch_command(enum enum_server_command command, THD *thd, DBUG_ENTER("dispatch_command"); DBUG_PRINT("info",("packet: '%*.s'; command: %d", packet_length, packet, command)); + MYSQL_COMMAND_START(thd->thread_id, command, + thd->security_ctx->priv_user, + (char *) thd->security_ctx->host_or_ip); + thd->command=command; /* Commands which always take a long time are logged into @@ -1200,6 +1205,10 @@ bool dispatch_command(enum enum_server_command command, THD *thd, { if (alloc_query(thd, packet, packet_length)) break; // fatal error is set + MYSQL_QUERY_START(thd->query, thd->thread_id, + (char *) (thd->db ? 
thd->db : ""), + thd->security_ctx->priv_user, + (char *) thd->security_ctx->host_or_ip); char *packet_end= thd->query + thd->query_length; /* 'b' stands for 'buffer' parameter', special for 'my_snprintf' */ const char* end_of_stmt= NULL; @@ -1236,12 +1245,22 @@ bool dispatch_command(enum enum_server_command command, THD *thd, length--; } + if (MYSQL_QUERY_DONE_ENABLED()) + { + MYSQL_QUERY_DONE(thd->is_error()); + } + #if defined(ENABLED_PROFILING) && defined(COMMUNITY_SERVER) thd->profiling.finish_current_query(); thd->profiling.start_new_query("continuing"); thd->profiling.set_query_source(beginning_of_next_stmt, length); #endif + MYSQL_QUERY_START(thd->query, thd->thread_id, + (char *) (thd->db ? thd->db : ""), + thd->security_ctx->priv_user, + (char *) thd->security_ctx->host_or_ip); + VOID(pthread_mutex_lock(&LOCK_thread_count)); thd->query_length= length; thd->query= beginning_of_next_stmt; @@ -1602,6 +1621,17 @@ bool dispatch_command(enum enum_server_command command, THD *thd, VOID(pthread_mutex_unlock(&LOCK_thread_count)); thd->packet.shrink(thd->variables.net_buffer_length); // Reclaim some memory free_root(thd->mem_root,MYF(MY_KEEP_PREALLOC)); + + if (MYSQL_QUERY_DONE_ENABLED() || MYSQL_COMMAND_DONE_ENABLED()) + { + int res; + res= (int) thd->is_error(); + if (command == COM_QUERY) + { + MYSQL_QUERY_DONE(res); + } + MYSQL_COMMAND_DONE(res); + } DBUG_RETURN(error); } @@ -2966,11 +2996,14 @@ end_with_restore_list: break; } case SQLCOM_UPDATE: + { + ha_rows found= 0, updated= 0; DBUG_ASSERT(first_table == all_tables && first_table != 0); if (update_precheck(thd, all_tables)) break; DBUG_ASSERT(select_lex->offset_limit == 0); unit->set_limit(select_lex); + MYSQL_UPDATE_START(thd->query); res= (up_result= mysql_update(thd, all_tables, select_lex->item_list, lex->value_list, @@ -2978,11 +3011,14 @@ end_with_restore_list: select_lex->order_list.elements, (ORDER *) select_lex->order_list.first, unit->select_limit_cnt, - lex->duplicates, lex->ignore)); + 
lex->duplicates, lex->ignore, + &found, &updated)); + MYSQL_UPDATE_DONE(res, found, updated); /* mysql_update return 2 if we need to switch to multi-update */ if (up_result != 2) break; /* Fall through */ + } case SQLCOM_UPDATE_MULTI: { DBUG_ASSERT(first_table == all_tables && first_table != 0); @@ -3030,13 +3066,31 @@ end_with_restore_list: #ifdef HAVE_REPLICATION } /* unlikely */ #endif - - res= mysql_multi_update(thd, all_tables, - &select_lex->item_list, - &lex->value_list, - select_lex->where, - select_lex->options, - lex->duplicates, lex->ignore, unit, select_lex); + { + multi_update *result_obj; + MYSQL_MULTI_UPDATE_START(thd->query); + res= mysql_multi_update(thd, all_tables, + &select_lex->item_list, + &lex->value_list, + select_lex->where, + select_lex->options, + lex->duplicates, + lex->ignore, + unit, + select_lex, + &result_obj); + if (result_obj) + { + MYSQL_MULTI_UPDATE_DONE(res, result_obj->num_found(), + result_obj->num_updated()); + res= FALSE; /* Ignore errors here */ + delete result_obj; + } + else + { + MYSQL_MULTI_UPDATE_DONE(1, 0, 0); + } + } break; } case SQLCOM_REPLACE: @@ -3082,11 +3136,11 @@ end_with_restore_list: res= 1; break; } - + MYSQL_INSERT_START(thd->query); res= mysql_insert(thd, all_tables, lex->field_list, lex->many_values, lex->update_list, lex->value_list, lex->duplicates, lex->ignore); - + MYSQL_INSERT_DONE(res, (ulong) thd->row_count_func); /* If we have inserted into a VIEW, and the base table has AUTO_INCREMENT column, but this column is not accessible through @@ -3122,9 +3176,9 @@ end_with_restore_list: res= 1; break; } - if (!(res= open_and_lock_tables(thd, all_tables))) { + MYSQL_INSERT_SELECT_START(thd->query); /* Skip first table, which is the table we are inserting in */ TABLE_LIST *second_table= first_table->next_local; select_lex->table_list.first= (uchar*) second_table; @@ -3158,9 +3212,9 @@ end_with_restore_list: delete sel_result; } /* revert changes for SP */ + MYSQL_INSERT_SELECT_DONE(res, (ulong) 
thd->row_count_func); select_lex->table_list.first= (uchar*) first_table; } - /* If we have inserted into a VIEW, and the base table has AUTO_INCREMENT column, but this column is not accessible through @@ -3209,11 +3263,12 @@ end_with_restore_list: res= 1; break; } - + MYSQL_DELETE_START(thd->query); res = mysql_delete(thd, all_tables, select_lex->where, &select_lex->order_list, unit->select_limit_cnt, select_lex->options, FALSE); + MYSQL_DELETE_DONE(res, (ulong) thd->row_count_func); break; } case SQLCOM_DELETE_MULTI: @@ -3243,8 +3298,12 @@ end_with_restore_list: if ((res= open_and_lock_tables(thd, all_tables))) break; + MYSQL_MULTI_DELETE_START(thd->query); if ((res= mysql_multi_delete_prepare(thd))) + { + MYSQL_MULTI_DELETE_DONE(1, 0); goto error; + } if (!thd->is_fatal_error && (del_result= new multi_delete(aux_tables, lex->table_count))) @@ -3261,12 +3320,16 @@ end_with_restore_list: OPTION_SETUP_TABLES_DONE, del_result, unit, select_lex); res|= thd->is_error(); + MYSQL_MULTI_DELETE_DONE(res, del_result->num_deleted()); if (res) del_result->abort(); delete del_result; } else + { res= TRUE; // Error + MYSQL_MULTI_DELETE_DONE(1, 0); + } break; } case SQLCOM_DROP_TABLE: @@ -5739,6 +5802,7 @@ void mysql_init_multi_delete(LEX *lex) void mysql_parse(THD *thd, const char *inBuf, uint length, const char ** found_semicolon) { + int error; DBUG_ENTER("mysql_parse"); DBUG_EXECUTE_IF("parser_debug", turn_parser_debug_on();); @@ -5807,7 +5871,15 @@ void mysql_parse(THD *thd, const char *inBuf, uint length, thd->server_status|= SERVER_MORE_RESULTS_EXISTS; } lex->set_trg_event_type_for_tables(); - mysql_execute_command(thd); + MYSQL_QUERY_EXEC_START(thd->query, + thd->thread_id, + (char *) (thd->db ? 
thd->db : ""), + thd->security_ctx->priv_user, + (char *) thd->security_ctx->host_or_ip, + 0); + + error= mysql_execute_command(thd); + MYSQL_QUERY_EXEC_DONE(error); } } } @@ -7643,8 +7715,10 @@ bool parse_sql(THD *thd, Parser_state *parser_state, Object_creation_ctx *creation_ctx) { + bool ret_value; DBUG_ASSERT(thd->m_parser_state == NULL); + MYSQL_QUERY_PARSE_START(thd->query); /* Backup creation context. */ Object_creation_ctx *backup_ctx= NULL; @@ -7676,7 +7750,9 @@ bool parse_sql(THD *thd, /* That's it. */ - return mysql_parse_status || thd->is_fatal_error; + ret_value= mysql_parse_status || thd->is_fatal_error; + MYSQL_QUERY_PARSE_DONE(ret_value); + return ret_value; } /** diff --git a/sql/sql_prepare.cc b/sql/sql_prepare.cc index 7fcc374e3f3..0ae902bcd0a 100644 --- a/sql/sql_prepare.cc +++ b/sql/sql_prepare.cc @@ -89,6 +89,7 @@ When one supplies long data for a placeholder: #include "sp_head.h" #include "sp.h" #include "sp_cache.h" +#include "probes_mysql.h" #ifdef EMBEDDED_LIBRARY /* include MYSQL_BIND headers */ #include <mysql.h> @@ -3565,7 +3566,14 @@ bool Prepared_statement::execute(String *expanded_query, bool open_cursor) if (query_cache_send_result_to_client(thd, thd->query, thd->query_length) <= 0) { + MYSQL_QUERY_EXEC_START(thd->query, + thd->thread_id, + (char *) (thd->db ? 
thd->db : ""), + thd->security_ctx->priv_user, + (char *) thd->security_ctx->host_or_ip, + 1); error= mysql_execute_command(thd); + MYSQL_QUERY_EXEC_DONE(error); } } diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 75c11c2ac64..9dd72ab55ba 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -31,6 +31,7 @@ #include "mysql_priv.h" #include "sql_select.h" #include "sql_cursor.h" +#include "probes_mysql.h" #include <m_ctype.h> #include <my_bit.h> @@ -240,6 +241,7 @@ bool handle_select(THD *thd, LEX *lex, select_result *result, bool res; register SELECT_LEX *select_lex = &lex->select_lex; DBUG_ENTER("handle_select"); + MYSQL_SELECT_START(thd->query); if (select_lex->master_unit()->is_union() || select_lex->master_unit()->fake_select_lex) @@ -273,6 +275,7 @@ bool handle_select(THD *thd, LEX *lex, select_result *result, if (unlikely(res)) result->abort(); + MYSQL_SELECT_DONE((int) res, (ulong) thd->limit_found_rows); DBUG_RETURN(res); } @@ -10558,6 +10561,9 @@ free_tmp_table(THD *thd, TABLE *entry) save_proc_info=thd->proc_info; thd_proc_info(thd, "removing tmp table"); + // Release latches since this can take a long time + ha_release_temporary_latches(thd); + if (entry->file) { if (entry->db_stat) @@ -10603,6 +10609,10 @@ bool create_myisam_from_heap(THD *thd, TABLE *table, TMP_TABLE_PARAM *param, table->file->print_error(error,MYF(0)); DBUG_RETURN(1); } + + // Release latches since this can take a long time + ha_release_temporary_latches(thd); + new_table= *table; share= *table->s; new_table.s= &share; diff --git a/sql/sql_update.cc b/sql/sql_update.cc index b3bd5d0bc57..1edd6952a34 100644 --- a/sql/sql_update.cc +++ b/sql/sql_update.cc @@ -23,6 +23,7 @@ #include "sql_select.h" #include "sp_head.h" #include "sql_trigger.h" +#include "probes_mysql.h" /* Return 0 if row hasn't changed */ @@ -180,7 +181,8 @@ int mysql_update(THD *thd, COND *conds, uint order_num, ORDER *order, ha_rows limit, - enum enum_duplicates handle_duplicates, bool ignore) + enum 
enum_duplicates handle_duplicates, bool ignore, + ha_rows *found_return, ha_rows *updated_return) { bool using_limit= limit != HA_POS_ERROR; bool safe_update= test(thd->options & OPTION_SAFE_UPDATES); @@ -827,6 +829,8 @@ int mysql_update(THD *thd, } thd->count_cuted_fields= CHECK_FIELD_IGNORE; /* calc cuted fields */ thd->abort_on_warning= 0; + *found_return= found; + *updated_return= updated; DBUG_RETURN((error >= 0 || thd->is_error()) ? 1 : 0); err: @@ -1191,18 +1195,22 @@ bool mysql_multi_update(THD *thd, List<Item> *values, COND *conds, ulonglong options, - enum enum_duplicates handle_duplicates, bool ignore, - SELECT_LEX_UNIT *unit, SELECT_LEX *select_lex) + enum enum_duplicates handle_duplicates, + bool ignore, + SELECT_LEX_UNIT *unit, + SELECT_LEX *select_lex, + multi_update **result) { - multi_update *result; bool res; DBUG_ENTER("mysql_multi_update"); - if (!(result= new multi_update(table_list, + if (!(*result= new multi_update(table_list, thd->lex->select_lex.leaf_tables, fields, values, handle_duplicates, ignore))) + { DBUG_RETURN(TRUE); + } thd->abort_on_warning= test(thd->variables.sql_mode & (MODE_STRICT_TRANS_TABLES | @@ -1216,19 +1224,18 @@ bool mysql_multi_update(THD *thd, (ORDER *)NULL, options | SELECT_NO_JOIN_CACHE | SELECT_NO_UNLOCK | OPTION_SETUP_TABLES_DONE, - result, unit, select_lex); + *result, unit, select_lex); DBUG_PRINT("info",("res: %d report_error: %d", res, (int) thd->is_error())); res|= thd->is_error(); if (unlikely(res)) { /* If we had a another error reported earlier then this will be ignored */ - result->send_error(ER_UNKNOWN_ERROR, ER(ER_UNKNOWN_ERROR)); - result->abort(); + (*result)->send_error(ER_UNKNOWN_ERROR, ER(ER_UNKNOWN_ERROR)); + (*result)->abort(); } - delete result; thd->abort_on_warning= 0; - DBUG_RETURN(FALSE); + DBUG_RETURN(res); } diff --git a/storage/archive/Makefile.am b/storage/archive/Makefile.am index d092f091798..649d84d8152 100644 --- a/storage/archive/Makefile.am +++ b/storage/archive/Makefile.am @@ 
-66,5 +66,21 @@ archive_reader_LDFLAGS = @NOINST_LDFLAGS@ EXTRA_DIST = CMakeLists.txt plug.in + +if HAVE_DTRACE_DASH_G +libarchive_a_LIBADD = probes_mysql.o +libarchive_a_DEPENDENCIES = probes_mysql.o dtrace_files dtrace_providers +CLEANFILES = probes_mysql.o dtrace_files dtrace_providers +DTRACEFILES = libarchive_a-ha_archive.o +DTRACEPROVIDER = $(abs_top_srcdir)/sql/probes_mysql.d + +dtrace_files: + echo $(DTRACEFILES) > $@ +dtrace_providers: + echo $(DTRACEPROVIDER) > $@ +probes_mysql.o: $(DTRACEPROVIDER) $(DTRACEFILES) + $(DTRACE) $(DTRACEFLAGS) -G -s $< $(DTRACEFILES) -o $@ +endif + # Don't update the files from bitkeeper %::SCCS/s.% diff --git a/storage/archive/ha_archive.cc b/storage/archive/ha_archive.cc index 7edfca53751..ff59a3eef5c 100644 --- a/storage/archive/ha_archive.cc +++ b/storage/archive/ha_archive.cc @@ -18,6 +18,7 @@ #endif #include "mysql_priv.h" +#include "probes_mysql.h" #include <myisam.h> #include "ha_archive.h" @@ -917,7 +918,9 @@ int ha_archive::index_read(uchar *buf, const uchar *key, { int rc; DBUG_ENTER("ha_archive::index_read"); + MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str); rc= index_read_idx(buf, active_index, key, key_len, find_flag); + MYSQL_INDEX_READ_ROW_DONE(rc); DBUG_RETURN(rc); } @@ -960,8 +963,10 @@ error: int ha_archive::index_next(uchar * buf) { bool found= 0; + int rc; DBUG_ENTER("ha_archive::index_next"); + MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str); while (!(get_row(&archive, buf))) { @@ -972,7 +977,9 @@ int ha_archive::index_next(uchar * buf) } } - DBUG_RETURN(found ? 0 : HA_ERR_END_OF_FILE); + rc= found ? 
0 : HA_ERR_END_OF_FILE; + MYSQL_INDEX_READ_ROW_DONE(rc); + DBUG_RETURN(rc); } /* @@ -1196,12 +1203,17 @@ int ha_archive::rnd_next(uchar *buf) { int rc; DBUG_ENTER("ha_archive::rnd_next"); + MYSQL_READ_ROW_START(table_share->db.str, + table_share->table_name.str, TRUE); if (share->crashed) DBUG_RETURN(HA_ERR_CRASHED_ON_USAGE); if (!scan_rows) - DBUG_RETURN(HA_ERR_END_OF_FILE); + { + rc= HA_ERR_END_OF_FILE; + goto end; + } scan_rows--; ha_statistic_increment(&SSV::ha_read_rnd_next_count); @@ -1210,6 +1222,8 @@ int ha_archive::rnd_next(uchar *buf) table->status=rc ? STATUS_NOT_FOUND: 0; +end: + MYSQL_READ_ROW_DONE(rc); DBUG_RETURN(rc); } @@ -1237,12 +1251,21 @@ void ha_archive::position(const uchar *record) int ha_archive::rnd_pos(uchar * buf, uchar *pos) { + int rc; DBUG_ENTER("ha_archive::rnd_pos"); + MYSQL_READ_ROW_START(table_share->db.str, + table_share->table_name.str, FALSE); ha_statistic_increment(&SSV::ha_read_rnd_next_count); current_position= (my_off_t)my_get_ptr(pos, ref_length); if (azseek(&archive, current_position, SEEK_SET) == (my_off_t)(-1L)) - DBUG_RETURN(HA_ERR_CRASHED_ON_USAGE); - DBUG_RETURN(get_row(&archive, buf)); + { + rc= HA_ERR_CRASHED_ON_USAGE; + goto end; + } + rc= get_row(&archive, buf); +end: + MYSQL_READ_ROW_DONE(rc); + DBUG_RETURN(rc); } /* diff --git a/storage/blackhole/Makefile.am b/storage/blackhole/Makefile.am index db4f67cf847..c274ce8f908 100644 --- a/storage/blackhole/Makefile.am +++ b/storage/blackhole/Makefile.am @@ -48,5 +48,21 @@ libblackhole_a_SOURCES= ha_blackhole.cc EXTRA_DIST = CMakeLists.txt plug.in + +if HAVE_DTRACE_DASH_G +libblackhole_a_LIBADD = probes_mysql.o +libblackhole_a_DEPENDENCIES = probes_mysql.o dtrace_files dtrace_providers +CLEANFILES = probes_mysql.o dtrace_files dtrace_providers +DTRACEFILES = libblackhole_a-ha_blackhole.o +DTRACEPROVIDER = $(abs_top_srcdir)/sql/probes_mysql.d + +dtrace_files: + echo $(DTRACEFILES) > $@ +dtrace_providers: + echo $(DTRACEPROVIDER) > $@ +probes_mysql.o: $(DTRACEPROVIDER) 
$(DTRACEFILES) + $(DTRACE) $(DTRACEFLAGS) -G -s $< $(DTRACEFILES) -o $@ +endif + # Don't update the files from bitkeeper %::SCCS/s.% diff --git a/storage/blackhole/ha_blackhole.cc b/storage/blackhole/ha_blackhole.cc index 357496fe095..03ed57f38e8 100644 --- a/storage/blackhole/ha_blackhole.cc +++ b/storage/blackhole/ha_blackhole.cc @@ -20,6 +20,7 @@ #define MYSQL_SERVER 1 #include "mysql_priv.h" +#include "probes_mysql.h" #include "ha_blackhole.h" /* Static declarations for handlerton */ @@ -128,18 +129,27 @@ int ha_blackhole::rnd_init(bool scan) int ha_blackhole::rnd_next(uchar *buf) { + int rc; DBUG_ENTER("ha_blackhole::rnd_next"); + MYSQL_READ_ROW_START(table_share->db.str, table_share->table_name.str, + TRUE); THD *thd= ha_thd(); if (thd->system_thread == SYSTEM_THREAD_SLAVE_SQL && thd->query == NULL) - DBUG_RETURN(0); - DBUG_RETURN(HA_ERR_END_OF_FILE); + rc= 0; + else + rc= HA_ERR_END_OF_FILE; + MYSQL_READ_ROW_DONE(rc); + DBUG_RETURN(rc); } int ha_blackhole::rnd_pos(uchar * buf, uchar *pos) { DBUG_ENTER("ha_blackhole::rnd_pos"); + MYSQL_READ_ROW_START(table_share->db.str, table_share->table_name.str, + FALSE); DBUG_ASSERT(0); + MYSQL_READ_ROW_DONE(0); DBUG_RETURN(0); } @@ -210,11 +220,16 @@ int ha_blackhole::index_read_map(uchar * buf, const uchar * key, key_part_map keypart_map, enum ha_rkey_function find_flag) { + int rc; DBUG_ENTER("ha_blackhole::index_read"); + MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str); THD *thd= ha_thd(); if (thd->system_thread == SYSTEM_THREAD_SLAVE_SQL && thd->query == NULL) - DBUG_RETURN(0); - DBUG_RETURN(HA_ERR_END_OF_FILE); + rc= 0; + else + rc= HA_ERR_END_OF_FILE; + MYSQL_INDEX_READ_ROW_DONE(rc); + DBUG_RETURN(rc); } @@ -222,50 +237,77 @@ int ha_blackhole::index_read_idx_map(uchar * buf, uint idx, const uchar * key, key_part_map keypart_map, enum ha_rkey_function find_flag) { + int rc; DBUG_ENTER("ha_blackhole::index_read_idx"); + MYSQL_INDEX_READ_ROW_START(table_share->db.str, 
table_share->table_name.str); THD *thd= ha_thd(); if (thd->system_thread == SYSTEM_THREAD_SLAVE_SQL && thd->query == NULL) - DBUG_RETURN(0); - DBUG_RETURN(HA_ERR_END_OF_FILE); + rc= 0; + else + rc= HA_ERR_END_OF_FILE; + MYSQL_INDEX_READ_ROW_DONE(rc); + DBUG_RETURN(rc); } int ha_blackhole::index_read_last_map(uchar * buf, const uchar * key, key_part_map keypart_map) { + int rc; DBUG_ENTER("ha_blackhole::index_read_last"); + MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str); THD *thd= ha_thd(); if (thd->system_thread == SYSTEM_THREAD_SLAVE_SQL && thd->query == NULL) - DBUG_RETURN(0); - DBUG_RETURN(HA_ERR_END_OF_FILE); + rc= 0; + else + rc= HA_ERR_END_OF_FILE; + MYSQL_INDEX_READ_ROW_DONE(rc); + DBUG_RETURN(rc); } int ha_blackhole::index_next(uchar * buf) { + int rc; DBUG_ENTER("ha_blackhole::index_next"); - DBUG_RETURN(HA_ERR_END_OF_FILE); + MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str); + rc= HA_ERR_END_OF_FILE; + MYSQL_INDEX_READ_ROW_DONE(rc); + DBUG_RETURN(rc); } int ha_blackhole::index_prev(uchar * buf) { + int rc; DBUG_ENTER("ha_blackhole::index_prev"); - DBUG_RETURN(HA_ERR_END_OF_FILE); + MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str); + rc= HA_ERR_END_OF_FILE; + MYSQL_INDEX_READ_ROW_DONE(rc); + DBUG_RETURN(rc); } int ha_blackhole::index_first(uchar * buf) { + int rc; DBUG_ENTER("ha_blackhole::index_first"); + MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str); + rc= HA_ERR_END_OF_FILE; + MYSQL_INDEX_READ_ROW_DONE(rc); + DBUG_RETURN(rc); DBUG_RETURN(HA_ERR_END_OF_FILE); } int ha_blackhole::index_last(uchar * buf) { + int rc; DBUG_ENTER("ha_blackhole::index_last"); - DBUG_RETURN(HA_ERR_END_OF_FILE); + MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str); + rc= HA_ERR_END_OF_FILE; + MYSQL_INDEX_READ_ROW_DONE(rc); + DBUG_RETURN(rc); } diff --git a/storage/csv/Makefile.am b/storage/csv/Makefile.am index 07ffac88a96..fa7f77bd746 
100644 --- a/storage/csv/Makefile.am +++ b/storage/csv/Makefile.am @@ -41,5 +41,21 @@ libcsv_a_CXXFLAGS = $(AM_CFLAGS) libcsv_a_SOURCES = transparent_file.cc ha_tina.cc EXTRA_DIST = CMakeLists.txt plug.in + +if HAVE_DTRACE_DASH_G +libcsv_a_LIBADD = probes_mysql.o +libcsv_a_DEPENDENCIES = probes_mysql.o dtrace_files dtrace_providers +CLEANFILES = probes_mysql.o dtrace_files dtrace_providers +DTRACEFILES = libcsv_a-ha_tina.o +DTRACEPROVIDER = $(abs_top_srcdir)/sql/probes_mysql.d + +dtrace_files: + echo $(DTRACEFILES) > $@ +dtrace_providers: + echo $(DTRACEPROVIDER) > $@ +probes_mysql.o: $(DTRACEPROVIDER) $(DTRACEFILES) + $(DTRACE) $(DTRACEFLAGS) -G -s $< $(DTRACEFILES) -o $@ +endif + # Don't update the files from bitkeeper %::SCCS/s.% diff --git a/storage/csv/ha_tina.cc b/storage/csv/ha_tina.cc index 368ce7e28f3..e40c2db77db 100644 --- a/storage/csv/ha_tina.cc +++ b/storage/csv/ha_tina.cc @@ -48,6 +48,7 @@ TODO: #include "mysql_priv.h" #include <mysql/plugin.h> #include "ha_tina.h" +#include "probes_mysql.h" /* @@ -1095,9 +1096,14 @@ int ha_tina::rnd_next(uchar *buf) { int rc; DBUG_ENTER("ha_tina::rnd_next"); + MYSQL_READ_ROW_START(table_share->db.str, table_share->table_name.str, + TRUE); if (share->crashed) - DBUG_RETURN(HA_ERR_CRASHED_ON_USAGE); + { + rc= HA_ERR_CRASHED_ON_USAGE; + goto end; + } ha_statistic_increment(&SSV::ha_read_rnd_next_count); @@ -1105,13 +1111,19 @@ int ha_tina::rnd_next(uchar *buf) /* don't scan an empty file */ if (!local_saved_data_file_length) - DBUG_RETURN(HA_ERR_END_OF_FILE); + { + rc= HA_ERR_END_OF_FILE; + goto end; + } if ((rc= find_current_row(buf))) - DBUG_RETURN(rc); + goto end; stats.records++; - DBUG_RETURN(0); + rc= 0; +end: + MYSQL_READ_ROW_DONE(rc); + DBUG_RETURN(rc); } /* @@ -1138,10 +1150,15 @@ void ha_tina::position(const uchar *record) int ha_tina::rnd_pos(uchar * buf, uchar *pos) { + int rc; DBUG_ENTER("ha_tina::rnd_pos"); + MYSQL_READ_ROW_START(table_share->db.str, table_share->table_name.str, + FALSE); 
ha_statistic_increment(&SSV::ha_read_rnd_count); current_position= (off_t)my_get_ptr(pos,ref_length); - DBUG_RETURN(find_current_row(buf)); + rc= find_current_row(buf); + MYSQL_READ_ROW_DONE(rc); + DBUG_RETURN(rc); } /* diff --git a/storage/example/Makefile.am b/storage/example/Makefile.am index 4b2f165377c..bcf519c67c6 100644 --- a/storage/example/Makefile.am +++ b/storage/example/Makefile.am @@ -48,5 +48,22 @@ libexample_a_SOURCES= ha_example.cc EXTRA_DIST = CMakeLists.txt plug.in + +if HAVE_DTRACE_DASH_G +libexample_a_LIBADD = probes_mysql.o +libexample_a_DEPENDENCIES = probes_mysql.o +CLEANFILES = +BUILT_SOURCES = +DTRACEFILES = libexample_a-ha_example.o +DTRACEPROVIDER = $(abs_top_srcdir)/sql/probes_mysql.d + +dtrace_files: + echo $(DTRACEFILES) > $@ +dtrace_providers: + echo $(DTRACEPROVIDER) > $@ +probes_mysql.o: $(DTRACEPROVIDER) $(DTRACEFILES) + $(DTRACE) $(DTRACEFLAGS) -G -s $< $(DTRACEFILES) -o $@ +endif + # Don't update the files from bitkeeper %::SCCS/s.% diff --git a/storage/example/ha_example.cc b/storage/example/ha_example.cc index 604722c3c8c..30fc82c82d2 100644 --- a/storage/example/ha_example.cc +++ b/storage/example/ha_example.cc @@ -94,6 +94,7 @@ #define MYSQL_SERVER 1 #include "mysql_priv.h" #include "ha_example.h" +#include "probes_mysql.h" #include <mysql/plugin.h> static handler *example_create_handler(handlerton *hton, @@ -428,8 +429,12 @@ int ha_example::index_read_map(uchar *buf, const uchar *key, enum ha_rkey_function find_flag __attribute__((unused))) { + int rc; DBUG_ENTER("ha_example::index_read"); - DBUG_RETURN(HA_ERR_WRONG_COMMAND); + MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str); + rc= HA_ERR_WRONG_COMMAND; + MYSQL_INDEX_READ_ROW_DONE(rc); + DBUG_RETURN(rc); } @@ -440,8 +445,12 @@ int ha_example::index_read_map(uchar *buf, const uchar *key, int ha_example::index_next(uchar *buf) { + int rc; DBUG_ENTER("ha_example::index_next"); - DBUG_RETURN(HA_ERR_WRONG_COMMAND); + 
MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str); + rc= HA_ERR_WRONG_COMMAND; + MYSQL_INDEX_READ_ROW_DONE(rc); + DBUG_RETURN(rc); } @@ -452,8 +461,12 @@ int ha_example::index_next(uchar *buf) int ha_example::index_prev(uchar *buf) { + int rc; DBUG_ENTER("ha_example::index_prev"); - DBUG_RETURN(HA_ERR_WRONG_COMMAND); + MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str); + rc= HA_ERR_WRONG_COMMAND; + MYSQL_INDEX_READ_ROW_DONE(rc); + DBUG_RETURN(rc); } @@ -469,8 +482,12 @@ int ha_example::index_prev(uchar *buf) */ int ha_example::index_first(uchar *buf) { + int rc; DBUG_ENTER("ha_example::index_first"); - DBUG_RETURN(HA_ERR_WRONG_COMMAND); + MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str); + rc= HA_ERR_WRONG_COMMAND; + MYSQL_INDEX_READ_ROW_DONE(rc); + DBUG_RETURN(rc); } @@ -486,8 +503,12 @@ int ha_example::index_first(uchar *buf) */ int ha_example::index_last(uchar *buf) { + int rc; DBUG_ENTER("ha_example::index_last"); - DBUG_RETURN(HA_ERR_WRONG_COMMAND); + MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str); + rc= HA_ERR_WRONG_COMMAND; + MYSQL_INDEX_READ_ROW_DONE(rc); + DBUG_RETURN(rc); } @@ -533,8 +554,13 @@ int ha_example::rnd_end() */ int ha_example::rnd_next(uchar *buf) { + int rc; DBUG_ENTER("ha_example::rnd_next"); - DBUG_RETURN(HA_ERR_END_OF_FILE); + MYSQL_READ_ROW_START(table_share->db.str, table_share->table_name.str, + TRUE); + rc= HA_ERR_END_OF_FILE; + MYSQL_READ_ROW_DONE(rc); + DBUG_RETURN(rc); } @@ -581,8 +607,13 @@ void ha_example::position(const uchar *record) */ int ha_example::rnd_pos(uchar *buf, uchar *pos) { + int rc; DBUG_ENTER("ha_example::rnd_pos"); - DBUG_RETURN(HA_ERR_WRONG_COMMAND); + MYSQL_READ_ROW_START(table_share->db.str, table_share->table_name.str, + TRUE); + rc= HA_ERR_WRONG_COMMAND; + MYSQL_READ_ROW_DONE(rc); + DBUG_RETURN(rc); } diff --git a/storage/federated/Makefile.am b/storage/federated/Makefile.am index 
64ea0207017..25db59dc3ce 100644 --- a/storage/federated/Makefile.am +++ b/storage/federated/Makefile.am @@ -48,5 +48,21 @@ libfederated_a_SOURCES= ha_federated.cc EXTRA_DIST = CMakeLists.txt plug.in + +if HAVE_DTRACE_DASH_G +libfederated_a_LIBADD = probes_mysql.o +libfederated_a_DEPENDENCIES = probes_mysql.o dtrace_files dtrace_providers +CLEANFILES = probes_mysql.o dtrace_files dtrace_providers +DTRACEFILES = libfederated_a-ha_federated.o +DTRACEPROVIDER = $(abs_top_srcdir)/sql/probes_mysql.d + +dtrace_files: + echo $(DTRACEFILES) > $@ +dtrace_providers: + echo $(DTRACEPROVIDER) > $@ +probes_mysql.o: $(DTRACEPROVIDER) $(DTRACEFILES) + $(DTRACE) $(DTRACEFLAGS) -G -s $< $(DTRACEFILES) -o $@ +endif + # Don't update the files from bitkeeper %::SCCS/s.% diff --git a/storage/federated/ha_federated.cc b/storage/federated/ha_federated.cc index 6cfbd355c40..e46379c25c9 100644 --- a/storage/federated/ha_federated.cc +++ b/storage/federated/ha_federated.cc @@ -380,6 +380,7 @@ #endif #include "ha_federated.h" +#include "probes_mysql.h" #include "m_string.h" @@ -2324,13 +2325,17 @@ int ha_federated::delete_row(const uchar *buf) int ha_federated::index_read(uchar *buf, const uchar *key, uint key_len, ha_rkey_function find_flag) { + int rc; DBUG_ENTER("ha_federated::index_read"); + MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str); if (stored_result) mysql_free_result(stored_result); - DBUG_RETURN(index_read_idx_with_result_set(buf, active_index, key, - key_len, find_flag, - &stored_result)); + rc= index_read_idx_with_result_set(buf, active_index, key, + key_len, find_flag, + &stored_result); + MYSQL_INDEX_READ_ROW_DONE(rc); + DBUG_RETURN(rc); } @@ -2478,6 +2483,7 @@ int ha_federated::read_range_first(const key_range *start_key, sizeof(sql_query_buffer), &my_charset_bin); DBUG_ENTER("ha_federated::read_range_first"); + MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str); DBUG_ASSERT(!(start_key == NULL && end_key == NULL)); @@ 
-2506,10 +2512,12 @@ int ha_federated::read_range_first(const key_range *start_key, } retval= read_next(table->record[0], stored_result); + MYSQL_INDEX_READ_ROW_DONE(retval); DBUG_RETURN(retval); error: table->status= STATUS_NOT_FOUND; + MYSQL_INDEX_READ_ROW_DONE(retval); DBUG_RETURN(retval); } @@ -2518,7 +2526,9 @@ int ha_federated::read_range_next() { int retval; DBUG_ENTER("ha_federated::read_range_next"); - retval= rnd_next(table->record[0]); + MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str); + retval= rnd_next_int(table->record[0]); + MYSQL_INDEX_READ_ROW_DONE(retval); DBUG_RETURN(retval); } @@ -2526,9 +2536,13 @@ int ha_federated::read_range_next() /* Used to read forward through the index. */ int ha_federated::index_next(uchar *buf) { + int retval; DBUG_ENTER("ha_federated::index_next"); + MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str); ha_statistic_increment(&SSV::ha_read_next_count); - DBUG_RETURN(read_next(buf, stored_result)); + retval= read_next(buf, stored_result); + MYSQL_INDEX_READ_ROW_DONE(retval); + DBUG_RETURN(retval); } @@ -2637,7 +2651,18 @@ int ha_federated::index_end(void) int ha_federated::rnd_next(uchar *buf) { + int rc; DBUG_ENTER("ha_federated::rnd_next"); + MYSQL_READ_ROW_START(table_share->db.str, table_share->table_name.str, + TRUE); + rc= rnd_next_int(buf); + MYSQL_READ_ROW_DONE(rc); + DBUG_RETURN(rc); +} + +int ha_federated::rnd_next_int(uchar *buf) +{ + DBUG_ENTER("ha_federated::rnd_next_int"); if (stored_result == 0) { @@ -2726,6 +2751,8 @@ int ha_federated::rnd_pos(uchar *buf, uchar *pos) { int result; DBUG_ENTER("ha_federated::rnd_pos"); + MYSQL_READ_ROW_START(table_share->db.str, table_share->table_name.str, + FALSE); ha_statistic_increment(&SSV::ha_read_rnd_count); if (table->s->primary_key != MAX_KEY) { @@ -2740,6 +2767,7 @@ int ha_federated::rnd_pos(uchar *buf, uchar *pos) result= 0; } table->status= result ? 
STATUS_NOT_FOUND : 0; + MYSQL_READ_ROW_DONE(result); DBUG_RETURN(result); } diff --git a/storage/federated/ha_federated.h b/storage/federated/ha_federated.h index 1974f9936fc..552676b6ae8 100644 --- a/storage/federated/ha_federated.h +++ b/storage/federated/ha_federated.h @@ -232,6 +232,7 @@ public: int rnd_init(bool scan); //required int rnd_end(); int rnd_next(uchar *buf); //required + int rnd_next_int(uchar *buf); int rnd_pos(uchar *buf, uchar *pos); //required void position(const uchar *record); //required int info(uint); //required diff --git a/storage/heap/Makefile.am b/storage/heap/Makefile.am index ec1445dea67..5528fec71f1 100644 --- a/storage/heap/Makefile.am +++ b/storage/heap/Makefile.am @@ -51,5 +51,20 @@ libheap_a_SOURCES = hp_open.c hp_extra.c hp_close.c hp_panic.c hp_info.c \ EXTRA_DIST = CMakeLists.txt plug.in +if HAVE_DTRACE_DASH_G +libheap_a_LIBADD = probes_mysql.o +libheap_a_DEPENDENCIES = probes_mysql.o dtrace_files dtrace_providers +CLEANFILES = probes_mysql.o dtrace_files dtrace_providers +DTRACEFILES = ha_heap.o +DTRACEPROVIDER = $(abs_top_srcdir)/sql/probes_mysql.d + +dtrace_files: + echo $(DTRACEFILES) > $@ +dtrace_providers: + echo $(DTRACEPROVIDER) > $@ +probes_mysql.o: $(DTRACEPROVIDER) $(DTRACEFILES) + $(DTRACE) $(DTRACEFLAGS) -G -s $< $(DTRACEFILES) -o $@ +endif + # Don't update the files from bitkeeper %::SCCS/s.% diff --git a/storage/heap/ha_heap.cc b/storage/heap/ha_heap.cc index 19863d83874..1f74ad0f941 100644 --- a/storage/heap/ha_heap.cc +++ b/storage/heap/ha_heap.cc @@ -20,6 +20,7 @@ #define MYSQL_SERVER 1 #include "mysql_priv.h" +#include "probes_mysql.h" #include <mysql/plugin.h> #include "ha_heap.h" #include "heapdef.h" @@ -274,21 +275,25 @@ int ha_heap::index_read_map(uchar *buf, const uchar *key, key_part_map keypart_map, enum ha_rkey_function find_flag) { + MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str); DBUG_ASSERT(inited==INDEX); ha_statistic_increment(&SSV::ha_read_key_count); int error = 
heap_rkey(file,buf,active_index, key, keypart_map, find_flag); table->status = error ? STATUS_NOT_FOUND : 0; + MYSQL_INDEX_READ_ROW_DONE(error); return error; } int ha_heap::index_read_last_map(uchar *buf, const uchar *key, key_part_map keypart_map) { + MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str); DBUG_ASSERT(inited==INDEX); ha_statistic_increment(&SSV::ha_read_key_count); int error= heap_rkey(file, buf, active_index, key, keypart_map, HA_READ_PREFIX_LAST); table->status= error ? STATUS_NOT_FOUND : 0; + MYSQL_INDEX_READ_ROW_DONE(error); return error; } @@ -296,45 +301,55 @@ int ha_heap::index_read_idx_map(uchar *buf, uint index, const uchar *key, key_part_map keypart_map, enum ha_rkey_function find_flag) { + MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str); ha_statistic_increment(&SSV::ha_read_key_count); int error = heap_rkey(file, buf, index, key, keypart_map, find_flag); table->status = error ? STATUS_NOT_FOUND : 0; + MYSQL_INDEX_READ_ROW_DONE(error); return error; } int ha_heap::index_next(uchar * buf) { + MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str); DBUG_ASSERT(inited==INDEX); ha_statistic_increment(&SSV::ha_read_next_count); int error=heap_rnext(file,buf); table->status=error ? STATUS_NOT_FOUND: 0; + MYSQL_INDEX_READ_ROW_DONE(error); return error; } int ha_heap::index_prev(uchar * buf) { + MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str); DBUG_ASSERT(inited==INDEX); ha_statistic_increment(&SSV::ha_read_prev_count); int error=heap_rprev(file,buf); table->status=error ? STATUS_NOT_FOUND: 0; + MYSQL_INDEX_READ_ROW_DONE(error); return error; } int ha_heap::index_first(uchar * buf) { + MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str); DBUG_ASSERT(inited==INDEX); ha_statistic_increment(&SSV::ha_read_first_count); int error=heap_rfirst(file, buf, active_index); table->status=error ? 
STATUS_NOT_FOUND: 0; + MYSQL_INDEX_READ_ROW_DONE(error); return error; } int ha_heap::index_last(uchar * buf) { + MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str); DBUG_ASSERT(inited==INDEX); ha_statistic_increment(&SSV::ha_read_last_count); int error=heap_rlast(file, buf, active_index); table->status=error ? STATUS_NOT_FOUND: 0; + MYSQL_INDEX_READ_ROW_DONE(error); return error; } @@ -345,9 +360,12 @@ int ha_heap::rnd_init(bool scan) int ha_heap::rnd_next(uchar *buf) { + MYSQL_READ_ROW_START(table_share->db.str, table_share->table_name.str, + TRUE); ha_statistic_increment(&SSV::ha_read_rnd_next_count); int error=heap_scan(file, buf); table->status=error ? STATUS_NOT_FOUND: 0; + MYSQL_READ_ROW_DONE(error); return error; } @@ -355,10 +373,13 @@ int ha_heap::rnd_pos(uchar * buf, uchar *pos) { int error; HEAP_PTR heap_position; + MYSQL_READ_ROW_START(table_share->db.str, table_share->table_name.str, + FALSE); ha_statistic_increment(&SSV::ha_read_rnd_count); memcpy_fixed((char*) &heap_position, pos, sizeof(HEAP_PTR)); error=heap_rrnd(file, buf, heap_position); table->status=error ? 
STATUS_NOT_FOUND: 0; + MYSQL_READ_ROW_DONE(error); return error; } diff --git a/storage/innobase/btr/btr0cur.c b/storage/innobase/btr/btr0cur.c index 54acdf73db6..a2f62255dd6 100644 --- a/storage/innobase/btr/btr0cur.c +++ b/storage/innobase/btr/btr0cur.c @@ -333,7 +333,7 @@ btr_cur_search_to_nth_level( #ifdef UNIV_SEARCH_PERF_STAT info->n_searches++; #endif - if (btr_search_latch.writer == RW_LOCK_NOT_LOCKED + if (rw_lock_get_writer(&btr_search_latch) == RW_LOCK_NOT_LOCKED && latch_mode <= BTR_MODIFY_LEAF && info->last_hash_succ && !estimate #ifdef PAGE_CUR_LE_OR_EXTENDS diff --git a/storage/innobase/btr/btr0sea.c b/storage/innobase/btr/btr0sea.c index 3482e16497a..6a13ca863bd 100644 --- a/storage/innobase/btr/btr0sea.c +++ b/storage/innobase/btr/btr0sea.c @@ -774,8 +774,8 @@ btr_search_guess_on_hash( rw_lock_s_lock(&btr_search_latch); } - ut_ad(btr_search_latch.writer != RW_LOCK_EX); - ut_ad(btr_search_latch.reader_count > 0); + ut_ad(rw_lock_get_writer(&btr_search_latch) != RW_LOCK_EX); + ut_ad(rw_lock_get_reader_count(&btr_search_latch) > 0); rec = ha_search_and_get_data(btr_search_sys->hash_index, fold); diff --git a/storage/innobase/buf/buf0buf.c b/storage/innobase/buf/buf0buf.c index 901ce8e0fef..5b4f0ee6ecb 100644 --- a/storage/innobase/buf/buf0buf.c +++ b/storage/innobase/buf/buf0buf.c @@ -1277,8 +1277,8 @@ loop: if (mode == BUF_GET_NOWAIT) { if (rw_latch == RW_S_LATCH) { - success = rw_lock_s_lock_func_nowait(&(block->lock), - file, line); + success = rw_lock_s_lock_nowait(&(block->lock), + file, line); fix_type = MTR_MEMO_PAGE_S_FIX; } else { ut_ad(rw_latch == RW_X_LATCH); @@ -1403,8 +1403,8 @@ buf_page_optimistic_get_func( ut_ad(!ibuf_inside() || ibuf_page(block->space, block->offset)); if (rw_latch == RW_S_LATCH) { - success = rw_lock_s_lock_func_nowait(&(block->lock), - file, line); + success = rw_lock_s_lock_nowait(&(block->lock), + file, line); fix_type = MTR_MEMO_PAGE_S_FIX; } else { success = rw_lock_x_lock_func_nowait(&(block->lock), @@ -1534,8 
+1534,8 @@ buf_page_get_known_nowait( ut_ad(!ibuf_inside() || (mode == BUF_KEEP_OLD)); if (rw_latch == RW_S_LATCH) { - success = rw_lock_s_lock_func_nowait(&(block->lock), - file, line); + success = rw_lock_s_lock_nowait(&(block->lock), + file, line); fix_type = MTR_MEMO_PAGE_S_FIX; } else { success = rw_lock_x_lock_func_nowait(&(block->lock), diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 50c3917c43c..cffa79300c7 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -98,7 +98,7 @@ static const long AUTOINC_NO_LOCKING = 2; static long innobase_mirrored_log_groups, innobase_log_files_in_group, innobase_log_buffer_size, innobase_buffer_pool_awe_mem_mb, - innobase_additional_mem_pool_size, innobase_file_io_threads, + innobase_additional_mem_pool_size, innobase_lock_wait_timeout, innobase_force_recovery, innobase_open_files, innobase_autoinc_lock_mode; @@ -133,6 +133,24 @@ static my_bool innobase_adaptive_hash_index = TRUE; static char* internal_innobase_data_file_path = NULL; +/* Default number of IO per second supported by server. Tunes background + IO rate. */ +static long innobase_io_capacity = 100; + +/* Write dirty pages when pct dirty is less than max pct dirty */ +static my_bool innobase_extra_dirty_writes = TRUE; + +/* Max number of IO requests merged to perform large IO in background + IO threads. +*/ +long innobase_max_merged_io = 64; + +/* Number of background IO threads for read and write. 
*/ +long innobase_read_io_threads, innobase_write_io_threads; + +/* Use timer based InnoDB concurrency throttling flag */ +static my_bool innobase_thread_concurrency_timer_based; + /* The following counter is used to convey information to InnoDB about server activity: in selects it is not sensible to call srv_active_wake_master_thread after each fetch or search, we only do @@ -374,6 +392,10 @@ static SHOW_VAR innodb_status_variables[]= { (char*) &export_vars.innodb_dblwr_pages_written, SHOW_LONG}, {"dblwr_writes", (char*) &export_vars.innodb_dblwr_writes, SHOW_LONG}, + {"have_sync_atomic", + (char*) &export_vars.innodb_have_sync_atomic, SHOW_BOOL}, + {"heap_enabled", + (char*) &export_vars.innodb_heap_enabled, SHOW_BOOL}, {"log_waits", (char*) &export_vars.innodb_log_waits, SHOW_LONG}, {"log_write_requests", @@ -414,6 +436,8 @@ static SHOW_VAR innodb_status_variables[]= { (char*) &export_vars.innodb_rows_read, SHOW_LONG}, {"rows_updated", (char*) &export_vars.innodb_rows_updated, SHOW_LONG}, + {"wake_ups", + (char*) &export_vars.innodb_wake_ups, SHOW_LONG}, {NullS, NullS, SHOW_LONG} }; @@ -1668,11 +1692,17 @@ innobase_init( srv_n_log_files = (ulint) innobase_log_files_in_group; srv_log_file_size = (ulint) innobase_log_file_size; + srv_thread_concurrency_timer_based = + (ibool) innobase_thread_concurrency_timer_based; + #ifdef UNIV_LOG_ARCHIVE srv_log_archive_on = (ulint) innobase_log_archive; #endif /* UNIV_LOG_ARCHIVE */ srv_log_buffer_size = (ulint) innobase_log_buffer_size; + srv_io_capacity = (ulint) innobase_io_capacity; + srv_extra_dirty_writes = (ulint) innobase_extra_dirty_writes; + /* We set srv_pool_size here in units of 1 kB. InnoDB internally changes the value so that it becomes the number of database pages. 
*/ @@ -1692,7 +1722,9 @@ innobase_init( srv_mem_pool_size = (ulint) innobase_additional_mem_pool_size; - srv_n_file_io_threads = (ulint) innobase_file_io_threads; + srv_n_read_io_threads = (ulint) innobase_read_io_threads; + srv_n_write_io_threads = (ulint) innobase_write_io_threads; + srv_max_merged_io = (ulint) innobase_max_merged_io; srv_lock_wait_timeout = (ulint) innobase_lock_wait_timeout; srv_force_recovery = (ulint) innobase_force_recovery; @@ -7071,8 +7103,7 @@ innodb_show_status( mutex_enter_noninline(&srv_monitor_file_mutex); rewind(srv_monitor_file); - srv_printf_innodb_monitor(srv_monitor_file, - &trx_list_start, &trx_list_end); + srv_printf_innodb_monitor(srv_monitor_file); flen = ftell(srv_monitor_file); os_file_set_eof(srv_monitor_file); @@ -7141,6 +7172,7 @@ innodb_mutex_show_status( { char buf1[IO_SIZE], buf2[IO_SIZE]; mutex_t* mutex; + rw_lock_t* lock; #ifdef UNIV_DEBUG ulint rw_lock_count= 0; ulint rw_lock_count_spin_loop= 0; @@ -7211,6 +7243,31 @@ innodb_mutex_show_status( mutex_exit_noninline(&mutex_list_mutex); + mutex_enter_noninline(&rw_lock_list_mutex); + + lock = UT_LIST_GET_FIRST(rw_lock_list); + + while (lock != NULL) + { + if (lock->count_os_wait) + { + buf1len= my_snprintf(buf1, sizeof(buf1), "%s:%lu", + lock->cfile_name, (ulong) lock->cline); + buf2len= my_snprintf(buf2, sizeof(buf2), + "os_waits=%lu", lock->count_os_wait); + + if (stat_print(thd, innobase_hton_name, + hton_name_len, buf1, buf1len, + buf2, buf2len)) { + mutex_exit_noninline(&rw_lock_list_mutex); + DBUG_RETURN(1); + } + } + lock = UT_LIST_GET_NEXT(list, lock); + } + + mutex_exit_noninline(&rw_lock_list_mutex); + #ifdef UNIV_DEBUG buf2len= my_snprintf(buf2, sizeof(buf2), "count=%lu, spin_waits=%lu, spin_rounds=%lu, " @@ -7243,6 +7300,7 @@ bool innobase_show_status(handlerton *hton, THD* thd, return FALSE; } } + rw_lock_t* lock; /**************************************************************************** @@ -8173,6 +8231,16 @@ static MYSQL_SYSVAR_BOOL(doublewrite, 
innobase_use_doublewrite, "Disable with --skip-innodb-doublewrite.", NULL, NULL, TRUE); +static MYSQL_SYSVAR_BOOL(extra_dirty_writes, innobase_extra_dirty_writes, + PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY, + "Flush dirty buffer pages when dirty max pct is not exceeded", + NULL, NULL, TRUE); + +static MYSQL_SYSVAR_LONG(io_capacity, innobase_io_capacity, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "Number of IOPs the server can do. Tunes the background IO rate", + NULL, NULL, 200, 100, ~0L, 0); + static MYSQL_SYSVAR_ULONG(fast_shutdown, innobase_fast_shutdown, PLUGIN_VAR_OPCMDARG, "Speeds up the shutdown process of the InnoDB storage engine. Possible " @@ -8223,7 +8291,7 @@ static MYSQL_SYSVAR_STR(log_group_home_dir, innobase_log_group_home_dir, static MYSQL_SYSVAR_ULONG(max_dirty_pages_pct, srv_max_buf_pool_modified_pct, PLUGIN_VAR_RQCMDARG, "Percentage of dirty pages allowed in bufferpool.", - NULL, NULL, 90, 0, 100, 0); + NULL, NULL, 75, 0, 99, 0); static MYSQL_SYSVAR_ULONG(max_purge_lag, srv_max_purge_lag, PLUGIN_VAR_RQCMDARG, @@ -8254,17 +8322,17 @@ static MYSQL_SYSVAR_BOOL(adaptive_hash_index, innobase_adaptive_hash_index, static MYSQL_SYSVAR_LONG(additional_mem_pool_size, innobase_additional_mem_pool_size, PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, "Size of a memory pool InnoDB uses to store data dictionary information and other internal data structures.", - NULL, NULL, 1*1024*1024L, 512*1024L, LONG_MAX, 1024); + NULL, NULL, 8*1024*1024L, 2*1024*1024L, LONG_MAX, 1024); static MYSQL_SYSVAR_ULONG(autoextend_increment, srv_auto_extend_increment, PLUGIN_VAR_RQCMDARG, "Data file autoextend increment in megabytes", - NULL, NULL, 8L, 1L, 1000L, 0); + NULL, NULL, 64L, 1L, 1000L, 0); static MYSQL_SYSVAR_LONGLONG(buffer_pool_size, innobase_buffer_pool_size, PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, "The size of the memory buffer InnoDB uses to cache data and indexes of its tables.", - NULL, NULL, 8*1024*1024L, 1024*1024L, LONGLONG_MAX, 1024*1024L); + NULL, NULL, 
1024*1024*1024L, 64*1024*1024L, LONGLONG_MAX, 1024*1024L); static MYSQL_SYSVAR_ULONG(commit_concurrency, srv_commit_concurrency, PLUGIN_VAR_RQCMDARG, @@ -8276,10 +8344,20 @@ static MYSQL_SYSVAR_ULONG(concurrency_tickets, srv_n_free_tickets_to_enter, "Number of times a thread is allowed to enter InnoDB within the same SQL query after it has once got the ticket", NULL, NULL, 500L, 1L, ~0L, 0); -static MYSQL_SYSVAR_LONG(file_io_threads, innobase_file_io_threads, +static MYSQL_SYSVAR_LONG(write_io_threads, innobase_write_io_threads, PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "Number of file I/O threads in InnoDB.", - NULL, NULL, 4, 4, 64, 0); + "Number of write I/O threads in InnoDB.", + NULL, NULL, 8, 1, 64, 0); + +static MYSQL_SYSVAR_LONG(read_io_threads, innobase_read_io_threads, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "Number of read I/O threads in InnoDB.", + NULL, NULL, 8, 1, 64, 0); + +static MYSQL_SYSVAR_LONG(max_merged_io, innobase_max_merged_io, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "Max number of adjacent IO requests to merge in InnoDB.", + NULL, NULL, 64, 1, 64, 0); static MYSQL_SYSVAR_LONG(force_recovery, innobase_force_recovery, PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, @@ -8294,17 +8372,17 @@ static MYSQL_SYSVAR_LONG(lock_wait_timeout, innobase_lock_wait_timeout, static MYSQL_SYSVAR_LONG(log_buffer_size, innobase_log_buffer_size, PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, "The size of the buffer which InnoDB uses to write log to the log files on disk.", - NULL, NULL, 1024*1024L, 256*1024L, LONG_MAX, 1024); + NULL, NULL, 16*1024*1024L, 2*1024*1024L, LONG_MAX, 1024); static MYSQL_SYSVAR_LONGLONG(log_file_size, innobase_log_file_size, PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, "Size of each log file in a log group.", - NULL, NULL, 5*1024*1024L, 1*1024*1024L, LONGLONG_MAX, 1024*1024L); + NULL, NULL, 128*1024*1024L, 32*1024*1024L, LONGLONG_MAX, 1024*1024L); static MYSQL_SYSVAR_LONG(log_files_in_group, innobase_log_files_in_group, 
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, "Number of log files in the log group. InnoDB writes to the files in a circular fashion. Value 3 is recommended here.", - NULL, NULL, 2, 2, 100, 0); + NULL, NULL, 3, 2, 100, 0); static MYSQL_SYSVAR_LONG(mirrored_log_groups, innobase_mirrored_log_groups, PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, @@ -8321,10 +8399,16 @@ static MYSQL_SYSVAR_ULONG(sync_spin_loops, srv_n_spin_wait_rounds, "Count of spin-loop rounds in InnoDB mutexes", NULL, NULL, 20L, 0L, ~0L, 0); +static MYSQL_SYSVAR_BOOL(thread_concurrency_timer_based, + innobase_thread_concurrency_timer_based, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "Use InnoDB timer based concurrency throttling. ", + NULL, NULL, TRUE); + static MYSQL_SYSVAR_ULONG(thread_concurrency, srv_thread_concurrency, PLUGIN_VAR_RQCMDARG, "Helps in performance tuning in heavily concurrent environments. Sets the maximum number of threads allowed inside InnoDB. Value 0 will disable the thread throttling.", - NULL, NULL, 8, 0, 1000, 0); + NULL, NULL, 0, 0, 1000, 0); static MYSQL_SYSVAR_ULONG(thread_sleep_delay, srv_thread_sleep_delay, PLUGIN_VAR_RQCMDARG, @@ -8359,7 +8443,10 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(data_home_dir), MYSQL_SYSVAR(doublewrite), MYSQL_SYSVAR(fast_shutdown), - MYSQL_SYSVAR(file_io_threads), + MYSQL_SYSVAR(read_io_threads), + MYSQL_SYSVAR(write_io_threads), + MYSQL_SYSVAR(max_merged_io), + MYSQL_SYSVAR(thread_concurrency_timer_based), MYSQL_SYSVAR(file_per_table), MYSQL_SYSVAR(flush_log_at_trx_commit), MYSQL_SYSVAR(flush_method), @@ -8388,6 +8475,8 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(thread_concurrency), MYSQL_SYSVAR(thread_sleep_delay), MYSQL_SYSVAR(autoinc_lock_mode), + MYSQL_SYSVAR(extra_dirty_writes), + MYSQL_SYSVAR(io_capacity), NULL }; diff --git a/storage/innobase/include/buf0buf.ic b/storage/innobase/include/buf0buf.ic index b077ff0c181..4e96e13b8dc 100644 --- 
a/storage/innobase/include/buf0buf.ic +++ b/storage/innobase/include/buf0buf.ic @@ -513,7 +513,7 @@ buf_block_buf_fix_inc_debug( { ibool ret; - ret = rw_lock_s_lock_func_nowait(&(block->debug_latch), file, line); + ret = rw_lock_s_lock_nowait(&(block->debug_latch), file, line); ut_ad(ret == TRUE); ut_ad(mutex_own(&block->mutex)); diff --git a/storage/innobase/include/log0log.h b/storage/innobase/include/log0log.h index 43662d02a34..337b9f1e783 100644 --- a/storage/innobase/include/log0log.h +++ b/storage/innobase/include/log0log.h @@ -169,6 +169,13 @@ void log_buffer_flush_to_disk(void); /*==========================*/ /******************************************************************** +Flushes the log buffer. Forces it to disk depending on the value of +the configuration parameter innodb_flush_log_at_trx_commit. */ + +void +log_buffer_flush_maybe_sync(void); +/*==========================*/ +/******************************************************************** Advances the smallest lsn for which there are unflushed dirty blocks in the buffer pool and also may make a new checkpoint. NOTE: this function may only be called if the calling thread owns no synchronization objects! */ diff --git a/storage/innobase/include/os0file.h b/storage/innobase/include/os0file.h index 9eb44d3f4a8..70c07ea6d1a 100644 --- a/storage/innobase/include/os0file.h +++ b/storage/innobase/include/os0file.h @@ -535,21 +535,19 @@ os_file_create_subdirs_if_needed( FALSE otherwise */ const char* path); /* in: path name */ /**************************************************************************** -Initializes the asynchronous io system. Creates separate aio array for -non-ibuf read and write, a third aio array for the ibuf i/o, with just one -segment, two aio arrays for log reads and writes with one segment, and a -synchronous aio array of the specified size. The combined number of segments -in the three first aio arrays is the parameter n_segments given to the -function. 
The caller must create an i/o handler thread for each segment in -the four first arrays, but not for the sync aio array. */ +Initializes the asynchronous io system. Creates n_read_threads segments for +read, n_write_threads segments for writes, one segment for the ibuf i/o, and +one segment for log IO. Returns the number of segments created. When async +IO is not used, and 4 threads should be created to process requests put +in the segments. */ -void +ulint os_aio_init( /*========*/ - ulint n, /* in: maximum number of pending aio operations - allowed; n must be divisible by n_segments */ - ulint n_segments, /* in: combined number of segments in the four - first aio arrays; must be >= 4 */ + ulint ios_per_array, /* in: maximum number of pending aio operations + allowed per array */ + ulint n_read_threads, /* in: number of read threads */ + ulint n_write_threads, /* in: number of write threads */ ulint n_slots_sync); /* in: number of slots in the sync aio array */ /*********************************************************************** Requests an asynchronous i/o operation. */ diff --git a/storage/innobase/include/os0sync.h b/storage/innobase/include/os0sync.h index a39a331c297..26d2786e33b 100644 --- a/storage/innobase/include/os0sync.h +++ b/storage/innobase/include/os0sync.h @@ -12,6 +12,10 @@ Created 9/6/1995 Heikki Tuuri #include "univ.i" #include "ut0lst.h" +#ifdef HAVE_SOLARIS_ATOMIC +#include <atomic.h> +#endif + #ifdef __WIN__ #define os_fast_mutex_t CRITICAL_SECTION @@ -261,6 +265,45 @@ os_fast_mutex_free( /*===============*/ os_fast_mutex_t* fast_mutex); /* in: mutex to free */ +#ifdef UNIV_SYNC_ATOMIC +/************************************************************** +Atomic compare-and-swap for InnoDB. Currently requires GCC atomic builtins. 
*/ +UNIV_INLINE +ibool +os_compare_and_swap( +/*================*/ + /* out: true if swapped */ + volatile lint* ptr, /* in: pointer to target */ + lint oldVal, /* in: value to compare to */ + lint newVal); /* in: value to swap in */ + +/************************************************************** +Atomic increment for InnoDB. Requires GCC atomic builtins or Solaris atomic_* functions. */ +UNIV_INLINE +lint +os_atomic_increment( +/*================*/ + /* out: resulting value */ + volatile lint* ptr, /* in: pointer to target */ + lint amount); /* in: amount of increment */ + +/************************************************************** +Memory barrier operations for InnoDB. +Currently requires GCC atomic builtins or Solaris atomic_* functions. */ +UNIV_INLINE +void +os_memory_barrier_load(); + +UNIV_INLINE +void +os_memory_barrier_store(); + +UNIV_INLINE +void +os_memory_barrier(); + +#endif /* UNIV_SYNC_ATOMIC */ + #ifndef UNIV_NONINL #include "os0sync.ic" #endif diff --git a/storage/innobase/include/os0sync.ic b/storage/innobase/include/os0sync.ic index 75dea9369c2..58593789e0d 100644 --- a/storage/innobase/include/os0sync.ic +++ b/storage/innobase/include/os0sync.ic @@ -44,3 +44,88 @@ os_fast_mutex_trylock( #endif #endif } + +#ifdef UNIV_SYNC_ATOMIC +/************************************************************** +Atomic compare-and-swap for InnoDB. Currently requires GCC atomic builtins +or Solaris atomic_* functions. 
*/ +UNIV_INLINE +ibool +os_compare_and_swap( +/*================*/ + /* out: true if swapped */ + volatile lint* ptr, /* in: pointer to target */ + lint oldVal, /* in: value to compare to */ + lint newVal) /* in: value to swap in */ +{ +#ifdef HAVE_GCC_ATOMIC_BUILTINS + return (__sync_bool_compare_and_swap(ptr, oldVal, newVal)); +#elif HAVE_SOLARIS_ATOMIC + lint retVal = (lint)atomic_cas_ulong((volatile ulong_t *)ptr, + oldVal, newVal); + return (retVal == oldVal); +#else +#error "Need support for atomic ops" +#endif +} + +/************************************************************** +Memory barrier for load */ +UNIV_INLINE +void +os_memory_barrier_load() +{ +#ifdef HAVE_GCC_ATOMIC_BUILTINS + __sync_synchronize(); +#elif HAVE_SOLARIS_ATOMIC + membar_consumer(); +#endif +} + +/************************************************************** +Memory barrier for store */ +UNIV_INLINE +void +os_memory_barrier_store() +{ +#ifdef HAVE_GCC_ATOMIC_BUILTINS + __sync_synchronize(); +#elif HAVE_SOLARIS_ATOMIC + membar_producer(); +#endif +} + +/************************************************************** +Memory barrier */ +UNIV_INLINE +void +os_memory_barrier() +{ +#ifdef HAVE_GCC_ATOMIC_BUILTINS + __sync_synchronize(); +#elif HAVE_SOLARIS_ATOMIC + membar_enter(); +#endif +} + + +/************************************************************** +Atomic increment for InnoDB. Requires GCC atomic builtins or Solaris atomic_* functions. 
*/ +UNIV_INLINE +lint +os_atomic_increment( +/*================*/ + /* out: resulting value */ + volatile lint* ptr, /* in: pointer to target */ + lint amount) /* in: amount of increment */ +{ +#ifdef HAVE_GCC_ATOMIC_BUILTINS + return (__sync_add_and_fetch(ptr, amount)); +#elif HAVE_SOLARIS_ATOMIC + return ((lint)atomic_add_long_nv((volatile ulong_t *)ptr, amount)); +#else +#error "Need support for atomic ops" +#endif +} + +#endif /* UNIV_SYNC_ATOMIC */ diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h index 2516937565d..6816823641c 100644 --- a/storage/innobase/include/srv0srv.h +++ b/storage/innobase/include/srv0srv.h @@ -89,7 +89,22 @@ extern ulint srv_awe_window_size; extern ulint srv_mem_pool_size; extern ulint srv_lock_table_size; -extern ulint srv_n_file_io_threads; +extern ibool srv_thread_concurrency_timer_based; + +/* Number of background IO threads for read and write. Replaces + * srv_n_file_io_threads. */ +extern ulint srv_n_read_io_threads; +extern ulint srv_n_write_io_threads; +/* Max number of adjacent IO requests to merge into one large request. */ +extern ulint srv_max_merged_io; + +/* Number of IO operations per second the server can do */ +extern ulint srv_io_capacity; + +/* Flush dirty pages when below max dirty percent */ +extern ibool srv_extra_dirty_writes; + + #ifdef UNIV_LOG_ARCHIVE extern ibool srv_log_archive_on; @@ -235,6 +250,9 @@ extern ulint srv_read_ahead_seq; /* variable to count the number of random read-aheads were done */ extern ulint srv_read_ahead_rnd; +/* Number of threads that may have missed a lock wait wakeup */ +extern ulint sync_wake_ups; + /* In this structure we store status variables to be passed to MySQL */ typedef struct export_var_struct export_struc; @@ -447,11 +465,7 @@ Outputs to a file the output of the InnoDB Monitor. 
*/ void srv_printf_innodb_monitor( /*======================*/ - FILE* file, /* in: output stream */ - ulint* trx_start, /* out: file position of the start of - the list of active transactions */ - ulint* trx_end); /* out: file position of the end of - the list of active transactions */ + FILE* file); /* in: output stream */ /********************************************************************** Function to pass InnoDB status variables to MySQL */ @@ -511,6 +525,8 @@ struct export_var_struct{ ulint innodb_buffer_pool_read_ahead_rnd; ulint innodb_dblwr_pages_written; ulint innodb_dblwr_writes; + ibool innodb_have_sync_atomic; + ibool innodb_heap_enabled; ulint innodb_log_waits; ulint innodb_log_write_requests; ulint innodb_log_writes; @@ -531,6 +547,7 @@ struct export_var_struct{ ulint innodb_rows_inserted; ulint innodb_rows_updated; ulint innodb_rows_deleted; + ulint innodb_wake_ups; }; /* The server system struct */ @@ -547,4 +564,3 @@ struct srv_sys_struct{ extern ulint srv_n_threads_active[]; #endif - diff --git a/storage/innobase/include/sync0rw.h b/storage/innobase/include/sync0rw.h index 008df80a2c7..6de26535689 100644 --- a/storage/innobase/include/sync0rw.h +++ b/storage/innobase/include/sync0rw.h @@ -24,6 +24,12 @@ smaller than 30 and the order of the numerical values like below! */ #define RW_X_LATCH 2 #define RW_NO_LATCH 3 +/* We decrement lock_word by this amount for each x_lock. It is also the +start value for the lock_word, meaning that it limits the maximum number +of concurrent read locks before the rw_lock breaks. 
The current value of +0x00100000 allows 1,048,575 concurrent readers and 2047 recursive writers.*/ +#define X_LOCK_DECR 0x00100000 + typedef struct rw_lock_struct rw_lock_t; #ifdef UNIV_SYNC_DEBUG typedef struct rw_lock_debug_struct rw_lock_debug_t; @@ -47,14 +53,14 @@ extern ibool rw_lock_debug_waiters; /* This is set to TRUE, if there may be waiters for the event */ #endif /* UNIV_SYNC_DEBUG */ -extern ulint rw_s_system_call_count; -extern ulint rw_s_spin_wait_count; -extern ulint rw_s_exit_count; -extern ulint rw_s_os_wait_count; -extern ulint rw_x_system_call_count; -extern ulint rw_x_spin_wait_count; -extern ulint rw_x_os_wait_count; -extern ulint rw_x_exit_count; +extern ib_longlong rw_s_spin_wait_count; +extern ib_longlong rw_s_spin_round_count; +extern ib_longlong rw_s_exit_count; +extern ib_longlong rw_s_os_wait_count; +extern ib_longlong rw_x_spin_wait_count; +extern ib_longlong rw_x_spin_round_count; +extern ib_longlong rw_x_os_wait_count; +extern ib_longlong rw_x_exit_count; /********************************************************************** Creates, or rather, initializes an rw-lock object in a specified memory @@ -111,6 +117,20 @@ rw_lock_validate( /*=============*/ rw_lock_t* lock); #endif /* UNIV_DEBUG */ +/********************************************************************** +Low-level function which tries to lock an rw-lock in s-mode. Performs no +spinning. */ +UNIV_INLINE +ibool +rw_lock_s_lock_low( +/*===============*/ + /* out: TRUE if success */ + rw_lock_t* lock, /* in: pointer to rw-lock */ + ulint pass, + /* in: pass value; != 0, if the lock will be + passed to another thread to unlock */ + const char* file_name, /* in: file name where lock requested */ + ulint line); /* in: line where requested */ /****************************************************************** NOTE! The following macros should be used in rw s-locking, not the corresponding function. */ @@ -127,8 +147,8 @@ corresponding function. */ NOTE! 
The following macros should be used in rw s-locking, not the corresponding function. */ -#define rw_lock_s_lock_nowait(M) rw_lock_s_lock_func_nowait(\ - (M), __FILE__, __LINE__) +#define rw_lock_s_lock_nowait(M, F, L) rw_lock_s_lock_low(\ + (M), 0, (F), (L)) /********************************************************************** NOTE! Use the corresponding macro, not directly this function, except if you supply the file name and line number. Lock an rw-lock in shared mode @@ -146,18 +166,6 @@ rw_lock_s_lock_func( const char* file_name,/* in: file name where lock requested */ ulint line); /* in: line where requested */ /********************************************************************** -NOTE! Use the corresponding macro, not directly this function, except if -you supply the file name and line number. Lock an rw-lock in shared mode -for the current thread if the lock can be acquired immediately. */ -UNIV_INLINE -ibool -rw_lock_s_lock_func_nowait( -/*=======================*/ - /* out: TRUE if success */ - rw_lock_t* lock, /* in: pointer to rw-lock */ - const char* file_name,/* in: file name where lock requested */ - ulint line); /* in: line where requested */ -/********************************************************************** NOTE! Use the corresponding macro, not directly this function! Lock an rw-lock in exclusive mode for the current thread if the lock can be obtained immediately. */ @@ -341,6 +349,23 @@ ulint rw_lock_get_reader_count( /*=====================*/ rw_lock_t* lock); +/********************************************************************** +Decrements lock_word the specified amount if it is greater than 0. +This is used by both s_lock and x_lock operations. 
*/ +UNIV_INLINE +ibool +rw_lock_lock_word_decr( + /* out: TRUE if decr occurs */ + rw_lock_t* lock, /* in: rw-lock */ + ulint amount); /* in: amount to decrement */ +/********************************************************************** +Increments lock_word the specified amount and returns new value. */ +UNIV_INLINE +lint +rw_lock_lock_word_incr( + /* out: lock_word after the increment */ + rw_lock_t* lock, /* in: rw-lock */ + ulint amount); /* in: amount to increment */ #ifdef UNIV_SYNC_DEBUG /********************************************************************** Checks if the thread has locked the rw-lock in the specified mode, with @@ -417,44 +442,28 @@ Do not use its fields directly! The structure used in the spin lock implementation of a read-write lock. Several threads may have a shared lock simultaneously in this lock, but only one writer may have an exclusive lock, in which case no shared locks are allowed. To prevent starving of a writer -blocked by readers, a writer may queue for the lock by setting the writer -field. Then no new readers are allowed in. */ +blocked by readers, a writer may queue for x-lock by decrementing lock_word: +no new readers will be let in while the thread waits for readers to exit. */ struct rw_lock_struct { - os_event_t event; /* Used by sync0arr.c for thread queueing */ - -#ifdef __WIN__ - os_event_t wait_ex_event; /* This windows specific event is - used by the thread which has set the - lock state to RW_LOCK_WAIT_EX. The - rw_lock design guarantees that this - thread will be the next one to proceed - once the current the event gets - signalled. See LEMMA 2 in sync0sync.c */ -#endif - - ulint reader_count; /* Number of readers who have locked this - lock in the shared mode */ - ulint writer; /* This field is set to RW_LOCK_EX if there - is a writer owning the lock (in exclusive - mode), RW_LOCK_WAIT_EX if a writer is - queueing for the lock, and - RW_LOCK_NOT_LOCKED, otherwise.
*/ - os_thread_id_t writer_thread; - /* Thread id of a possible writer thread */ - ulint writer_count; /* Number of times the same thread has - recursively locked the lock in the exclusive - mode */ - mutex_t mutex; /* The mutex protecting rw_lock_struct */ - ulint pass; /* Default value 0. This is set to some + volatile lint lock_word; + /* Holds the state of the lock. */ + volatile ulint waiters;/* 1: there are waiters */ + volatile ulint pass; /* Default value 0. This is set to some value != 0 given by the caller of an x-lock operation, if the x-lock is to be passed to another thread to unlock (which happens in asynchronous i/o). */ - ulint waiters; /* This ulint is set to 1 if there are - waiters (readers or writers) in the global - wait array, waiting for this rw_lock. - Otherwise, == 0. */ + volatile os_thread_id_t writer_thread; + /* Thread id of writer thread */ + os_event_t event; /* Used by sync0arr.c for thread queueing */ + os_event_t wait_ex_event; + /* Event for next-writer to wait on. A thread + must decrement lock_word before waiting. */ +#ifndef UNIV_SYNC_ATOMIC + mutex_t mutex; /* The mutex protecting rw_lock_struct */ +#endif /* UNIV_SYNC_ATOMIC */ + UT_LIST_NODE_T(rw_lock_t) list; /* All allocated rw locks are put into a list */ @@ -464,7 +473,9 @@ struct rw_lock_struct { info list of the lock */ ulint level; /* Level in the global latching order. */ #endif /* UNIV_SYNC_DEBUG */ + ulint count_os_wait; /* Count of os_waits. 
May not be accurate */ const char* cfile_name;/* File name where lock created */ + /* last s-lock file/line is not guaranteed to be correct */ const char* last_s_file_name;/* File name where last s-locked */ const char* last_x_file_name;/* File name where last x-locked */ ibool writer_is_wait_ex; diff --git a/storage/innobase/include/sync0rw.ic b/storage/innobase/include/sync0rw.ic index eea639f26f4..e3f1d881cb4 100644 --- a/storage/innobase/include/sync0rw.ic +++ b/storage/innobase/include/sync0rw.ic @@ -57,45 +57,68 @@ UNIV_INLINE void rw_lock_set_waiters( /*================*/ - rw_lock_t* lock, - ulint flag) + rw_lock_t* lock) { - lock->waiters = flag; +#ifdef UNIV_SYNC_ATOMIC + os_compare_and_swap(&(lock->waiters), 0, 1); +#else /* UNIV_SYNC_ATOMIC */ + lock->waiters = 1; +#endif /* UNIV_SYNC_ATOMIC */ } UNIV_INLINE -ulint -rw_lock_get_writer( -/*===============*/ +void +rw_lock_reset_waiters( +/*================*/ rw_lock_t* lock) { - return(lock->writer); +#ifdef UNIV_SYNC_ATOMIC + os_compare_and_swap(&(lock->waiters), 1, 0); +#else /* UNIV_SYNC_ATOMIC */ + lock->waiters = 0; +#endif /* UNIV_SYNC_ATOMIC */ } + +/********************************************************************** +Returns the write-status of the lock - this function made more sense +with the old rw_lock implementation. + */ UNIV_INLINE -void -rw_lock_set_writer( +ulint +rw_lock_get_writer( /*===============*/ - rw_lock_t* lock, - ulint flag) + rw_lock_t* lock) { - lock->writer = flag; + lint lock_word = lock->lock_word; + if(lock_word > 0) { + /* return NOT_LOCKED in s-lock state, like the writer + member of the old lock implementation. 
*/ + return RW_LOCK_NOT_LOCKED; + } else if (((-lock_word) % X_LOCK_DECR) == 0) { + return RW_LOCK_EX; + } else { + ut_ad(lock_word > -X_LOCK_DECR); + return RW_LOCK_WAIT_EX; + } } + UNIV_INLINE ulint rw_lock_get_reader_count( /*=====================*/ rw_lock_t* lock) { - return(lock->reader_count); -} -UNIV_INLINE -void -rw_lock_set_reader_count( -/*=====================*/ - rw_lock_t* lock, - ulint count) -{ - lock->reader_count = count; + lint lock_word = lock->lock_word; + if(lock_word > 0) { + /* s-locked, no x-waiters */ + return(X_LOCK_DECR - lock_word); + } else if (lock_word < 0 && lock_word > -X_LOCK_DECR) { + /* s-locked, with x-waiters */ + return (ulint)(-lock_word); + } + return 0; } + +#ifndef UNIV_SYNC_ATOMIC UNIV_INLINE mutex_t* rw_lock_get_mutex( @@ -104,6 +127,7 @@ rw_lock_get_mutex( { return(&(lock->mutex)); } +#endif /********************************************************************** Returns the value of writer_count for the lock. Does not reserve the lock @@ -115,7 +139,87 @@ rw_lock_get_x_lock_count( /* out: value of writer_count */ rw_lock_t* lock) /* in: rw-lock */ { - return(lock->writer_count); + lint lock_copy = lock->lock_word; + /* If there is a reader, lock_word is not divisible by X_LOCK_DECR */ + if(lock_copy > 0 || (-lock_copy) % X_LOCK_DECR != 0) { + return 0; + } + return ((-lock_copy) / X_LOCK_DECR) + 1; +} + +/********************************************************************** +Two different implementations for decrementing the lock_word of a rw_lock: +one for systems supporting atomic operations, one for others. This +does not support recursive x-locks: they should be handled by the caller and +need not be atomic since they are performed by the current lock holder. +Returns true if the decrement was made, false if not.
*/ +UNIV_INLINE +ibool +rw_lock_lock_word_decr( + /* out: TRUE if decr occurs */ + rw_lock_t* lock, /* in: rw-lock */ + ulint amount) /* in: amount of decrement */ +{ + +#ifdef UNIV_SYNC_ATOMIC + + lint local_lock_word = lock->lock_word; + while (local_lock_word > 0) { + if(os_compare_and_swap(&(lock->lock_word), + local_lock_word, + local_lock_word - amount)) { + return TRUE; + } + local_lock_word = lock->lock_word; + } + return(FALSE); + +#else /* UNIV_SYNC_ATOMIC */ + + ibool success = FALSE; + mutex_enter(&(lock->mutex)); + if(lock->lock_word > 0) { + lock->lock_word -= amount; + success = TRUE; + } + mutex_exit(&(lock->mutex)); + return success; + +#endif /* UNIV_SYNC_ATOMIC */ + +} + +/********************************************************************** +Two different implementations for incrementing the lock_word of a rw_lock: +one for systems supporting atomic operations, one for others. +Returns the value of lock_word after increment. */ +UNIV_INLINE +lint +rw_lock_lock_word_incr( + /* out: lock->lock_word after increment */ + rw_lock_t* lock, /* in: rw-lock */ + ulint amount) /* in: amount of increment */ +{ + +#ifdef UNIV_SYNC_ATOMIC + + return(os_atomic_increment(&(lock->lock_word), amount)); + +#else /* UNIV_SYNC_ATOMIC */ + + lint local_lock_word; + + mutex_enter(&(lock->mutex)); + + lock->lock_word += amount; + local_lock_word = lock->lock_word; + + mutex_exit(&(lock->mutex)); + + return local_lock_word; + +#endif /* UNIV_SYNC_ATOMIC */ + } /********************************************************************** @@ -133,27 +237,24 @@ rw_lock_s_lock_low( const char* file_name, /* in: file name where lock requested */ ulint line) /* in: line where requested */ { - ut_ad(mutex_own(rw_lock_get_mutex(lock))); - - /* Check if the writer field is free */ - - if (UNIV_LIKELY(lock->writer == RW_LOCK_NOT_LOCKED)) { - /* Set the shared lock by incrementing the reader count */ - lock->reader_count++; + /* TODO: study performance of UNIV_LIKELY branch prediction 
hints. */ + if (!rw_lock_lock_word_decr(lock, 1)) { + /* Locking did not succeed */ + return(FALSE); + } #ifdef UNIV_SYNC_DEBUG - rw_lock_add_debug_info(lock, pass, RW_LOCK_SHARED, file_name, - line); + rw_lock_add_debug_info(lock, pass, RW_LOCK_SHARED, file_name, line); #endif - lock->last_s_file_name = file_name; - lock->last_s_line = line; - - return(TRUE); /* locking succeeded */ - } + /* These debugging values are not set safely: they may be incorrect + or even refer to a line that is invalid for the file name. */ + lock->last_s_file_name = file_name; + lock->last_s_line = line; - return(FALSE); /* locking did not succeed */ + return(TRUE); /* locking succeeded */ } +/* TODO: The "direct" functions are not used. Remove them? */ /********************************************************************** Low-level function which locks an rw-lock in s-mode when we know that it is possible and none else is currently accessing the rw-lock structure. @@ -166,11 +267,10 @@ rw_lock_s_lock_direct( const char* file_name, /* in: file name where requested */ ulint line) /* in: line where lock requested */ { - ut_ad(lock->writer == RW_LOCK_NOT_LOCKED); - ut_ad(rw_lock_get_reader_count(lock) == 0); + ut_ad(lock->lock_word == X_LOCK_DECR); - /* Set the shared lock by incrementing the reader count */ - lock->reader_count++; + /* Indicate there is a new reader by decrementing lock_word */ + lock->lock_word--; lock->last_s_file_name = file_name; lock->last_s_line = line; @@ -180,6 +280,7 @@ rw_lock_s_lock_direct( #endif } +/* TODO: The "direct" functions are not used. Remove them? */ /********************************************************************** Low-level function which locks an rw-lock in x-mode when we know that it is not locked and none else is currently accessing the rw-lock structure. 
@@ -193,12 +294,10 @@ rw_lock_x_lock_direct( ulint line) /* in: line where lock requested */ { ut_ad(rw_lock_validate(lock)); - ut_ad(rw_lock_get_reader_count(lock) == 0); - ut_ad(rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED); + ut_ad(lock->lock_word == X_LOCK_DECR); - rw_lock_set_writer(lock, RW_LOCK_EX); + lock->lock_word -= X_LOCK_DECR; lock->writer_thread = os_thread_get_curr_id(); - lock->writer_count++; lock->pass = 0; lock->last_x_file_name = file_name; @@ -240,15 +339,12 @@ rw_lock_s_lock_func( ut_ad(!rw_lock_own(lock, RW_LOCK_SHARED)); /* see NOTE above */ #endif /* UNIV_SYNC_DEBUG */ - mutex_enter(rw_lock_get_mutex(lock)); - - if (UNIV_LIKELY(rw_lock_s_lock_low(lock, pass, file_name, line))) { - mutex_exit(rw_lock_get_mutex(lock)); + /* TODO: study performance of UNIV_LIKELY branch prediction hints. */ + if (rw_lock_s_lock_low(lock, pass, file_name, line)) { return; /* Success */ } else { /* Did not succeed, try spin wait */ - mutex_exit(rw_lock_get_mutex(lock)); rw_lock_s_lock_spin(lock, pass, file_name, line); @@ -258,86 +354,66 @@ rw_lock_s_lock_func( /********************************************************************** NOTE! Use the corresponding macro, not directly this function! Lock an -rw-lock in shared mode for the current thread if the lock can be acquired -immediately. */ +rw-lock in exclusive mode for the current thread if the lock can be +obtained immediately. 
*/ UNIV_INLINE ibool -rw_lock_s_lock_func_nowait( +rw_lock_x_lock_func_nowait( /*=======================*/ /* out: TRUE if success */ rw_lock_t* lock, /* in: pointer to rw-lock */ const char* file_name,/* in: file name where lock requested */ ulint line) /* in: line where requested */ { - ibool success = FALSE; - - mutex_enter(rw_lock_get_mutex(lock)); + os_thread_id_t curr_thread = os_thread_get_curr_id(); - if (lock->writer == RW_LOCK_NOT_LOCKED) { - /* Set the shared lock by incrementing the reader count */ - lock->reader_count++; + ibool success; -#ifdef UNIV_SYNC_DEBUG - rw_lock_add_debug_info(lock, 0, RW_LOCK_SHARED, file_name, - line); -#endif - - lock->last_s_file_name = file_name; - lock->last_s_line = line; +#ifdef UNIV_SYNC_ATOMIC + success = os_compare_and_swap(&(lock->lock_word), X_LOCK_DECR, 0); +#else + success = FALSE; + mutex_enter(&(lock->mutex)); + if(lock->lock_word == X_LOCK_DECR) { + lock->lock_word = 0; success = TRUE; } + mutex_exit(&(lock->mutex)); - mutex_exit(rw_lock_get_mutex(lock)); - - return(success); -} - -/********************************************************************** -NOTE! Use the corresponding macro, not directly this function! Lock an -rw-lock in exclusive mode for the current thread if the lock can be -obtained immediately. 
*/ -UNIV_INLINE -ibool -rw_lock_x_lock_func_nowait( -/*=======================*/ - /* out: TRUE if success */ - rw_lock_t* lock, /* in: pointer to rw-lock */ - const char* file_name,/* in: file name where lock requested */ - ulint line) /* in: line where requested */ -{ - ibool success = FALSE; - os_thread_id_t curr_thread = os_thread_get_curr_id(); - mutex_enter(rw_lock_get_mutex(lock)); - - if (UNIV_UNLIKELY(rw_lock_get_reader_count(lock) != 0)) { - } else if (UNIV_LIKELY(rw_lock_get_writer(lock) - == RW_LOCK_NOT_LOCKED)) { - rw_lock_set_writer(lock, RW_LOCK_EX); +#endif + if(success) { lock->writer_thread = curr_thread; lock->pass = 0; -relock: - lock->writer_count++; -#ifdef UNIV_SYNC_DEBUG - rw_lock_add_debug_info(lock, 0, RW_LOCK_EX, file_name, line); -#endif + } else if (!(lock->pass) && + os_thread_eq(lock->writer_thread, curr_thread)) { + /* Must verify pass first: otherwise another thread can + call move_ownership, suddenly allowing recursive locks, + after we have verified that our thread_id matches + (though move_ownership has since changed it). */ - lock->last_x_file_name = file_name; - lock->last_x_line = line; + /* Relock: this lock_word modification is safe since no other + threads can modify (lock, unlock, or reserve) lock_word while + there is an exclusive writer and this is the writer thread.
*/ + lock->lock_word -= X_LOCK_DECR; - success = TRUE; - } else if (rw_lock_get_writer(lock) == RW_LOCK_EX - && lock->pass == 0 - && os_thread_eq(lock->writer_thread, curr_thread)) { - goto relock; + ut_ad(((-lock->lock_word) % X_LOCK_DECR) == 0); + + } else { + /* Failure */ + return(FALSE); } +#ifdef UNIV_SYNC_DEBUG + rw_lock_add_debug_info(lock, 0, RW_LOCK_EX, file_name, line); +#endif - mutex_exit(rw_lock_get_mutex(lock)); + lock->last_x_file_name = file_name; + lock->last_x_line = line; ut_ad(rw_lock_validate(lock)); - return(success); + return(TRUE); } /********************************************************************** @@ -353,39 +429,21 @@ rw_lock_s_unlock_func( #endif ) { - mutex_t* mutex = &(lock->mutex); - ibool sg = FALSE; - - /* Acquire the mutex protecting the rw-lock fields */ - mutex_enter(mutex); - - /* Reset the shared lock by decrementing the reader count */ - - ut_a(lock->reader_count > 0); - lock->reader_count--; + ut_ad((lock->lock_word % X_LOCK_DECR) != 0); #ifdef UNIV_SYNC_DEBUG rw_lock_remove_debug_info(lock, pass, RW_LOCK_SHARED); #endif - /* If there may be waiters and this was the last s-lock, - signal the object */ - - if (UNIV_UNLIKELY(lock->waiters) - && lock->reader_count == 0) { - sg = TRUE; - - rw_lock_set_waiters(lock, 0); - } + /* Increment lock_word to indicate 1 less reader */ + if(rw_lock_lock_word_incr(lock, 1) == 0) { - mutex_exit(mutex); - - if (UNIV_UNLIKELY(sg)) { -#ifdef __WIN__ + /* wait_ex waiter exists. It may not be asleep, but we signal + anyway. We do not wake other waiters, because they can't + exist without wait_ex waiter and wait_ex waiter goes first.*/ os_event_set(lock->wait_ex_event); -#endif - os_event_set(lock->event); sync_array_object_signalled(sync_primary_wait_array); + } ut_ad(rw_lock_validate(lock)); @@ -395,6 +453,7 @@ rw_lock_s_unlock_func( #endif } +/* TODO: The "direct" functions are not used. Remove them? 
*/ /********************************************************************** Releases a shared mode lock when we know there are no waiters and none else will access the lock during the time this function is executed. */ @@ -404,17 +463,16 @@ rw_lock_s_unlock_direct( /*====================*/ rw_lock_t* lock) /* in: rw-lock */ { - /* Reset the shared lock by decrementing the reader count */ - - ut_ad(lock->reader_count > 0); - - lock->reader_count--; + ut_ad(lock->lock_word < X_LOCK_DECR); #ifdef UNIV_SYNC_DEBUG rw_lock_remove_debug_info(lock, 0, RW_LOCK_SHARED); #endif - ut_ad(!lock->waiters); + /* Decrease reader count by incrementing lock_word */ + lock->lock_word++; + + ut_ad(!rw_lock_get_waiters(lock)); ut_ad(rw_lock_validate(lock)); #ifdef UNIV_SYNC_PERF_STAT rw_s_exit_count++; @@ -434,42 +492,34 @@ rw_lock_x_unlock_func( #endif ) { - ibool sg = FALSE; - - /* Acquire the mutex protecting the rw-lock fields */ - mutex_enter(&(lock->mutex)); + uint local_pass; + ut_ad((lock->lock_word % X_LOCK_DECR) == 0); - /* Reset the exclusive lock if this thread no longer has an x-mode - lock */ - - ut_ad(lock->writer_count > 0); - - lock->writer_count--; - - if (lock->writer_count == 0) { - rw_lock_set_writer(lock, RW_LOCK_NOT_LOCKED); - } + /* + Must reset pass while we still have the lock. + If we are not the last unlocker, we correct it later in the function, + which is harmless since we still hold the lock. + */ + local_pass = lock->pass; + lock->pass = 1; #ifdef UNIV_SYNC_DEBUG rw_lock_remove_debug_info(lock, pass, RW_LOCK_EX); #endif - /* If there may be waiters, signal the lock */ - if (UNIV_UNLIKELY(lock->waiters) - && lock->writer_count == 0) { - - sg = TRUE; - rw_lock_set_waiters(lock, 0); - } + if(rw_lock_lock_word_incr(lock, X_LOCK_DECR) == X_LOCK_DECR) { + /* Lock is now free. May have to signal read/write waiters. + We do not need to signal wait_ex waiters, since they cannot + exist when there is a writer. 
*/ + if(rw_lock_get_waiters(lock)) { + rw_lock_reset_waiters(lock); + os_event_set(lock->event); + sync_array_object_signalled(sync_primary_wait_array); + } - mutex_exit(&(lock->mutex)); - - if (UNIV_UNLIKELY(sg)) { -#ifdef __WIN__ - os_event_set(lock->wait_ex_event); -#endif - os_event_set(lock->event); - sync_array_object_signalled(sync_primary_wait_array); + } else { + /* We still hold x-lock, so we correct pass. */ + lock->pass = local_pass; } ut_ad(rw_lock_validate(lock)); @@ -479,6 +529,7 @@ rw_lock_x_unlock_func( #endif } +/* TODO: The "direct" functions are not used. Remove them? */ /********************************************************************** Releases an exclusive mode lock when we know there are no waiters, and none else will access the lock during the time this function is executed. */ @@ -491,19 +542,15 @@ rw_lock_x_unlock_direct( /* Reset the exclusive lock if this thread no longer has an x-mode lock */ - ut_ad(lock->writer_count > 0); - - lock->writer_count--; - - if (lock->writer_count == 0) { - rw_lock_set_writer(lock, RW_LOCK_NOT_LOCKED); - } + ut_ad((lock->lock_word % X_LOCK_DECR) == 0); #ifdef UNIV_SYNC_DEBUG rw_lock_remove_debug_info(lock, 0, RW_LOCK_EX); #endif + lock->pass = 1; + lock->lock_word += X_LOCK_DECR; - ut_ad(!lock->waiters); + ut_ad(!rw_lock_get_waiters(lock)); ut_ad(rw_lock_validate(lock)); #ifdef UNIV_SYNC_PERF_STAT diff --git a/storage/innobase/include/sync0sync.h b/storage/innobase/include/sync0sync.h index 6a61330f97e..ae6c72bcd15 100644 --- a/storage/innobase/include/sync0sync.h +++ b/storage/innobase/include/sync0sync.h @@ -16,6 +16,9 @@ Created 9/5/1995 Heikki Tuuri #include "os0thread.h" #include "os0sync.h" #include "sync0arr.h" +#ifndef WIN32 +#include "my_atomic.h" +#endif #ifndef UNIV_HOTBACKUP extern my_bool timed_mutexes; @@ -252,7 +255,7 @@ mutex_n_reserved(void); NOT to be used outside this module except in debugging! Gets the value of the lock word.
*/ UNIV_INLINE -ulint +byte mutex_get_lock_word( /*================*/ const mutex_t* mutex); /* in: mutex */ @@ -471,9 +474,16 @@ implementation of a mutual exclusion semaphore. */ struct mutex_struct { os_event_t event; /* Used by sync0arr.c for the wait queue */ - ulint lock_word; /* This ulint is the target of the atomic - test-and-set instruction in Win32 */ -#if !defined(_WIN32) || !defined(UNIV_CAN_USE_X86_ASSEMBLER) + + byte lock_word; /* This byte is the target of the atomic + test-and-set instruction in Win32 and + x86 32/64 with GCC 4.1.0 or later version */ +#if defined(_WIN32) && defined(UNIV_CAN_USE_X86_ASSEMBLER) +#elif defined(MY_ATOMIC_NOLOCK) + /* We have my_atomic_* routines that are + intrinsically atomic, so no need for the + mutex. */ +#else os_fast_mutex_t os_fast_mutex; /* In other systems we use this OS mutex in place of lock_word */ @@ -526,8 +536,7 @@ to 20 microseconds. */ /* The number of system calls made in this module. Intended for performance monitoring. */ -extern ulint mutex_system_call_count; -extern ulint mutex_exit_count; +extern ib_longlong mutex_exit_count; #ifdef UNIV_SYNC_DEBUG /* Latching order checks start when this is set TRUE */ diff --git a/storage/innobase/include/sync0sync.ic b/storage/innobase/include/sync0sync.ic index ee640abefa6..f5a85e0e7fb 100644 --- a/storage/innobase/include/sync0sync.ic +++ b/storage/innobase/include/sync0sync.ic @@ -6,16 +6,6 @@ Mutex, the basic synchronization primitive Created 9/5/1995 Heikki Tuuri *******************************************************/ -#if defined(not_defined) && defined(__GNUC__) && defined(UNIV_INTEL_X86) -/* %z0: Use the size of operand %0 which in our case is *m to determine -instruction size, it should end up as xchgl. 
"1" in the input constraint, -says that "in" has to go in the same place as "out".*/ -#define TAS(m, in, out) \ - asm volatile ("xchg%z0 %2, %0" \ - : "=g" (*(m)), "=r" (out) \ - : "1" (in)) /* Note: "1" here refers to "=r" (out) */ -#endif - /********************************************************************** Sets the waiters field in a mutex. */ @@ -59,7 +49,7 @@ mutex_signal_object( Performs an atomic test-and-set instruction to the lock_word field of a mutex. */ UNIV_INLINE -ulint +byte mutex_test_and_set( /*===============*/ /* out: the previous value of lock_word: 0 or @@ -67,18 +57,18 @@ mutex_test_and_set( mutex_t* mutex) /* in: mutex */ { #if defined(_WIN32) && defined(UNIV_CAN_USE_X86_ASSEMBLER) - ulint res; - ulint* lw; /* assembler code is used to ensure that + byte res; + byte* lw; /* assembler code is used to ensure that lock_word is loaded from memory */ ut_ad(mutex); - ut_ad(sizeof(ulint) == 4); + ut_ad(sizeof(byte) == 1); lw = &(mutex->lock_word); __asm MOV ECX, lw __asm MOV EDX, 1 - __asm XCHG EDX, DWORD PTR [ECX] - __asm MOV res, EDX + __asm XCHG DL, BYTE PTR [ECX] + __asm MOV res, DL /* The fence below would prevent this thread from reading the data structure protected by the mutex @@ -98,12 +88,9 @@ mutex_test_and_set( /* mutex_fence(); */ return(res); -#elif defined(not_defined) && defined(__GNUC__) && defined(UNIV_INTEL_X86) - ulint res; - - TAS(&mutex->lock_word, 1, res); - - return(res); +#elif defined(MY_ATOMIC_NOLOCK) + return ((byte)my_atomic_swap8( + (int8 volatile *)&(mutex->lock_word), 1)); #else ibool ret; @@ -117,7 +104,7 @@ mutex_test_and_set( mutex->lock_word = 1; } - return(ret); + return((byte)ret); #endif } @@ -131,7 +118,7 @@ mutex_reset_lock_word( mutex_t* mutex) /* in: mutex */ { #if defined(_WIN32) && defined(UNIV_CAN_USE_X86_ASSEMBLER) - ulint* lw; /* assembler code is used to ensure that + byte* lw; /* assembler code is used to ensure that lock_word is loaded from memory */ ut_ad(mutex); @@ -139,11 +126,12 @@ 
mutex_reset_lock_word( __asm MOV EDX, 0 __asm MOV ECX, lw - __asm XCHG EDX, DWORD PTR [ECX] -#elif defined(not_defined) && defined(__GNUC__) && defined(UNIV_INTEL_X86) - ulint res; - - TAS(&mutex->lock_word, 0, res); + __asm XCHG DL, BYTE PTR [ECX] +#elif defined(MY_ATOMIC_NOLOCK) + /* In theory __sync_lock_release should be used to release the lock. + Unfortunately, it does not work properly alone. The workaround is + that more conservative __sync_lock_test_and_set is used instead. */ + (void)my_atomic_swap8((int8 volatile *)&(mutex->lock_word), 0); #else mutex->lock_word = 0; @@ -154,12 +142,12 @@ mutex_reset_lock_word( /********************************************************************** Gets the value of the lock word. */ UNIV_INLINE -ulint +byte mutex_get_lock_word( /*================*/ const mutex_t* mutex) /* in: mutex */ { - const volatile ulint* ptr; /* declared volatile to ensure that + const volatile byte* ptr; /* declared volatile to ensure that lock_word is loaded from memory */ ut_ad(mutex); diff --git a/storage/innobase/include/univ.i b/storage/innobase/include/univ.i index 8ab62e655ff..8318816300d 100644 --- a/storage/innobase/include/univ.i +++ b/storage/innobase/include/univ.i @@ -9,6 +9,10 @@ Created 1/20/1994 Heikki Tuuri #ifndef univ_i #define univ_i +#ifdef __SUNPRO_C +# include <sun_prefetch.h> +#endif + #if (defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)) && !defined(MYSQL_SERVER) && !defined(__WIN__) # undef __WIN__ # define __WIN__ @@ -56,9 +60,9 @@ of the 32-bit x86 assembler in mutex operations. */ # endif /* We only try to do explicit inlining of functions with gcc and -Microsoft Visual C++ */ + Sun Studio */ -# if !defined(__GNUC__) +# if !defined(__GNUC__) && !defined(__SUNPRO_C) # undef UNIV_MUST_NOT_INLINE /* Remove compiler warning */ # define UNIV_MUST_NOT_INLINE # endif @@ -116,6 +120,16 @@ by one. 
*/ #define UNIV_SET_MEM_TO_ZERO #endif +/* Use malloc instead of innodb additional memory pool (great with tcmalloc) */ +#define UNIV_DISABLE_MEM_POOL + +#if defined(HAVE_GCC_ATOMIC_BUILTINS) || defined(HAVE_SOLARIS_ATOMIC) +/* + * We have a full set of atomic ops available - we will use them + */ +#define UNIV_SYNC_ATOMIC +#endif + /* #define UNIV_SQL_DEBUG #define UNIV_LOG_DEBUG @@ -274,6 +288,11 @@ it is read. */ /* Minimize cache-miss latency by moving data at addr into a cache before it is read or written. */ # define UNIV_PREFETCH_RW(addr) __builtin_prefetch(addr, 1, 3) +#elif defined(__SUNPRO_C) +# define UNIV_EXPECT(expr,value) (expr) +# define UNIV_LIKELY_NULL(expr) (expr) +# define UNIV_PREFETCH_R(addr) sun_prefetch_read_many(addr) +# define UNIV_PREFETCH_RW(addr) sun_prefetch_write_many(addr) #else /* Dummy versions of the macros */ # define UNIV_EXPECT(expr,value) (expr) diff --git a/storage/innobase/include/ut0ut.h b/storage/innobase/include/ut0ut.h index 8ad1782b178..95d7ba017f1 100644 --- a/storage/innobase/include/ut0ut.h +++ b/storage/innobase/include/ut0ut.h @@ -17,6 +17,24 @@ Created 1/20/1994 Heikki Tuuri typedef time_t ib_time_t; +#ifdef HAVE_PAUSE_INSTRUCTION +#define PAUSE_INSTRUCTION() {__asm__ __volatile__ ("pause");} +#else +#ifdef HAVE_FAKE_PAUSE_INSTRUCTION +#define PAUSE_INSTRUCTION() {__asm__ __volatile__ ("rep; nop");} +#else +#ifdef UNIV_SYNC_ATOMIC +#define PAUSE_INSTRUCTION() \ + { \ + volatile lint volatile_var; \ + os_compare_and_swap(&volatile_var, 0, 1); \ + } +#else +#define PAUSE_INSTRUCTION() +#endif +#endif +#endif + /************************************************************ Gets the high 32 bits in a ulint. 
That is makes a shift >> 32, but since there seem to be compiler bugs in both gcc and Visual C++, @@ -156,6 +174,18 @@ ut_usectime( /* out: 0 on success, -1 otherwise */ ulint* sec, /* out: seconds since the Epoch */ ulint* ms); /* out: microseconds since the Epoch+*sec */ + +/************************************************************** +Returns diff in microseconds (end_sec,end_ms) - (start_sec,start_ms). */ + +ib_longlong +ut_usecdiff( +/*========*/ + ulint end_sec, /* in: seconds since the Epoch */ + ulint end_ms, /* in: microseconds since the Epoch+*sec1 */ + ulint start_sec, /* in: seconds since the Epoch */ + ulint start_ms); /* in: microseconds since the Epoch+*sec2 */ + /************************************************************** Returns the difference of two times in seconds. */ diff --git a/storage/innobase/log/log0log.c b/storage/innobase/log/log0log.c index b10c348b24d..7eb5e3db422 100644 --- a/storage/innobase/log/log0log.c +++ b/storage/innobase/log/log0log.c @@ -1517,6 +1517,26 @@ log_buffer_flush_to_disk(void) } /******************************************************************** +Flush the log buffer. Force it to disk depending on the value of +innodb_flush_log_at_trx_commit. */ + +void +log_buffer_flush_maybe_sync(void) +/*==========================*/ +{ + dulint lsn; + + mutex_enter(&(log_sys->mutex)); + + lsn = log_sys->lsn; + + mutex_exit(&(log_sys->mutex)); + + /* Force log buffer to disk when innodb_flush_log_at_trx_commit = 1. */ + log_write_up_to(lsn, LOG_WAIT_ALL_GROUPS, + srv_flush_log_at_trx_commit == 1 ? TRUE : FALSE); +} +/******************************************************************** Tries to establish a big enough margin of free space in the log buffer, such that a new log entry can be catenated without an immediate need for a flush. 
*/ static diff --git a/storage/innobase/mem/mem0pool.c b/storage/innobase/mem/mem0pool.c index 27da86a0309..315f719ca09 100644 --- a/storage/innobase/mem/mem0pool.c +++ b/storage/innobase/mem/mem0pool.c @@ -329,6 +329,10 @@ mem_area_alloc( minus MEM_AREA_EXTRA_SIZE */ mem_pool_t* pool) /* in: memory pool */ { +#ifdef UNIV_DISABLE_MEM_POOL + (void)pool; /* Remove compiler warning */ + return malloc(size); +#else /* UNIV_DISABLE_MEM_POOL */ mem_area_t* area; ulint n; ibool ret; @@ -407,6 +411,7 @@ mem_area_alloc( ut_2_exp(n) - MEM_AREA_EXTRA_SIZE); return((void*)(MEM_AREA_EXTRA_SIZE + ((byte*)area))); +#endif /* UNIV_DISABLE_MEM_POOL */ } /************************************************************************ @@ -459,6 +464,10 @@ mem_area_free( buffer */ mem_pool_t* pool) /* in: memory pool */ { +#ifdef UNIV_DISABLE_MEM_POOL + (void)pool; /* Remove compiler warning */ + free(ptr); +#else /* UNIV_DISABLE_MEM_POOL */ mem_area_t* area; mem_area_t* buddy; void* new_ptr; @@ -570,6 +579,7 @@ mem_area_free( mutex_exit(&(pool->mutex)); ut_ad(mem_pool_validate(pool)); +#endif /* UNIV_DISABLE_MEM_POOL */ } /************************************************************************ diff --git a/storage/innobase/os/os0file.c b/storage/innobase/os/os0file.c index 8fd959512c1..4a9d3334e7d 100644 --- a/storage/innobase/os/os0file.c +++ b/storage/innobase/os/os0file.c @@ -62,6 +62,28 @@ ibool os_aio_use_native_aio = FALSE; ibool os_aio_print_debug = FALSE; +/* State for the state of an IO request in simulated AIO. + Protocol for simulated aio: + client requests IO: find slot with reserved = FALSE. Add entry with + status = OS_AIO_NOT_ISSUED. + IO thread wakes: find adjacent slots with reserved = TRUE and status = + OS_AIO_NOT_ISSUED. Change status for slots to + OS_AIO_ISSUED. + IO operation completes: set status for slots to OS_AIO_DONE. set status + for the first slot to OS_AIO_CLAIMED and return + result for that slot. 
+ When there are multiple read and write threads, they all compete to execute + the requests in the array (os_aio_array_t). This avoids the need to load + balance requests at the time the request is made at the cost of waking all + threads when a request is available. +*/ +typedef enum { + OS_AIO_NOT_ISSUED, /* Available to be processed by an IO thread. */ + OS_AIO_ISSUED, /* Being processed by an IO thread. */ + OS_AIO_DONE, /* Request processed. */ + OS_AIO_CLAIMED /* Result being returned to client. */ +} os_aio_status; + /* The aio array slot structure */ typedef struct os_aio_slot_struct os_aio_slot_t; @@ -70,6 +92,8 @@ struct os_aio_slot_struct{ ulint pos; /* index of the slot in the aio array */ ibool reserved; /* TRUE if this slot is reserved */ + os_aio_status status; /* Status for current request. Valid when reserved + is TRUE. Used only in simulated aio. */ time_t reservation_time;/* time when reserved */ ulint len; /* length of the block to read or write */ @@ -80,11 +104,6 @@ struct os_aio_slot_struct{ ulint offset_high; /* 32 high bits of file offset */ os_file_t file; /* file where to read or write */ const char* name; /* file name or path */ - ibool io_already_done;/* used only in simulated aio: - TRUE if the physical i/o already - made and only the slot message - needs to be passed to the caller - of os_aio_simulated_handle */ fil_node_t* message1; /* message which is given by the */ void* message2; /* the requester of an aio operation and which can be used to identify @@ -114,9 +133,6 @@ struct os_aio_array_struct{ in this array */ ulint n_slots; /* Total number of slots in the aio array. This must be divisible by n_threads. */ - ulint n_segments;/* Number of segments in the aio array of - pending aio requests. A thread can wait - separately for any one of the segments. 
*/ ulint n_reserved;/* Number of reserved slots in the aio array outside the ibuf segment */ os_aio_slot_t* slots; /* Pointer to the slots in the array */ @@ -133,6 +149,17 @@ struct os_aio_array_struct{ /* Array of events used in simulated aio */ os_event_t* os_aio_segment_wait_events = NULL; +/* Number of threads for reading and writing. */ +ulint os_aio_read_threads = 0; +ulint os_aio_write_threads = 0; + +/* Number for the first global segment for reading. */ +const ulint os_aio_first_read_segment = 2; + +/* Number for the first global segment for writing. Set to +os_aio_first_read_segment + os_aio_read_threads. */ +ulint os_aio_first_write_segment = 0; + /* The aio arrays for non-ibuf i/o and ibuf i/o, as well as sync aio. These are NULL when the module has not yet been initialized. */ static os_aio_array_t* os_aio_read_array = NULL; @@ -141,11 +168,39 @@ static os_aio_array_t* os_aio_ibuf_array = NULL; static os_aio_array_t* os_aio_log_array = NULL; static os_aio_array_t* os_aio_sync_array = NULL; +/* Per thread buffer used for merged IO requests. Used by +os_aio_simulated_handle so that a buffer doesn't have to be allocated +for each request. */ +static char* os_aio_thread_buffer[SRV_MAX_N_IO_THREADS]; +static ulint os_aio_thread_buffer_size[SRV_MAX_N_IO_THREADS]; + +/* Count pages read and written per thread */ +static ulint os_aio_thread_io_reads[SRV_MAX_N_IO_THREADS]; +static ulint os_aio_thread_io_writes[SRV_MAX_N_IO_THREADS]; + +/* Number of IO operations done. One request can be for N pages. */ +static ulint os_aio_thread_io_requests[SRV_MAX_N_IO_THREADS]; + +/* usecs spent blocked on an IO request */ +static double os_aio_thread_io_wait[SRV_MAX_N_IO_THREADS]; +/* max usecs spent blocked on an IO request */ +static double os_aio_thread_max_io_wait[SRV_MAX_N_IO_THREADS]; + +/* Number of IO global segments. An IO handler thread is created for each +global segment, except for the segment associated with os_aio_sync_array. 
+Several segments can be associated with os_aio_{read,write}_array. One +segment is created for each of the other arrays. This is also the number +of valid entries in srv_io_thread_reads, srv_io_thread_writes, +srv_io_thread_op_info, srv_io_thread_function and os_aio_segment_wait_events. */ static ulint os_aio_n_segments = ULINT_UNDEFINED; -/* If the following is TRUE, read i/o handler threads try to -wait until a batch of new read requests have been posted */ -static ibool os_aio_recommend_sleep_for_read_threads = FALSE; +/* Set to TRUE to temporarily block reads from being scheduled while a batch +of read requests is added to allow them to be merged by the IO handler thread +if they are adjacent. Declared volatile because we don't want this to be +read from a register in a loop when another thread may change the value in +memory. +*/ +static volatile ibool os_aio_recommend_sleep_for_read_threads = FALSE; ulint os_n_file_reads = 0; ulint os_bytes_read_since_printout = 0; @@ -165,6 +220,14 @@ ulint os_file_n_pending_pwrites = 0; ulint os_n_pending_writes = 0; ulint os_n_pending_reads = 0; +static double time_usecs() { + ulint sec, ms; + if (ut_usectime(&sec, &ms)) + return 0; + else + return sec * 1000000.0 + ms; +} + /*************************************************************************** Gets the operating system version. Currently works only on Windows. 
*/ @@ -2884,9 +2947,8 @@ os_aio_array_t* os_aio_array_create( /*================*/ /* out, own: aio array */ - ulint n, /* in: maximum number of pending aio operations - allowed; n must be divisible by n_segments */ - ulint n_segments) /* in: number of segments in the aio array */ + ulint n) /* in: maximum number of pending aio operations + allowed */ { os_aio_array_t* array; ulint i; @@ -2895,7 +2957,6 @@ os_aio_array_create( OVERLAPPED* over; #endif ut_a(n > 0); - ut_a(n_segments > 0); array = ut_malloc(sizeof(os_aio_array_t)); @@ -2906,7 +2967,6 @@ os_aio_array_create( os_event_set(array->is_empty); array->n_slots = n; - array->n_segments = n_segments; array->n_reserved = 0; array->slots = ut_malloc(n * sizeof(os_aio_slot_t)); #ifdef __WIN__ @@ -2933,70 +2993,75 @@ os_aio_array_create( /**************************************************************************** Initializes the asynchronous io system. Calls also os_io_init_simple. -Creates a separate aio array for -non-ibuf read and write, a third aio array for the ibuf i/o, with just one -segment, two aio arrays for log reads and writes with one segment, and a -synchronous aio array of the specified size. The combined number of segments -in the three first aio arrays is the parameter n_segments given to the -function. The caller must create an i/o handler thread for each segment in -the four first arrays, but not for the sync aio array. */ +Creates an aio array for each of non-ibuf read, non-ibuf write, ibuf IO, +log IO, and synchronous IO. The caller must create i/o handler thread for all +but the synchronous aio array. Multiple threads can access the same array for +the non-ibuf read (prefetch) and write (flush dirty buffer pages) arrays. +Return the number of AIO handler threads. 
*/ -void +ulint os_aio_init( /*========*/ - ulint n, /* in: maximum number of pending aio operations - allowed; n must be divisible by n_segments */ - ulint n_segments, /* in: combined number of segments in the four - first aio arrays; must be >= 4 */ + ulint ios_per_array, /* in: maximum number of pending aio operations + allowed per array */ + ulint n_read_threads, /* in: number of read threads */ + ulint n_write_threads, /* in: number of write threads */ ulint n_slots_sync) /* in: number of slots in the sync aio array */ { - ulint n_read_segs; - ulint n_write_segs; - ulint n_per_seg; ulint i; + ulint n_segments = 2 + n_read_threads + n_write_threads; #ifdef POSIX_ASYNC_IO sigset_t sigset; #endif - ut_ad(n % n_segments == 0); - ut_ad(n_segments >= 4); + ut_a(ios_per_array >= OS_AIO_N_PENDING_IOS_PER_THREAD); + ut_a(n_read_threads >= 1 && n_read_threads <= 64); + ut_a(n_write_threads >= 1 && n_write_threads <= 64); + ut_a(n_segments < SRV_MAX_N_IO_THREADS); os_io_init_simple(); for (i = 0; i < n_segments; i++) { srv_set_io_thread_op_info(i, "not started yet"); + os_aio_thread_io_reads[i] = 0; + os_aio_thread_io_writes[i] = 0; + os_aio_thread_io_requests[i] = 0; + os_aio_thread_buffer[i] = 0; + os_aio_thread_buffer_size[i] = 0; + os_aio_thread_io_wait[i] = 0; + os_aio_thread_max_io_wait[i] = 0; } - n_per_seg = n / n_segments; - n_write_segs = (n_segments - 2) / 2; - n_read_segs = n_segments - 2 - n_write_segs; - - /* fprintf(stderr, "Array n per seg %lu\n", n_per_seg); */ + os_aio_read_threads = n_read_threads; + os_aio_write_threads = n_write_threads; + os_aio_first_write_segment = os_aio_first_read_segment + os_aio_read_threads; + + fprintf(stderr, + "InnoDB: ios_per_array %lu read threads %lu write threads %lu\n", + ios_per_array, os_aio_read_threads, os_aio_write_threads); - os_aio_ibuf_array = os_aio_array_create(n_per_seg, 1); + os_aio_ibuf_array = os_aio_array_create(ios_per_array); srv_io_thread_function[0] = "insert buffer thread"; - os_aio_log_array = 
os_aio_array_create(n_per_seg, 1); + os_aio_log_array = os_aio_array_create(ios_per_array); srv_io_thread_function[1] = "log thread"; - os_aio_read_array = os_aio_array_create(n_read_segs * n_per_seg, - n_read_segs); - for (i = 2; i < 2 + n_read_segs; i++) { + os_aio_read_array = os_aio_array_create(ios_per_array); + for (i = os_aio_first_read_segment; i < os_aio_first_write_segment; i++) { ut_a(i < SRV_MAX_N_IO_THREADS); srv_io_thread_function[i] = "read thread"; } - os_aio_write_array = os_aio_array_create(n_write_segs * n_per_seg, - n_write_segs); - for (i = 2 + n_read_segs; i < n_segments; i++) { + os_aio_write_array = os_aio_array_create(ios_per_array); + for (i = os_aio_first_write_segment; i < n_segments; i++) { ut_a(i < SRV_MAX_N_IO_THREADS); srv_io_thread_function[i] = "write thread"; } - os_aio_sync_array = os_aio_array_create(n_slots_sync, 1); + os_aio_sync_array = os_aio_array_create(n_slots_sync); - os_aio_n_segments = n_segments; + os_aio_n_segments = 2 + os_aio_read_threads + os_aio_write_threads; os_aio_validate(); @@ -3024,7 +3089,8 @@ os_aio_init( pthread_sigmask(SIG_BLOCK, &sigset, NULL); */ #endif - } + return os_aio_n_segments; +} #ifdef WIN_ASYNC_IO /**************************************************************************** @@ -3082,76 +3148,28 @@ os_aio_wait_until_no_pending_writes(void) } /************************************************************************** -Calculates segment number for a slot. */ +Calculates aio array from global segment number. 
*/ static -ulint -os_aio_get_segment_no_from_slot( -/*============================*/ - /* out: segment number (which is the number - used by, for example, i/o-handler threads) */ - os_aio_array_t* array, /* in: aio wait array */ - os_aio_slot_t* slot) /* in: slot in this array */ -{ - ulint segment; - ulint seg_len; - - if (array == os_aio_ibuf_array) { - segment = 0; - - } else if (array == os_aio_log_array) { - segment = 1; - - } else if (array == os_aio_read_array) { - seg_len = os_aio_read_array->n_slots - / os_aio_read_array->n_segments; - - segment = 2 + slot->pos / seg_len; - } else { - ut_a(array == os_aio_write_array); - seg_len = os_aio_write_array->n_slots - / os_aio_write_array->n_segments; - - segment = os_aio_read_array->n_segments + 2 - + slot->pos / seg_len; - } - - return(segment); -} - -/************************************************************************** -Calculates local segment number and aio array from global segment number. */ -static -ulint -os_aio_get_array_and_local_segment( +os_aio_array_t* +os_aio_get_array( /*===============================*/ - /* out: local segment number within - the aio array */ - os_aio_array_t** array, /* out: aio wait array */ + /* out: aio wait array */ ulint global_segment)/* in: global segment number */ { - ulint segment; - ut_a(global_segment < os_aio_n_segments); if (global_segment == 0) { - *array = os_aio_ibuf_array; - segment = 0; - + return os_aio_ibuf_array; + } else if (global_segment == 1) { - *array = os_aio_log_array; - segment = 0; + return os_aio_log_array; - } else if (global_segment < os_aio_read_array->n_segments + 2) { - *array = os_aio_read_array; + } else if (global_segment < os_aio_first_write_segment) { + return os_aio_read_array; - segment = global_segment - 2; } else { - *array = os_aio_write_array; - - segment = global_segment - (os_aio_read_array->n_segments + 2); - } - - return(segment); + return os_aio_write_array; + } } 
/*********************************************************************** @@ -3273,7 +3291,7 @@ loop: break; } } - + ut_a(i < array->n_slots); array->n_reserved++; if (array->n_reserved == 1) { @@ -3295,7 +3313,7 @@ loop: slot->buf = buf; slot->offset = offset; slot->offset_high = offset_high; - slot->io_already_done = FALSE; + slot->status = OS_AIO_NOT_ISSUED; #ifdef WIN_ASYNC_IO control = &(slot->control); @@ -3348,6 +3366,7 @@ os_aio_array_free_slot( ut_ad(slot->reserved); slot->reserved = FALSE; + slot->status = OS_AIO_NOT_ISSUED; array->n_reserved--; @@ -3371,39 +3390,58 @@ static void os_aio_simulated_wake_handler_thread( /*=================================*/ - ulint global_segment) /* in: the number of the segment in the aio - arrays */ + os_aio_array_t* array) /* in: aio array for which wakeup is done */ { - os_aio_array_t* array; os_aio_slot_t* slot; - ulint segment; ulint n; ulint i; ut_ad(!os_aio_use_native_aio); + n = array->n_slots; - segment = os_aio_get_array_and_local_segment(&array, global_segment); - - n = array->n_slots / array->n_segments; - - /* Look through n slots after the segment * n'th slot */ + /* Look through n slots */ os_mutex_enter(array->mutex); for (i = 0; i < n; i++) { - slot = os_aio_array_get_nth_slot(array, i + segment * n); - - if (slot->reserved) { - /* Found an i/o request */ - - break; - } + slot = os_aio_array_get_nth_slot(array, i); + + if (slot->reserved && + (slot->status == OS_AIO_NOT_ISSUED || + slot->status == OS_AIO_DONE)) { + /* Found an i/o request + OS_AIO_NOT_ISSUED means the read or write request has + * yet to be done. OS_AIO_DONE means the request has been + * done but it was part of a set of requests merged into + * one read or write call and was not the first block in + * the request, so the handling of the IO completion for + * that block has not been done. 
*/ + break; + } } os_mutex_exit(array->mutex); if (i < n) { - os_event_set(os_aio_segment_wait_events[global_segment]); + if (array == os_aio_ibuf_array) { + os_event_set(os_aio_segment_wait_events[0]); + + } else if (array == os_aio_log_array) { + os_event_set(os_aio_segment_wait_events[1]); + + } else if (array == os_aio_read_array) { + ulint x; + for (x = os_aio_first_read_segment; x < os_aio_first_write_segment; x++) + os_event_set(os_aio_segment_wait_events[x]); + + } else if (array == os_aio_write_array) { + ulint x; + for (x = os_aio_first_write_segment; x < os_aio_n_segments; x++) + os_event_set(os_aio_segment_wait_events[x]); + + } else { + ut_a(0); + } } } @@ -3414,8 +3452,6 @@ void os_aio_simulated_wake_handler_threads(void) /*=======================================*/ { - ulint i; - if (os_aio_use_native_aio) { /* We do not use simulated aio: do nothing */ @@ -3423,10 +3459,11 @@ os_aio_simulated_wake_handler_threads(void) } os_aio_recommend_sleep_for_read_threads = FALSE; - - for (i = 0; i < os_aio_n_segments; i++) { - os_aio_simulated_wake_handler_thread(i); - } + + os_aio_simulated_wake_handler_thread(os_aio_ibuf_array); + os_aio_simulated_wake_handler_thread(os_aio_log_array); + os_aio_simulated_wake_handler_thread(os_aio_read_array); + os_aio_simulated_wake_handler_thread(os_aio_write_array); } /************************************************************************** @@ -3439,18 +3476,13 @@ void os_aio_simulated_put_read_threads_to_sleep(void) /*============================================*/ { - os_aio_array_t* array; ulint g; + /* TODO(mcallaghan): provide similar function for write? 
*/ os_aio_recommend_sleep_for_read_threads = TRUE; - - for (g = 0; g < os_aio_n_segments; g++) { - os_aio_get_array_and_local_segment(&array, g); - - if (array == os_aio_read_array) { - - os_event_reset(os_aio_segment_wait_events[g]); - } + + for (g = os_aio_first_read_segment; g < os_aio_first_write_segment; g++) { + os_event_reset(os_aio_segment_wait_events[g]); } } @@ -3580,9 +3612,7 @@ try_again: #endif } else { if (!wake_later) { - os_aio_simulated_wake_handler_thread( - os_aio_get_segment_no_from_slot( - array, slot)); + os_aio_simulated_wake_handler_thread(array); } } } else if (type == OS_FILE_WRITE) { @@ -3598,9 +3628,7 @@ try_again: #endif } else { if (!wake_later) { - os_aio_simulated_wake_handler_thread( - os_aio_get_segment_no_from_slot( - array, slot)); + os_aio_simulated_wake_handler_thread(array); } } } else { @@ -3666,7 +3694,7 @@ ibool os_aio_windows_handle( /*==================*/ /* out: TRUE if the aio operation succeeded */ - ulint segment, /* in: the number of the segment in the aio + ulint global_segment, /* in: the number of the segment in the aio arrays to wait for; segment 0 is the ibuf i/o thread, segment 1 the log i/o thread, then follow the non-ibuf read threads, and as @@ -3684,7 +3712,6 @@ os_aio_windows_handle( void** message2, ulint* type) /* out: OS_FILE_WRITE or ..._READ */ { - ulint orig_seg = segment; os_aio_array_t* array; os_aio_slot_t* slot; ulint n; @@ -3693,39 +3720,35 @@ os_aio_windows_handle( BOOL ret; DWORD len; - if (segment == ULINT_UNDEFINED) { + if (global_segment == ULINT_UNDEFINED) { array = os_aio_sync_array; - segment = 0; } else { - segment = os_aio_get_array_and_local_segment(&array, segment); + array = os_aio_get_array(global_segment); } /* NOTE! We only access constant fields in os_aio_array. 
Therefore we do not have to acquire the protecting mutex yet */ ut_ad(os_aio_validate()); - ut_ad(segment < array->n_segments); - n = array->n_slots / array->n_segments; + n = array->n_slots; if (array == os_aio_sync_array) { os_event_wait(os_aio_array_get_nth_slot(array, pos)->event); i = pos; } else { - srv_set_io_thread_op_info(orig_seg, "wait Windows aio"); - i = os_event_wait_multiple(n, - (array->native_events) - + segment * n); + srv_set_io_thread_op_info(global_segment, "wait Windows aio"); + i = os_event_wait_multiple(n, (array->native_events)); } os_mutex_enter(array->mutex); - slot = os_aio_array_get_nth_slot(array, i + segment * n); + slot = os_aio_array_get_nth_slot(array, i); ut_a(slot->reserved); - if (orig_seg != ULINT_UNDEFINED) { - srv_set_io_thread_op_info(orig_seg, + if (global_segment != ULINT_UNDEFINED) { + srv_set_io_thread_op_info(global_segment, "get windows aio return value"); } @@ -3898,14 +3921,16 @@ os_aio_simulated_handle( ulint* type) /* out: OS_FILE_WRITE or ..._READ */ { os_aio_array_t* array; - ulint segment; os_aio_slot_t* slot; os_aio_slot_t* slot2; os_aio_slot_t* consecutive_ios[OS_AIO_MERGE_N_CONSECUTIVE]; + os_aio_slot_t* lowest_request; + os_aio_slot_t* oldest_request; ulint n_consecutive; ulint total_len; ulint offs; ulint lowest_offset; + ulint oldest_offset; ulint biggest_age; ulint age; byte* combined_buf; @@ -3914,7 +3939,9 @@ os_aio_simulated_handle( ulint n; ulint i; - segment = os_aio_get_array_and_local_segment(&array, global_segment); + double start_usecs, stop_usecs, elapsed_usecs; + time_t now; + array = os_aio_get_array(global_segment); restart: /* NOTE! We only access constant fields in os_aio_array. 
Therefore @@ -3923,11 +3950,10 @@ restart: srv_set_io_thread_op_info(global_segment, "looking for i/o requests (a)"); ut_ad(os_aio_validate()); - ut_ad(segment < array->n_segments); - n = array->n_slots / array->n_segments; + n = array->n_slots; - /* Look through n slots after the segment * n'th slot */ + /* Look through n slots */ if (array == os_aio_read_array && os_aio_recommend_sleep_for_read_threads) { @@ -3947,9 +3973,9 @@ restart: done */ for (i = 0; i < n; i++) { - slot = os_aio_array_get_nth_slot(array, i + segment * n); + slot = os_aio_array_get_nth_slot(array, i); - if (slot->reserved && slot->io_already_done) { + if (slot->reserved && slot->status == OS_AIO_DONE) { if (os_aio_print_debug) { fprintf(stderr, @@ -3964,74 +3990,64 @@ restart: } } - n_consecutive = 0; - - /* If there are at least 2 seconds old requests, then pick the oldest - one to prevent starvation. If several requests have the same age, - then pick the one at the lowest offset. */ - biggest_age = 0; - lowest_offset = ULINT_MAX; + now = time(NULL); + oldest_request = lowest_request = NULL; + oldest_offset = lowest_offset = ULINT_MAX; + /* Find the oldest request and the request with the smallest offset */ for (i = 0; i < n; i++) { - slot = os_aio_array_get_nth_slot(array, i + segment * n); + slot = os_aio_array_get_nth_slot(array, i); - if (slot->reserved) { - age = (ulint)difftime(time(NULL), - slot->reservation_time); + if (slot->reserved && slot->status == OS_AIO_NOT_ISSUED) { + age = (ulint)difftime(now, slot->reservation_time); + /* If there are at least 2 seconds old requests, then pick the oldest + one to prevent starvation. If several requests have the same age, + then pick the one at the lowest offset. 
*/ if ((age >= 2 && age > biggest_age) || (age >= 2 && age == biggest_age - && slot->offset < lowest_offset)) { - - /* Found an i/o request */ - consecutive_ios[0] = slot; - - n_consecutive = 1; + && slot->offset < oldest_offset)) { + /* Found an i/o request */ biggest_age = age; - lowest_offset = slot->offset; + oldest_request = slot; + oldest_offset = slot->offset; } - } - } - - if (n_consecutive == 0) { - /* There were no old requests. Look for an i/o request at the - lowest offset in the array (we ignore the high 32 bits of the - offset in these heuristics) */ - lowest_offset = ULINT_MAX; + /* Look for an i/o request at the lowest offset in the array + * (we ignore the high 32 bits of the offset) */ + if (slot->offset < lowest_offset) { + /* Found an i/o request */ + lowest_request = slot; - for (i = 0; i < n; i++) { - slot = os_aio_array_get_nth_slot(array, - i + segment * n); - if (slot->reserved && slot->offset < lowest_offset) { - - /* Found an i/o request */ - consecutive_ios[0] = slot; - - n_consecutive = 1; lowest_offset = slot->offset; } } } - if (n_consecutive == 0) { + if (!lowest_request && !oldest_request) { /* No i/o requested at the moment */ goto wait_for_io; } - slot = consecutive_ios[0]; - + if (oldest_request) { + slot = oldest_request; + } else { + slot = lowest_request; + } + consecutive_ios[0] = slot; + n_consecutive = 1; + /* Check if there are several consecutive blocks to read or write */ consecutive_loop: for (i = 0; i < n; i++) { - slot2 = os_aio_array_get_nth_slot(array, i + segment * n); + slot2 = os_aio_array_get_nth_slot(array, i); if (slot2->reserved && slot2 != slot && slot2->offset == slot->offset + slot->len @@ -4039,7 +4055,8 @@ consecutive_loop: && slot->offset + slot->len > slot->offset && slot2->offset_high == slot->offset_high && slot2->type == slot->type - && slot2->file == slot->file) { + && slot2->file == slot->file + && slot2->status == OS_AIO_NOT_ISSUED) { /* Found a consecutive i/o request */ @@ -4048,7 +4065,8 @@ 
consecutive_loop: slot = slot2; - if (n_consecutive < OS_AIO_MERGE_N_CONSECUTIVE) { + if (n_consecutive < OS_AIO_MERGE_N_CONSECUTIVE && + n_consecutive < srv_max_merged_io) { goto consecutive_loop; } else { @@ -4068,6 +4086,8 @@ consecutive_loop: for (i = 0; i < n_consecutive; i++) { total_len += consecutive_ios[i]->len; + ut_a(consecutive_ios[i]->status == OS_AIO_NOT_ISSUED); + consecutive_ios[i]->status = OS_AIO_ISSUED; } if (n_consecutive == 1) { @@ -4075,7 +4095,16 @@ consecutive_loop: combined_buf = slot->buf; combined_buf2 = NULL; } else { - combined_buf2 = ut_malloc(total_len + UNIV_PAGE_SIZE); + if ((total_len + UNIV_PAGE_SIZE) > os_aio_thread_buffer_size[global_segment]) { + + if (os_aio_thread_buffer[global_segment]) + ut_free(os_aio_thread_buffer[global_segment]); + + os_aio_thread_buffer[global_segment] = ut_malloc(total_len + UNIV_PAGE_SIZE); + + os_aio_thread_buffer_size[global_segment] = total_len + UNIV_PAGE_SIZE; + } + combined_buf2 = os_aio_thread_buffer[global_segment]; ut_a(combined_buf2); @@ -4086,6 +4115,9 @@ consecutive_loop: this assumes that there is just one i/o-handler thread serving a single segment of slots! 
*/ + ut_a(slot->reserved); + ut_a(slot->status == OS_AIO_ISSUED); + os_mutex_exit(array->mutex); if (slot->type == OS_FILE_WRITE && n_consecutive > 1) { @@ -4112,6 +4144,7 @@ consecutive_loop: /* Do the i/o with ordinary, synchronous i/o functions: */ if (slot->type == OS_FILE_WRITE) { + os_aio_thread_io_writes[global_segment] += n_consecutive; if (array == os_aio_write_array) { if ((total_len % UNIV_PAGE_SIZE != 0) || (slot->offset % UNIV_PAGE_SIZE != 0)) { @@ -4126,18 +4159,30 @@ consecutive_loop: os_file_check_page_trailers(combined_buf, total_len); } - + start_usecs = time_usecs(); ret = os_file_write(slot->name, slot->file, combined_buf, slot->offset, slot->offset_high, total_len); + stop_usecs = time_usecs(); + elapsed_usecs = stop_usecs - start_usecs; + if (elapsed_usecs < 0) elapsed_usecs = 0; if (array == os_aio_write_array) { os_file_check_page_trailers(combined_buf, total_len); } } else { + start_usecs = time_usecs(); + os_aio_thread_io_reads[global_segment] += n_consecutive; ret = os_file_read(slot->file, combined_buf, slot->offset, slot->offset_high, total_len); + stop_usecs = time_usecs(); + elapsed_usecs = stop_usecs - start_usecs; + if (elapsed_usecs < 0) elapsed_usecs = 0; } + if (elapsed_usecs > os_aio_thread_max_io_wait[global_segment]) + os_aio_thread_max_io_wait[global_segment] = elapsed_usecs; + os_aio_thread_io_wait[global_segment] += elapsed_usecs; + os_aio_thread_io_requests[global_segment]++; ut_a(ret); srv_set_io_thread_op_info(global_segment, "file i/o done"); @@ -4160,16 +4205,13 @@ consecutive_loop: } } - if (combined_buf2) { - ut_free(combined_buf2); - } - os_mutex_enter(array->mutex); /* Mark the i/os done in slots */ for (i = 0; i < n_consecutive; i++) { - consecutive_ios[i]->io_already_done = TRUE; + ut_a(consecutive_ios[i]->status == OS_AIO_ISSUED); + consecutive_ios[i]->status = OS_AIO_DONE; } /* We return the messages for the first slot now, and if there were @@ -4179,6 +4221,8 @@ consecutive_loop: slot_io_done: 
ut_a(slot->reserved); + ut_a(slot->status == OS_AIO_DONE); + slot->status = OS_AIO_CLAIMED; *message1 = slot->message1; *message2 = slot->message2; @@ -4188,6 +4232,7 @@ slot_io_done: os_mutex_exit(array->mutex); os_aio_array_free_slot(array, slot); + srv_set_io_thread_op_info(global_segment, "exited handler"); return(ret); @@ -4234,7 +4279,6 @@ os_aio_array_validate( os_mutex_enter(array->mutex); ut_a(array->n_slots > 0); - ut_a(array->n_segments > 0); for (i = 0; i < array->n_slots; i++) { slot = os_aio_array_get_nth_slot(array, i); @@ -4284,11 +4328,19 @@ os_aio_print( double time_elapsed; double avg_bytes_read; ulint i; - - for (i = 0; i < srv_n_file_io_threads; i++) { - fprintf(file, "I/O thread %lu state: %s (%s)", (ulong) i, - srv_io_thread_op_info[i], - srv_io_thread_function[i]); + ulint num_issued, num_done, num_claimed; + + for (i = 0; i < os_aio_n_segments; i++) { + fprintf(file, + "I/O thread %lu state: %s (%s) reads %lu writes %lu " + "requests %lu io secs %lf io msecs/request %lf max_io_wait %lf", + i, srv_io_thread_op_info[i], srv_io_thread_function[i], + os_aio_thread_io_reads[i], os_aio_thread_io_writes[i], + os_aio_thread_io_requests[i], + os_aio_thread_io_wait[i] / 1000000.0, + os_aio_thread_io_requests[i] ? 
+ os_aio_thread_io_wait[i] / os_aio_thread_io_requests[i] / 1000.0 : 0.0, + os_aio_thread_max_io_wait[i] / 1000.0); #ifndef __WIN__ if (os_aio_segment_wait_events[i]->is_set) { @@ -4308,14 +4360,21 @@ loop: os_mutex_enter(array->mutex); ut_a(array->n_slots > 0); - ut_a(array->n_segments > 0); - n_reserved = 0; + num_done = num_issued = num_claimed = 0; for (i = 0; i < array->n_slots; i++) { slot = os_aio_array_get_nth_slot(array, i); if (slot->reserved) { + if (slot->status == OS_AIO_ISSUED) + num_issued++; + else if (slot->status == OS_AIO_DONE) + num_done++; + else { + ut_ad(slot->status == OS_AIO_CLAIMED); + num_claimed++; + } n_reserved++; #if 0 fprintf(stderr, "Reserved slot, messages %p %p\n", @@ -4361,6 +4420,13 @@ loop: goto loop; } + putc('\n', file); + fprintf(file, + "Summary of background IO slot status: %lu issued, " + "%lu done, %lu claimed, sleep set %d\n", + num_issued, num_done, num_claimed, + (int)os_aio_recommend_sleep_for_read_threads); + putc('\n', file); current_time = time(NULL); time_elapsed = 0.001 + difftime(current_time, os_last_printout); diff --git a/storage/innobase/row/row0sel.c b/storage/innobase/row/row0sel.c index f53dfe8a686..8c3b00a7841 100644 --- a/storage/innobase/row/row0sel.c +++ b/storage/innobase/row/row0sel.c @@ -1249,7 +1249,7 @@ table_loop: rw_lock_s_lock(&btr_search_latch); search_latch_locked = TRUE; - } else if (btr_search_latch.writer_is_wait_ex) { + } else if (rw_lock_get_writer(&btr_search_latch) == RW_LOCK_WAIT_EX) { /* There is an x-latch request waiting: release the s-latch for a moment; as an s-latch here is often @@ -3285,7 +3285,7 @@ row_search_for_mysql( /* PHASE 0: Release a possible s-latch we are holding on the adaptive hash index latch if there is someone waiting behind */ - if (UNIV_UNLIKELY(btr_search_latch.writer != RW_LOCK_NOT_LOCKED) + if (UNIV_UNLIKELY(rw_lock_get_writer(&btr_search_latch) != RW_LOCK_NOT_LOCKED) && trx->has_search_latch) { /* There is an x-latch request on the adaptive hash index: 
diff --git a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c index e2d8bd4c600..e0fe6f40197 100644 --- a/storage/innobase/srv/srv0srv.c +++ b/storage/innobase/srv/srv0srv.c @@ -171,7 +171,16 @@ ulint srv_awe_window_size = 0; /* size in pages; MySQL inits ulint srv_mem_pool_size = ULINT_MAX; /* size in bytes */ ulint srv_lock_table_size = ULINT_MAX; -ulint srv_n_file_io_threads = ULINT_MAX; + +ulint srv_io_capacity = ULINT_MAX; /* Number of IO operations per + second the server can do */ + +ibool srv_extra_dirty_writes = TRUE; /* Write dirty pages to disk when pct + dirty < max dirty pct */ + +ulint srv_n_read_io_threads = ULINT_MAX; +ulint srv_n_write_io_threads = ULINT_MAX; +ulint srv_max_merged_io = 64; #ifdef UNIV_LOG_ARCHIVE ibool srv_log_archive_on = FALSE; @@ -278,6 +287,7 @@ Value 10 should be good if there are less than 4 processors + 4 disks in the computer. Bigger computers need bigger values. Value 0 will disable the concurrency check. */ +ibool srv_thread_concurrency_timer_based = TRUE; ulong srv_thread_concurrency = 0; ulong srv_commit_concurrency = 0; @@ -343,10 +353,10 @@ ibool srv_use_awe = FALSE; ibool srv_use_adaptive_hash_indexes = TRUE; /*-------------------------------------------*/ -ulong srv_n_spin_wait_rounds = 20; +ulong srv_n_spin_wait_rounds = 30; ulong srv_n_free_tickets_to_enter = 500; ulong srv_thread_sleep_delay = 10000; -ulint srv_spin_wait_delay = 5; +ulint srv_spin_wait_delay = 6; ibool srv_priority_boost = TRUE; ibool srv_print_thread_releases = FALSE; @@ -412,6 +422,30 @@ FILE* srv_misc_tmpfile; ulint srv_main_thread_process_no = 0; ulint srv_main_thread_id = 0; +// The following count work done by srv_master_thread. + +// Iterations by the 'once per second' loop. +ulint srv_main_1_second_loops = 0; +// Calls to sleep by the 'once per second' loop. +ulint srv_main_sleeps = 0; +// Iterations by the 'once per 10 seconds' loop. 
+ulint srv_main_10_second_loops = 0; +// Iterations of the loop bounded by the 'background_loop' label. +ulint srv_main_background_loops = 0; +// Iterations of the loop bounded by the 'flush_loop' label. +ulint srv_main_flush_loops = 0; +// Calls to log_buffer_flush_to_disk. +ulint srv_sync_flush = 0; +// Calls to log_buffer_flush_maybe_sync. +ulint srv_async_flush = 0; + +// Number of microseconds threads wait because of +// innodb_thread_concurrency +static ib_longlong srv_thread_wait_mics = 0; + +// Number of microseconds for spinlock delay +static ib_longlong srv_timed_spin_delay = 0; + /* IMPLEMENTATION OF THE SERVER MAIN PROGRAM ========================================= @@ -631,6 +665,53 @@ are indexed by the type of the thread. */ ulint srv_n_threads_active[SRV_MASTER + 1]; ulint srv_n_threads[SRV_MASTER + 1]; +static void time_spin_delay() +{ + ulint start_sec, end_sec; + ulint start_usec, end_usec; + int i; + + srv_timed_spin_delay = 0; + + if (ut_usectime(&start_sec, &start_usec)) + return; + + for (i = 0; i < (int)SYNC_SPIN_ROUNDS; ++i) + ut_delay(ut_rnd_interval(0, srv_spin_wait_delay)); + + if (ut_usectime(&end_sec, &end_usec)) + return; + + srv_timed_spin_delay =ut_usecdiff(end_sec, end_usec, + start_sec, start_usec); +} + +/************************************************************************* +Prints counters for work done by srv_master_thread. 
*/ + +static +void +srv_print_extra( +/*===================*/ + FILE *file) /* in: output stream */ +{ + fprintf(file, "srv_master_thread loops: %lu 1_second, %lu sleeps, " + "%lu 10_second, %lu background, %lu flush\n", + srv_main_1_second_loops, srv_main_sleeps, + srv_main_10_second_loops, srv_main_background_loops, + srv_main_flush_loops); + fprintf(file, "srv_master_thread log flush: %lu sync, %lu async\n", + srv_sync_flush, srv_async_flush); + fprintf(file, "srv_wait_thread_mics %lld microseconds, %.1f seconds\n", + srv_thread_wait_mics, + (double) srv_thread_wait_mics / 1000000.0); + fprintf(file, + "spinlock delay for %d delay %d rounds is %lld mics\n", + (int)srv_spin_wait_delay, + (int)SYNC_SPIN_ROUNDS, + srv_timed_spin_delay); +} + /************************************************************************* Sets the info describing an i/o thread current state. */ @@ -864,6 +945,8 @@ srv_init(void) dict_table_t* table; ulint i; + time_spin_delay(); + srv_sys = mem_alloc(sizeof(srv_sys_t)); kernel_mutex_temp = mem_alloc(sizeof(mutex_t)); @@ -979,6 +1062,95 @@ ulong srv_max_purge_lag = 0; Puts an OS thread to wait if there are too many concurrent threads (>= srv_thread_concurrency) inside InnoDB. The threads wait in a FIFO queue. 
*/ +#ifdef UNIV_SYNC_ATOMIC +static void +inc_srv_conc_n_threads(lint *n_threads) +{ + *n_threads = os_atomic_increment(&srv_conc_n_threads, 1); +} + +static void +dec_srv_conc_n_threads(void) +{ + os_atomic_increment(&srv_conc_n_threads, -1); +} +#endif + +static void +print_already_in_error(trx_t* trx) +{ + ut_print_timestamp(stderr); + fputs(" InnoDB: Error: trying to declare trx" + " to enter InnoDB, but\n" + "InnoDB: it already is declared.\n", stderr); + trx_print(stderr, trx, 0); + putc('\n', stderr); +} + +#ifdef UNIV_SYNC_ATOMIC +static void +enter_innodb_with_tickets(trx_t* trx) +{ + trx->declared_to_be_inside_innodb = TRUE; + trx->n_tickets_to_enter_innodb = SRV_FREE_TICKETS_TO_ENTER; + return; +} + +static void +srv_conc_enter_innodb_timer_based(trx_t* trx) +{ + lint conc_n_threads; + ibool has_yielded = FALSE; + ulint has_slept = 0; + + if (trx->declared_to_be_inside_innodb) { + print_already_in_error(trx); + return; /* as in srv_conc_enter_innodb(): do not enter a second time */ + } +retry: + if (srv_conc_n_threads < (lint) srv_thread_concurrency) { + inc_srv_conc_n_threads(&conc_n_threads); + if (conc_n_threads <= (lint) srv_thread_concurrency) { + enter_innodb_with_tickets(trx); + return; + } + dec_srv_conc_n_threads(); /* over the limit: undo our increment */ + } + if (!has_yielded) + { + has_yielded = TRUE; + os_thread_yield(); + goto retry; + } + if (trx->has_search_latch + || NULL != UT_LIST_GET_FIRST(trx->trx_locks)) { + + inc_srv_conc_n_threads(&conc_n_threads); + enter_innodb_with_tickets(trx); + return; + } + if (has_slept < 2) + { + trx->op_info = "sleeping before entering InnoDB"; + os_thread_sleep(10000); + trx->op_info = ""; + has_slept++; + } + inc_srv_conc_n_threads(&conc_n_threads); + enter_innodb_with_tickets(trx); + return; +} + +static void +srv_conc_exit_innodb_timer_based(trx_t* trx) +{ + dec_srv_conc_n_threads(); + trx->declared_to_be_inside_innodb = FALSE; + trx->n_tickets_to_enter_innodb = 0; + return; +} +#endif + void srv_conc_enter_innodb( /*==================*/ @@ -1009,15 +1181,17 @@ srv_conc_enter_innodb( return;
} +#ifdef UNIV_SYNC_ATOMIC + if (srv_thread_concurrency_timer_based) { + srv_conc_enter_innodb_timer_based(trx); + return; + } +#endif + os_fast_mutex_lock(&srv_conc_mutex); retry: if (trx->declared_to_be_inside_innodb) { - ut_print_timestamp(stderr); - fputs(" InnoDB: Error: trying to declare trx" - " to enter InnoDB, but\n" - "InnoDB: it already is declared.\n", stderr); - trx_print(stderr, trx, 0); - putc('\n', stderr); + print_already_in_error(trx); os_fast_mutex_unlock(&srv_conc_mutex); return; @@ -1146,19 +1320,27 @@ srv_conc_force_enter_innodb( trx_t* trx) /* in: transaction object associated with the thread */ { + if (UNIV_LIKELY(!srv_thread_concurrency)) { return; } ut_ad(srv_conc_n_threads >= 0); - +#ifdef UNIV_SYNC_ATOMIC + if (srv_thread_concurrency_timer_based) { + lint conc_n_threads; + + inc_srv_conc_n_threads(&conc_n_threads); + trx->declared_to_be_inside_innodb = TRUE; + trx->n_tickets_to_enter_innodb = 1; + return; + } +#endif os_fast_mutex_lock(&srv_conc_mutex); - srv_conc_n_threads++; trx->declared_to_be_inside_innodb = TRUE; trx->n_tickets_to_enter_innodb = 1; - os_fast_mutex_unlock(&srv_conc_mutex); } @@ -1185,6 +1367,14 @@ srv_conc_force_exit_innodb( return; } +#ifdef UNIV_SYNC_ATOMIC + if (srv_thread_concurrency_timer_based) + { + srv_conc_exit_innodb_timer_based(trx); + return; + } +#endif + os_fast_mutex_lock(&srv_conc_mutex); ut_ad(srv_conc_n_threads > 0); @@ -1593,11 +1783,16 @@ srv_release_mysql_thread_if_suspended( /********************************************************************** Refreshes the values used to calculate per-second averages. 
*/ static -void +ibool srv_refresh_innodb_monitor_stats(void) /*==================================*/ { - mutex_enter(&srv_innodb_monitor_mutex); + /* Sometimes we will skip stats update to avoid deadlock, since + since this function is called by the background wake-up thread */ + if (mutex_enter_nowait(&srv_innodb_monitor_mutex)) { + /* mutex_enter_nowait returns 1 on failure */ + return FALSE; + } srv_last_monitor_time = time(NULL); @@ -1616,6 +1811,7 @@ srv_refresh_innodb_monitor_stats(void) srv_n_rows_read_old = srv_n_rows_read; mutex_exit(&srv_innodb_monitor_mutex); + return TRUE; } /********************************************************************** @@ -1624,11 +1820,7 @@ Outputs to a file the output of the InnoDB Monitor. */ void srv_printf_innodb_monitor( /*======================*/ - FILE* file, /* in: output stream */ - ulint* trx_start, /* out: file position of the start of - the list of active transactions */ - ulint* trx_end) /* out: file position of the end of - the list of active transactions */ + FILE* file) /* in: output stream */ { double time_elapsed; time_t current_time; @@ -1656,6 +1848,11 @@ srv_printf_innodb_monitor( "Per second averages calculated from the last %lu seconds\n", (ulong)time_elapsed); + fputs("----------\n" + "BACKGROUND THREAD\n" + "----------\n", file); + srv_print_extra(file); + fputs("----------\n" "SEMAPHORES\n" "----------\n", file); @@ -1677,24 +1874,6 @@ srv_printf_innodb_monitor( mutex_exit(&dict_foreign_err_mutex); - lock_print_info_summary(file); - if (trx_start) { - long t = ftell(file); - if (t < 0) { - *trx_start = ULINT_UNDEFINED; - } else { - *trx_start = (ulint) t; - } - } - lock_print_info_all_transactions(file); - if (trx_end) { - long t = ftell(file); - if (t < 0) { - *trx_end = ULINT_UNDEFINED; - } else { - *trx_end = (ulint) t; - } - } fputs("--------\n" "FILE I/O\n" "--------\n", file); @@ -1844,6 +2023,16 @@ srv_export_innodb_status(void) export_vars.innodb_buffer_pool_pages_misc = buf_pool->max_size - 
UT_LIST_GET_LEN(buf_pool->LRU) - UT_LIST_GET_LEN(buf_pool->free); +#ifdef UNIV_SYNC_ATOMIC + export_vars.innodb_have_sync_atomic = 1; +#else + export_vars.innodb_have_sync_atomic = 0; +#endif +#ifdef UNIV_DISABLE_MEM_POOL + export_vars.innodb_heap_enabled = 0; +#else + export_vars.innodb_heap_enabled = 1; +#endif export_vars.innodb_page_size = UNIV_PAGE_SIZE; export_vars.innodb_log_waits = srv_log_waits; export_vars.innodb_os_log_written = srv_os_log_written; @@ -1873,6 +2062,7 @@ srv_export_innodb_status(void) export_vars.innodb_rows_inserted = srv_n_rows_inserted; export_vars.innodb_rows_updated = srv_n_rows_updated; export_vars.innodb_rows_deleted = srv_n_rows_deleted; + export_vars.innodb_wake_ups = sync_wake_ups; mutex_exit(&srv_innodb_monitor_mutex); } @@ -1924,14 +2114,13 @@ loop: last_monitor_time = time(NULL); if (srv_print_innodb_monitor) { - srv_printf_innodb_monitor(stderr, NULL, NULL); + srv_printf_innodb_monitor(stderr); } if (srv_innodb_status) { mutex_enter(&srv_monitor_file_mutex); rewind(srv_monitor_file); - srv_printf_innodb_monitor(srv_monitor_file, NULL, - NULL); + srv_printf_innodb_monitor(srv_monitor_file); os_file_set_eof(srv_monitor_file); mutex_exit(&srv_monitor_file_mutex); } @@ -2060,7 +2249,10 @@ exit_func: /************************************************************************* A thread which prints warnings about semaphore waits which have lasted -too long. These can be used to track bugs which cause hangs. */ +too long. These can be used to track bugs which cause hangs. +NOTE: This thread should not wait for any innodb mutexes or rw_locks. +A deadlock could arise where the thread holding that lock requires waking +by this background thread while this thread is blocked on that lock. 
*/ os_thread_ret_t srv_error_monitor_thread( @@ -2072,10 +2264,6 @@ srv_error_monitor_thread( { /* number of successive fatal timeouts observed */ ulint fatal_cnt = 0; - dulint old_lsn; - dulint new_lsn; - - old_lsn = srv_start_lsn; #ifdef UNIV_DEBUG_THREAD_CREATION fprintf(stderr, "Error monitor thread starts, id %lu\n", @@ -2084,29 +2272,8 @@ srv_error_monitor_thread( loop: srv_error_monitor_active = TRUE; - /* Try to track a strange bug reported by Harald Fuchs and others, - where the lsn seems to decrease at times */ - - new_lsn = log_get_lsn(); - - if (ut_dulint_cmp(new_lsn, old_lsn) < 0) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Error: old log sequence number %lu %lu" - " was greater\n" - "InnoDB: than the new log sequence number %lu %lu!\n" - "InnoDB: Please submit a bug report" - " to http://bugs.mysql.com\n", - (ulong) ut_dulint_get_high(old_lsn), - (ulong) ut_dulint_get_low(old_lsn), - (ulong) ut_dulint_get_high(new_lsn), - (ulong) ut_dulint_get_low(new_lsn)); - } - - old_lsn = new_lsn; - if (difftime(time(NULL), srv_last_monitor_time) > 60) { - /* We referesh InnoDB Monitor values so that averages are + /* We refresh InnoDB Monitor values so that averages are printed from at most 60 last seconds */ srv_refresh_innodb_monitor_stats(); @@ -2197,6 +2364,14 @@ srv_wake_master_thread(void) } /************************************************************************* +Returns the number of IO operations that is X percent of the capacity. + +PCT_IO(5) -> returns the number of IO operations that is 5% of the max +where max is srv_io_capacity. +*/ +#define PCT_IO(pct) ((ulint) (srv_io_capacity * ((double) pct / 100.0))) + +/************************************************************************* The master thread controlling the server. 
*/ os_thread_ret_t @@ -2227,6 +2402,9 @@ srv_master_thread( fprintf(stderr, "Master thread starts, id %lu\n", os_thread_pf(os_thread_get_curr_id())); #endif + fprintf(stderr, "InnoDB master thread running with io_capacity %lu\n", + srv_io_capacity); + srv_main_thread_process_no = os_proc_get_number(); srv_main_thread_id = os_thread_pf(os_thread_get_curr_id()); @@ -2268,10 +2446,12 @@ loop: n_ios_old = log_sys->n_log_ios + buf_pool->n_pages_read + buf_pool->n_pages_written; srv_main_thread_op_info = "sleeping"; + srv_main_1_second_loops++; if (!skip_sleep) { os_thread_sleep(1000000); + srv_main_sleeps++; } skip_sleep = FALSE; @@ -2297,27 +2477,28 @@ loop: srv_main_thread_op_info = "flushing log"; log_buffer_flush_to_disk(); + srv_sync_flush++; srv_main_thread_op_info = "making checkpoint"; log_free_check(); - /* If there were less than 5 i/os during the - one second sleep, we assume that there is free - disk i/o capacity available, and it makes sense to - do an insert buffer merge. */ + /* If i/os during one second sleep were less than 5% of + capacity, we assume that there is free disk i/o capacity + available, and it makes sense to do an insert buffer merge. 
*/ n_pend_ios = buf_get_n_pending_ios() + log_sys->n_pending_writes; n_ios = log_sys->n_log_ios + buf_pool->n_pages_read + buf_pool->n_pages_written; - if (n_pend_ios < 3 && (n_ios - n_ios_old < 5)) { + if (n_pend_ios < PCT_IO(3) && (n_ios - n_ios_old < PCT_IO(5))) { srv_main_thread_op_info = "doing insert buffer merge"; - ibuf_contract_for_n_pages( - TRUE, srv_insert_buffer_batch_size / 4); + ibuf_contract_for_n_pages(TRUE, PCT_IO(20) / 4); srv_main_thread_op_info = "flushing log"; - log_buffer_flush_to_disk(); + /* No fsync when srv_flush_log_at_trx_commit != 1 */ + log_buffer_flush_maybe_sync(); + srv_async_flush++; } if (UNIV_UNLIKELY(buf_get_modified_ratio_pct() @@ -2326,7 +2507,8 @@ loop: /* Try to keep the number of modified pages in the buffer pool under the limit wished by the user */ - n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 100, + n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, + PCT_IO(100), ut_dulint_max); /* If we had to do the flush, it may have taken @@ -2354,30 +2536,40 @@ loop: seconds */ mem_validate_all_blocks(); #endif - /* If there were less than 200 i/os during the 10 second period, - we assume that there is free disk i/o capacity available, and it - makes sense to flush 100 pages. */ + /* If i/os during the 10 second period were less than 200% of + capacity, we assume that there is free disk i/o capacity + available, and it makes sense to flush srv_io_capacity pages. + + Note that this is done regardless of the fraction of dirty + pages relative to the max requested by the user. The one second + loop above requests writes for that case. The writes done here + are not required, and may be disabled. 
*/ n_pend_ios = buf_get_n_pending_ios() + log_sys->n_pending_writes; n_ios = log_sys->n_log_ios + buf_pool->n_pages_read + buf_pool->n_pages_written; - if (n_pend_ios < 3 && (n_ios - n_ios_very_old < 200)) { + if (srv_extra_dirty_writes && + n_pend_ios < 3 && (n_ios - n_ios_very_old < PCT_IO(200))) { srv_main_thread_op_info = "flushing buffer pool pages"; - buf_flush_batch(BUF_FLUSH_LIST, 100, ut_dulint_max); + buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(100), ut_dulint_max); srv_main_thread_op_info = "flushing log"; - log_buffer_flush_to_disk(); + /* No fsync when srv_flush_log_at_trx_commit != 1 */ + log_buffer_flush_maybe_sync(); + srv_async_flush++; } /* We run a batch of insert buffer merge every 10 seconds, even if the server were active */ srv_main_thread_op_info = "doing insert buffer merge"; - ibuf_contract_for_n_pages(TRUE, srv_insert_buffer_batch_size / 4); + ibuf_contract_for_n_pages(TRUE, PCT_IO(20) / 4); srv_main_thread_op_info = "flushing log"; - log_buffer_flush_to_disk(); + /* No fsync when srv_flush_log_at_trx_commit != 1 */ + log_buffer_flush_maybe_sync(); + srv_async_flush++; /* We run a full purge every 10 seconds, even if the server were active */ @@ -2403,6 +2595,7 @@ loop: log_buffer_flush_to_disk(); last_flush_time = current_time; + srv_sync_flush++; } } @@ -2416,14 +2609,16 @@ loop: (> 70 %), we assume we can afford reserving the disk(s) for the time it requires to flush 100 pages */ - n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 100, + n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, + PCT_IO(100), ut_dulint_max); } else { /* Otherwise, we only flush a small number of pages so that we do not unnecessarily use much disk i/o capacity from other work */ - n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 10, + n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, + PCT_IO(10), ut_dulint_max); } @@ -2457,7 +2652,7 @@ background_loop: /* The server has been quiet for a while: start running background operations */ - + srv_main_background_loops++; 
srv_main_thread_op_info = "doing background drop tables"; n_tables_to_drop = row_drop_tables_for_mysql_in_background(); @@ -2495,6 +2690,7 @@ background_loop: log_buffer_flush_to_disk(); last_flush_time = current_time; + srv_sync_flush++; } } @@ -2512,8 +2708,11 @@ background_loop: if (srv_fast_shutdown && srv_shutdown_state > 0) { n_bytes_merged = 0; } else { - n_bytes_merged = ibuf_contract_for_n_pages( - TRUE, srv_insert_buffer_batch_size); + /* This should do an amount of IO similar to the number of + * dirty pages that will be flushed in the call to + * buf_flush_batch below. Otherwise, the system favors + * clean pages over cleanup throughput. */ + n_bytes_merged = ibuf_contract_for_n_pages(TRUE, PCT_IO(100)); } srv_main_thread_op_info = "reserving kernel mutex"; @@ -2527,9 +2726,10 @@ background_loop: flush_loop: srv_main_thread_op_info = "flushing buffer pool pages"; - + srv_main_flush_loops++; if (srv_fast_shutdown < 2) { - n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 100, + n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, + PCT_IO(100), ut_dulint_max); } else { /* In the fastest shutdown we do not flush the buffer pool @@ -2552,7 +2752,17 @@ flush_loop: srv_main_thread_op_info = "flushing log"; - log_buffer_flush_to_disk(); + current_time = time(NULL); + if (difftime(current_time, last_flush_time) > 1) { + srv_main_thread_op_info = (char*) "flushing log"; + log_buffer_flush_to_disk(); + last_flush_time = current_time; + srv_sync_flush++; + } else { + /* No fsync when srv_flush_log_at_trx_commit != 1 */ + log_buffer_flush_maybe_sync(); + srv_async_flush++; + } srv_main_thread_op_info = "making checkpoint"; diff --git a/storage/innobase/srv/srv0start.c b/storage/innobase/srv/srv0start.c index ea88039f3dd..2acefbb22c2 100644 --- a/storage/innobase/srv/srv0start.c +++ b/storage/innobase/srv/srv0start.c @@ -986,6 +986,7 @@ innobase_start_or_create_for_mysql(void) ulint i; ibool srv_file_per_table_original_value = srv_file_per_table; mtr_t mtr; + ulint 
n_threads; #ifdef HAVE_DARWIN_THREADS # ifdef F_FULLFSYNC /* This executable has been compiled on Mac OS X 10.3 or later. @@ -1063,6 +1064,16 @@ innobase_start_or_create_for_mysql(void) return(DB_ERROR); } +#ifdef UNIV_DISABLE_MEM_POOL + fprintf(stderr, + "InnoDB: The InnoDB memory heap has been disabled.\n"); +#endif + +#ifdef UNIV_SYNC_ATOMIC + fprintf(stderr, + "InnoDB: Mutex and rw_lock use atomics.\n"); +#endif + /* Since InnoDB does not currently clean up all its internal data structures in MySQL Embedded Server Library server_end(), we print an error message if someone tries to start up InnoDB a @@ -1239,24 +1250,34 @@ innobase_start_or_create_for_mysql(void) } /* Restrict the maximum number of file i/o threads */ - if (srv_n_file_io_threads > SRV_MAX_N_IO_THREADS) { - - srv_n_file_io_threads = SRV_MAX_N_IO_THREADS; + if ((srv_n_read_io_threads + srv_n_write_io_threads) > SRV_MAX_N_IO_THREADS) { + fprintf(stderr, + "InnoDB: requested too many read(%d) or write(%d) IO threads, max is %d\n", + (int)srv_n_read_io_threads, + (int)srv_n_write_io_threads, + SRV_MAX_N_IO_THREADS); + return(DB_ERROR); } if (!os_aio_use_native_aio) { - /* In simulated aio we currently have use only for 4 threads */ - srv_n_file_io_threads = 4; - - os_aio_init(8 * SRV_N_PENDING_IOS_PER_THREAD - * srv_n_file_io_threads, - srv_n_file_io_threads, - SRV_MAX_N_PENDING_SYNC_IOS); + /* More than 4 threads are now supported. */ + n_threads = os_aio_init(8 * SRV_N_PENDING_IOS_PER_THREAD, + srv_n_read_io_threads, + srv_n_write_io_threads, + SRV_MAX_N_PENDING_SYNC_IOS); } else { - os_aio_init(SRV_N_PENDING_IOS_PER_THREAD - * srv_n_file_io_threads, - srv_n_file_io_threads, - SRV_MAX_N_PENDING_SYNC_IOS); + /* Might need more slots here. Alas, I don't do windows. 
*/ + n_threads = os_aio_init(SRV_N_PENDING_IOS_PER_THREAD, + srv_n_read_io_threads, + srv_n_write_io_threads, + SRV_MAX_N_PENDING_SYNC_IOS); + } + + if (n_threads > SRV_MAX_N_IO_THREADS) { + fprintf(stderr, + "InnoDB: requested too many IO threads(%d), max is %d\n", + (int)n_threads, SRV_MAX_N_IO_THREADS); + return(DB_ERROR); } fil_init(srv_max_n_open_files); @@ -1296,7 +1317,7 @@ innobase_start_or_create_for_mysql(void) /* Create i/o-handler threads: */ - for (i = 0; i < srv_n_file_io_threads; i++) { + for (i = 0; i < n_threads; i++) { n[i] = i; os_thread_create(io_handler_thread, n + i, thread_ids + i); diff --git a/storage/innobase/sync/sync0arr.c b/storage/innobase/sync/sync0arr.c index 154593a9035..bb64ac07342 100644 --- a/storage/innobase/sync/sync0arr.c +++ b/storage/innobase/sync/sync0arr.c @@ -110,6 +110,10 @@ struct sync_array_struct { since creation of the array */ }; +/* Counts the number of times that sync_arr_wake_threads_if_sema_free has + * found a thread that can run because it may have missed a wakeup signal. */ +ulint sync_wake_ups = 0; + #ifdef UNIV_SYNC_DEBUG /********************************************************************** This function is called only in the debug version. Detects a deadlock @@ -295,28 +299,25 @@ sync_array_validate( } /*********************************************************************** -Puts the cell event in reset state. */ +Returns the event that the thread owning the cell waits for. */ static -ib_longlong -sync_cell_event_reset( -/*==================*/ - /* out: value of signal_count - at the time of reset. 
*/ - ulint type, /* in: lock type mutex/rw_lock */ - void* object) /* in: the rw_lock/mutex object */ +os_event_t +sync_cell_get_event( +/*================*/ + sync_cell_t* cell) /* in: non-empty sync array cell */ { + ulint type = cell->request_type; + if (type == SYNC_MUTEX) { - return(os_event_reset(((mutex_t *) object)->event)); -#ifdef __WIN__ + return(((mutex_t *) cell->wait_object)->event); } else if (type == RW_LOCK_WAIT_EX) { - return(os_event_reset( - ((rw_lock_t *) object)->wait_ex_event)); -#endif - } else { - return(os_event_reset(((rw_lock_t *) object)->event)); + return(((rw_lock_t *) cell->wait_object)->wait_ex_event); + } else { /* RW_LOCK_SHARED and RW_LOCK_EX wait on the same event */ + return(((rw_lock_t *) cell->wait_object)->event); } } + /********************************************************************** Reserves a wait array cell for waiting for an object. The event of the cell is reset to nonsignalled state. */ @@ -332,6 +333,7 @@ sync_array_reserve_cell( ulint* index) /* out: index of the reserved cell */ { sync_cell_t* cell; + os_event_t event; ulint i; ut_a(object); @@ -370,8 +372,8 @@ sync_array_reserve_cell( /* Make sure the event is reset and also store the value of signal_count at which the event was reset. */ - cell->signal_count = sync_cell_event_reset(type, - object); + event = sync_cell_get_event(cell); + cell->signal_count = os_event_reset(event); cell->reservation_time = time(NULL); @@ -411,19 +413,7 @@ sync_array_wait_event( ut_a(!cell->waiting); ut_ad(os_thread_get_curr_id() == cell->thread); - if (cell->request_type == SYNC_MUTEX) { - event = ((mutex_t*) cell->wait_object)->event; -#ifdef __WIN__ - /* On windows if the thread about to wait is the one which - has set the state of the rw_lock to RW_LOCK_WAIT_EX, then - it waits on a special event i.e.: wait_ex_event. 
*/ } else if (cell->request_type == RW_LOCK_WAIT_EX) { - event = ((rw_lock_t*) cell->wait_object)->wait_ex_event; -#endif - } else { - event = ((rw_lock_t*) cell->wait_object)->event; - } - + event = sync_cell_get_event(cell); cell->waiting = TRUE; #ifdef UNIV_SYNC_DEBUG @@ -462,6 +452,7 @@ sync_array_cell_print( mutex_t* mutex; rw_lock_t* rwlock; ulint type; + ulint writer; type = cell->request_type; @@ -491,12 +482,14 @@ sync_array_cell_print( (ulong) mutex->waiters); } else if (type == RW_LOCK_EX -#ifdef __WIN__ || type == RW_LOCK_WAIT_EX -#endif || type == RW_LOCK_SHARED) { - fputs(type == RW_LOCK_EX ? "X-lock on" : "S-lock on", file); + switch(type) { + case RW_LOCK_EX: fputs("X-lock on", file); break; + case RW_LOCK_WAIT_EX: fputs("wait-X-lock on", file); break; + default: fputs("S-lock on", file); break; + } rwlock = cell->old_wait_rw_lock; @@ -504,22 +497,25 @@ sync_array_cell_print( " RW-latch at %p created in file %s line %lu\n", (void*) rwlock, rwlock->cfile_name, (ulong) rwlock->cline); - if (rwlock->writer != RW_LOCK_NOT_LOCKED) { + writer = rw_lock_get_writer(rwlock); + if (writer != RW_LOCK_NOT_LOCKED) { fprintf(file, "a writer (thread id %lu) has" " reserved it in mode %s", (ulong) os_thread_pf(rwlock->writer_thread), - rwlock->writer == RW_LOCK_EX + writer == RW_LOCK_EX ?
" exclusive\n" : " wait exclusive\n"); } fprintf(file, - "number of readers %lu, waiters flag %lu\n" + "number of readers %lu, waiters flag %lu, " + "lock_word: %ld\n" "Last time read locked in file %s line %lu\n" "Last time write locked in file %s line %lu\n", - (ulong) rwlock->reader_count, + (ulong) rw_lock_get_reader_count(rwlock), (ulong) rwlock->waiters, + (long) rwlock->lock_word, rwlock->last_s_file_name, (ulong) rwlock->last_s_line, rwlock->last_x_file_name, @@ -553,7 +549,8 @@ sync_array_find_thread( cell = sync_array_get_nth_cell(arr, i); if (cell->wait_object != NULL - && os_thread_eq(cell->thread, thread)) { + && os_thread_eq(cell->thread, thread) + && cell->waiting) { return(cell); /* Found */ } @@ -778,28 +775,30 @@ sync_arr_cell_can_wake_up( return(TRUE); } - } else if (cell->request_type == RW_LOCK_EX - || cell->request_type == RW_LOCK_WAIT_EX) { + } else if (cell->request_type == RW_LOCK_EX) { lock = cell->wait_object; - if (rw_lock_get_reader_count(lock) == 0 - && rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED) { + /* X_LOCK_DECR is the unlocked state */ + if (lock->lock_word == X_LOCK_DECR) { return(TRUE); } - if (rw_lock_get_reader_count(lock) == 0 - && rw_lock_get_writer(lock) == RW_LOCK_WAIT_EX - && os_thread_eq(lock->writer_thread, cell->thread)) { + } else if (cell->request_type == RW_LOCK_WAIT_EX) { + + lock = cell->wait_object; + + /* lock_word == 0 means all readers have left */ + if (lock->lock_word == 0) { return(TRUE); } - } else if (cell->request_type == RW_LOCK_SHARED) { lock = cell->wait_object; - if (rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED) { + /* lock_word > 0 means no writer or reserved writer */ + if (lock->lock_word > 0) { return(TRUE); } @@ -844,11 +843,15 @@ sync_array_object_signalled( /*========================*/ sync_array_t* arr) /* in: wait array */ { +#ifdef UNIV_SYNC_ATOMIC + (void)os_atomic_increment((volatile lint *)&(arr->sg_count), 1); +#else sync_array_enter(arr); arr->sg_count++; sync_array_exit(arr); +#endif }
/************************************************************************** @@ -868,6 +871,7 @@ sync_arr_wake_threads_if_sema_free(void) sync_cell_t* cell; ulint count; ulint i; + os_event_t event; sync_array_enter(arr); @@ -877,36 +881,25 @@ sync_arr_wake_threads_if_sema_free(void) while (count < arr->n_reserved) { cell = sync_array_get_nth_cell(arr, i); + i++; - if (cell->wait_object != NULL) { - + if (cell->wait_object == NULL) { + continue; + } count++; - if (sync_arr_cell_can_wake_up(cell)) { - - if (cell->request_type == SYNC_MUTEX) { - mutex_t* mutex; + if (!cell->waiting) { + continue; + } - mutex = cell->wait_object; - os_event_set(mutex->event); -#ifdef __WIN__ - } else if (cell->request_type - == RW_LOCK_WAIT_EX) { - rw_lock_t* lock; + if (sync_arr_cell_can_wake_up(cell)) { - lock = cell->wait_object; - os_event_set(lock->wait_ex_event); -#endif - } else { - rw_lock_t* lock; + event = sync_cell_get_event(cell); - lock = cell->wait_object; - os_event_set(lock->event); - } - } + os_event_set(event); + sync_wake_ups++; } - i++; } sync_array_exit(arr); @@ -1026,4 +1019,3 @@ sync_array_print_info( sync_array_exit(arr); } - diff --git a/storage/innobase/sync/sync0rw.c b/storage/innobase/sync/sync0rw.c index 367f019ce55..2fcf75009a6 100644 --- a/storage/innobase/sync/sync0rw.c +++ b/storage/innobase/sync/sync0rw.c @@ -15,35 +15,110 @@ Created 9/11/1995 Heikki Tuuri #include "mem0mem.h" #include "srv0srv.h" -/* number of system calls made during shared latching */ -ulint rw_s_system_call_count = 0; +/* + IMPLEMENTATION OF THE RW_LOCK + ============================= +The status of a rw_lock is held in lock_word. The initial value of lock_word is +X_LOCK_DECR. lock_word is decremented by 1 for each s-lock and by X_LOCK_DECR +for each x-lock. This describes the lock state for each value of lock_word: + +lock_word == X_LOCK_DECR: Unlocked. +0 < lock_word < X_LOCK_DECR: Read locked, no waiting writers. 
+ (X_LOCK_DECR - lock_word) is the + number of readers that hold the lock. +lock_word == 0: Write locked +-X_LOCK_DECR < lock_word < 0: Read locked, with a waiting writer. + (-lock_word) is the number of readers + that hold the lock. +lock_word <= -X_LOCK_DECR: Recursively write locked. lock_word has been + decremented by X_LOCK_DECR once for each lock, + so the number of locks is: + ((-lock_word) / X_LOCK_DECR) + 1 +When lock_word <= -X_LOCK_DECR, we also know that lock_word % X_LOCK_DECR == 0: +other values of lock_word are invalid. + +The lock_word is always read and updated atomically and consistently, so that +it always represents the state of the lock, and the state of the lock changes +with a single atomic operation. This lock_word holds all of the information +that a thread needs in order to determine if it is eligible to gain the lock +or if it must spin or sleep. The one exception to this is that writer_thread +must be verified before recursive write locks: to solve this scenario, we make +writer_thread readable by all threads, but only writeable by the x-lock holder. + +The other members of the lock obey the following rules to remain consistent: + +pass: This is only set to 1 to prevent recursive x-locks. It must + be set as specified by x_lock caller after the lock_word + indicates that the thread holds the lock, but before that + thread resumes execution. It must also be set to 1 during the + final x_unlock, but before the lock_word status is updated. + When an x_lock or move_ownership call wishes to change + pass, it must first update the writer_thread appropriately. +writer_thread: Must be set to the writer's thread_id after the lock_word + indicates that the thread holds the lock, but before that + thread resumes execution. writer_thread may be invalid and + should not be read when pass == 1. A thread trying to become + writer never reads its own stale writer_thread, since it sets + pass during its previous unlock call.
+waiters: May be set to 1 anytime, but to avoid unnecessary wake-up + signals, it should only be set to 1 when there are threads + waiting on event. Must be 1 when a writer starts waiting to + ensure the current x-locking thread sends a wake-up signal + during unlock. May only be reset to 0 immediately before a + wake-up signal is sent to event. +event: Threads wait on event for read or write lock when another + thread has an x-lock or an x-lock reservation (wait_ex). A + thread may only wait on event after performing the following + actions in order: + (1) Record the counter value of event (with os_event_reset). + (2) Set waiters to 1. + (3) Verify lock_word <= 0. + (1) must come before (2) to ensure signal is not missed. + (2) must come before (3) to ensure a signal is sent. + These restrictions force the above ordering. + Immediately before sending the wake-up signal, we should: + (1) Verify lock_word == X_LOCK_DECR (unlocked) + (2) Reset waiters to 0. +wait_ex_event: A thread may only wait on the wait_ex_event after it has + performed the following actions in order: + (1) Decrement lock_word by X_LOCK_DECR. + (2) Record counter value of wait_ex_event (os_event_reset, + called from sync_array_reserve_cell). + (3) Verify that lock_word < 0. + (1) must come first to ensure no other threads become reader + or next writer, and notifies unlocker that signal must be sent. + (2) must come before (3) to ensure the signal is not missed. + These restrictions force the above ordering.
+ Immediately before sending the wake-up signal, we should: + Verify lock_word == 0 (waiting thread holds x_lock) +*/ + /* number of spin waits on rw-latches, resulted during shared (read) locks */ -ulint rw_s_spin_wait_count = 0; +ib_longlong rw_s_spin_wait_count = 0; +ib_longlong rw_s_spin_round_count = 0; /* number of OS waits on rw-latches, resulted during shared (read) locks */ -ulint rw_s_os_wait_count = 0; +ib_longlong rw_s_os_wait_count = 0; /* number of unlocks (that unlock shared locks), set only when UNIV_SYNC_PERF_STAT is defined */ -ulint rw_s_exit_count = 0; - -/* number of system calls made during exclusive latching */ -ulint rw_x_system_call_count = 0; +ib_longlong rw_s_exit_count = 0; /* number of spin waits on rw-latches, resulted during exclusive (write) locks */ -ulint rw_x_spin_wait_count = 0; +ib_longlong rw_x_spin_wait_count = 0; +ib_longlong rw_x_spin_round_count = 0; /* number of OS waits on rw-latches, resulted during exclusive (write) locks */ -ulint rw_x_os_wait_count = 0; +ib_longlong rw_x_os_wait_count = 0; /* number of unlocks (that unlock exclusive locks), set only when UNIV_SYNC_PERF_STAT is defined */ -ulint rw_x_exit_count = 0; +ib_longlong rw_x_exit_count = 0; /* The global list of rw-locks */ rw_lock_list_t rw_lock_list; @@ -119,6 +194,7 @@ rw_lock_create_func( /* If this is the very first time a synchronization object is created, then the following call initializes the sync system. 
*/ +#ifndef UNIV_SYNC_ATOMIC mutex_create(rw_lock_get_mutex(lock), SYNC_NO_ORDER_CHECK); lock->mutex.cfile_name = cfile_name; @@ -129,12 +205,12 @@ rw_lock_create_func( lock->mutex.mutex_type = 1; #endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */ - rw_lock_set_waiters(lock, 0); - rw_lock_set_writer(lock, RW_LOCK_NOT_LOCKED); - lock->writer_count = 0; - rw_lock_set_reader_count(lock, 0); +#endif /* UNIV_SYNC_ATOMIC */ - lock->writer_is_wait_ex = FALSE; + lock->lock_word = X_LOCK_DECR; + lock->waiters = 0; + lock->pass = 1; + /* We do not have to initialize writer_thread until pass == 0 */ #ifdef UNIV_SYNC_DEBUG UT_LIST_INIT(lock->debug_list); @@ -147,15 +223,13 @@ rw_lock_create_func( lock->cfile_name = cfile_name; lock->cline = (unsigned int) cline; + lock->count_os_wait = 0; lock->last_s_file_name = "not yet reserved"; lock->last_x_file_name = "not yet reserved"; lock->last_s_line = 0; lock->last_x_line = 0; lock->event = os_event_create(NULL); - -#ifdef __WIN__ lock->wait_ex_event = os_event_create(NULL); -#endif mutex_enter(&rw_lock_list_mutex); @@ -180,20 +254,18 @@ rw_lock_free( rw_lock_t* lock) /* in: rw-lock */ { ut_ad(rw_lock_validate(lock)); - ut_a(rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED); - ut_a(rw_lock_get_waiters(lock) == 0); - ut_a(rw_lock_get_reader_count(lock) == 0); + ut_a(lock->lock_word == X_LOCK_DECR); lock->magic_n = 0; +#ifndef UNIV_SYNC_ATOMIC mutex_free(rw_lock_get_mutex(lock)); +#endif /* UNIV_SYNC_ATOMIC */ mutex_enter(&rw_lock_list_mutex); os_event_free(lock->event); -#ifdef __WIN__ os_event_free(lock->wait_ex_event); -#endif if (UT_LIST_GET_PREV(list, lock)) { ut_a(UT_LIST_GET_PREV(list, lock)->magic_n == RW_LOCK_MAGIC_N); @@ -219,19 +291,12 @@ rw_lock_validate( { ut_a(lock); - mutex_enter(rw_lock_get_mutex(lock)); + ulint waiters = rw_lock_get_waiters(lock); + lint lock_word = lock->lock_word; ut_a(lock->magic_n == RW_LOCK_MAGIC_N); - ut_a((rw_lock_get_reader_count(lock) == 0) - || (rw_lock_get_writer(lock) != RW_LOCK_EX)); - 
ut_a((rw_lock_get_writer(lock) == RW_LOCK_EX) - || (rw_lock_get_writer(lock) == RW_LOCK_WAIT_EX) - || (rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED)); - ut_a((rw_lock_get_waiters(lock) == 0) - || (rw_lock_get_waiters(lock) == 1)); - ut_a((lock->writer != RW_LOCK_EX) || (lock->writer_count > 0)); - - mutex_exit(rw_lock_get_mutex(lock)); + ut_a(waiters == 0 || waiters == 1); + ut_a(lock_word > -X_LOCK_DECR ||(-lock_word) % X_LOCK_DECR == 0); return(TRUE); } @@ -253,18 +318,15 @@ rw_lock_s_lock_spin( ulint line) /* in: line where requested */ { ulint index; /* index of the reserved wait cell */ - ulint i; /* spin round count */ + ulint i = 0; /* spin round count */ ut_ad(rw_lock_validate(lock)); + rw_s_spin_wait_count++; /* Count calls to this function */ lock_loop: - rw_s_spin_wait_count++; /* Spin waiting for the writer field to become free */ - i = 0; - - while (rw_lock_get_writer(lock) != RW_LOCK_NOT_LOCKED - && i < SYNC_SPIN_ROUNDS) { + while (i < SYNC_SPIN_ROUNDS && lock->lock_word <= 0) { if (srv_spin_wait_delay) { ut_delay(ut_rnd_interval(0, srv_spin_wait_delay)); } @@ -285,28 +347,32 @@ lock_loop: lock->cfile_name, (ulong) lock->cline, (ulong) i); } - mutex_enter(rw_lock_get_mutex(lock)); - /* We try once again to obtain the lock */ - if (TRUE == rw_lock_s_lock_low(lock, pass, file_name, line)) { - mutex_exit(rw_lock_get_mutex(lock)); + rw_s_spin_round_count += i; return; /* Success */ } else { - /* If we get here, locking did not succeed, we may - suspend the thread to wait in the wait array */ - rw_s_system_call_count++; + if (i < SYNC_SPIN_ROUNDS) { + goto lock_loop; + } + + rw_s_spin_round_count += i; sync_array_reserve_cell(sync_primary_wait_array, lock, RW_LOCK_SHARED, file_name, line, &index); - rw_lock_set_waiters(lock, 1); + /* Set waiters before checking lock_word to ensure wake-up + signal is sent. This may lead to some unnecessary signals. 
*/ + rw_lock_set_waiters(lock); - mutex_exit(rw_lock_get_mutex(lock)); + if (TRUE == rw_lock_s_lock_low(lock, pass, file_name, line)) { + sync_array_free_cell(sync_primary_wait_array, index); + return; /* Success */ + } if (srv_print_latch_waits) { fprintf(stderr, @@ -317,11 +383,13 @@ lock_loop: (ulong) lock->cline); } - rw_s_system_call_count++; + /* these stats may not be accurate */ + lock->count_os_wait++; rw_s_os_wait_count++; sync_array_wait_event(sync_primary_wait_array, index); + i = 0; goto lock_loop; } } @@ -343,113 +411,137 @@ rw_lock_x_lock_move_ownership( { ut_ad(rw_lock_is_locked(lock, RW_LOCK_EX)); +#ifdef UNIV_SYNC_ATOMIC + lock->writer_thread = os_thread_get_curr_id(); + os_memory_barrier_store(); + lock->pass = 0; +#else /* UNIV_SYNC_ATOMIC */ mutex_enter(&(lock->mutex)); - lock->writer_thread = os_thread_get_curr_id(); - lock->pass = 0; - mutex_exit(&(lock->mutex)); +#endif /* UNIV_SYNC_ATOMIC */ } /********************************************************************** -Low-level function for acquiring an exclusive lock. */ +Function for the next writer to call. Waits for readers to exit. 
+The caller must have already decremented lock_word by X_LOCK_DECR.*/ UNIV_INLINE -ulint -rw_lock_x_lock_low( -/*===============*/ - /* out: RW_LOCK_NOT_LOCKED if did - not succeed, RW_LOCK_EX if success, - RW_LOCK_WAIT_EX, if got wait reservation */ +void +rw_lock_x_lock_wait( +/*================*/ rw_lock_t* lock, /* in: pointer to rw-lock */ +#ifdef UNIV_SYNC_DEBUG ulint pass, /* in: pass value; != 0, if the lock will be passed to another thread to unlock */ +#endif const char* file_name,/* in: file name where lock requested */ ulint line) /* in: line where requested */ { - ut_ad(mutex_own(rw_lock_get_mutex(lock))); - - if (rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED) { + ulint index; + ulint i = 0; - if (rw_lock_get_reader_count(lock) == 0) { + ut_ad(lock->lock_word <= 0); - rw_lock_set_writer(lock, RW_LOCK_EX); - lock->writer_thread = os_thread_get_curr_id(); - lock->writer_count++; - lock->pass = pass; + while (lock->lock_word < 0) { + if (srv_spin_wait_delay) { + ut_delay(ut_rnd_interval(0, srv_spin_wait_delay)); + } + if(i < SYNC_SPIN_ROUNDS) { + i++; + continue; + } -#ifdef UNIV_SYNC_DEBUG - rw_lock_add_debug_info(lock, pass, RW_LOCK_EX, - file_name, line); -#endif - lock->last_x_file_name = file_name; - lock->last_x_line = (unsigned int) line; + /* If there is still a reader, then go to sleep.*/ + rw_x_spin_round_count += i; + i = 0; + sync_array_reserve_cell(sync_primary_wait_array, + lock, + RW_LOCK_WAIT_EX, + file_name, line, + &index); + /* Check lock_word to ensure wake-up isn't missed.*/ + if(lock->lock_word < 0) { - /* Locking succeeded, we may return */ - return(RW_LOCK_EX); - } else { - /* There are readers, we have to wait */ - rw_lock_set_writer(lock, RW_LOCK_WAIT_EX); - lock->writer_thread = os_thread_get_curr_id(); - lock->pass = pass; - lock->writer_is_wait_ex = TRUE; + /* these stats may not be accurate */ + lock->count_os_wait++; + rw_x_os_wait_count++; + /* Add debug info as it is needed to detect possible + deadlock. 
We must add info for WAIT_EX thread for + deadlock detection to work properly. */ #ifdef UNIV_SYNC_DEBUG rw_lock_add_debug_info(lock, pass, RW_LOCK_WAIT_EX, file_name, line); #endif - return(RW_LOCK_WAIT_EX); - } - - } else if ((rw_lock_get_writer(lock) == RW_LOCK_WAIT_EX) - && os_thread_eq(lock->writer_thread, - os_thread_get_curr_id())) { - - if (rw_lock_get_reader_count(lock) == 0) { - - rw_lock_set_writer(lock, RW_LOCK_EX); - lock->writer_count++; - lock->pass = pass; - lock->writer_is_wait_ex = FALSE; - + sync_array_wait_event(sync_primary_wait_array, + index); #ifdef UNIV_SYNC_DEBUG - rw_lock_remove_debug_info(lock, pass, RW_LOCK_WAIT_EX); - rw_lock_add_debug_info(lock, pass, RW_LOCK_EX, - file_name, line); + rw_lock_remove_debug_info(lock, pass, + RW_LOCK_WAIT_EX); #endif - - lock->last_x_file_name = file_name; - lock->last_x_line = (unsigned int) line; - - /* Locking succeeded, we may return */ - return(RW_LOCK_EX); + /* It is possible to wake when lock_word < 0. + We must pass the while-loop check to proceed.*/ + } else { + sync_array_free_cell(sync_primary_wait_array, + index); } + } + rw_x_spin_round_count += i; +} - return(RW_LOCK_WAIT_EX); - - } else if ((rw_lock_get_writer(lock) == RW_LOCK_EX) - && os_thread_eq(lock->writer_thread, - os_thread_get_curr_id()) - && (lock->pass == 0) - && (pass == 0)) { +/********************************************************************** +Low-level function for acquiring an exclusive lock. */ +UNIV_INLINE +ibool +rw_lock_x_lock_low( +/*===============*/ + /* out: RW_LOCK_NOT_LOCKED if did + not succeed, RW_LOCK_EX if success. 
*/ + rw_lock_t* lock, /* in: pointer to rw-lock */ + ulint pass, /* in: pass value; != 0, if the lock will + be passed to another thread to unlock */ + const char* file_name,/* in: file name where lock requested */ + ulint line) /* in: line where requested */ +{ + os_thread_id_t curr_thread = os_thread_get_curr_id(); - lock->writer_count++; + if(rw_lock_lock_word_decr(lock, X_LOCK_DECR)) { + ut_ad(lock->pass); + /* Decrement occurred: we are writer or next-writer. */ + lock->writer_thread = curr_thread; + lock->pass = pass; + rw_lock_x_lock_wait(lock, #ifdef UNIV_SYNC_DEBUG - rw_lock_add_debug_info(lock, pass, RW_LOCK_EX, file_name, - line); + pass, #endif + file_name, line); - lock->last_x_file_name = file_name; - lock->last_x_line = (unsigned int) line; - - /* Locking succeeded, we may return */ - return(RW_LOCK_EX); + } else { + /* Decrement failed: relock or failed lock */ + /* Must verify pass first: otherwise another thread can + call move_ownership suddenly allowing recursive locks. 
+ and after we have verified our thread_id matches + (though move_ownership has since changed it).*/ + if(!pass && !(lock->pass) && + os_thread_eq(lock->writer_thread, curr_thread)) { + /* Relock */ + lock->lock_word -= X_LOCK_DECR; + } else { + /* Another thread locked before us */ + return(FALSE); + } } +#ifdef UNIV_SYNC_DEBUG + rw_lock_add_debug_info(lock, pass, RW_LOCK_EX, + file_name, line); +#endif + lock->last_x_file_name = file_name; + lock->last_x_line = (unsigned int) line; - /* Locking did not succeed */ - return(RW_LOCK_NOT_LOCKED); + return(TRUE); } /********************************************************************** @@ -472,47 +564,30 @@ rw_lock_x_lock_func( ulint line) /* in: line where requested */ { ulint index; /* index of the reserved wait cell */ - ulint state; /* lock state acquired */ ulint i; /* spin round count */ + ibool spinning = FALSE; ut_ad(rw_lock_validate(lock)); -lock_loop: - /* Acquire the mutex protecting the rw-lock fields */ - mutex_enter_fast(&(lock->mutex)); - - state = rw_lock_x_lock_low(lock, pass, file_name, line); + i = 0; - mutex_exit(&(lock->mutex)); +lock_loop: - if (state == RW_LOCK_EX) { + if (rw_lock_x_lock_low(lock, pass, file_name, line)) { + rw_x_spin_round_count += i; return; /* Locking succeeded */ - } else if (state == RW_LOCK_NOT_LOCKED) { - - /* Spin waiting for the writer field to become free */ - i = 0; - - while (rw_lock_get_writer(lock) != RW_LOCK_NOT_LOCKED - && i < SYNC_SPIN_ROUNDS) { - if (srv_spin_wait_delay) { - ut_delay(ut_rnd_interval(0, - srv_spin_wait_delay)); - } + } else { - i++; + if (!spinning) { + spinning = TRUE; + rw_x_spin_wait_count++; } - if (i == SYNC_SPIN_ROUNDS) { - os_thread_yield(); - } - } else if (state == RW_LOCK_WAIT_EX) { - - /* Spin waiting for the reader count field to become zero */ - i = 0; - while (rw_lock_get_reader_count(lock) != 0 - && i < SYNC_SPIN_ROUNDS) { + /* Spin waiting for the lock_word to become free */ + while (i < SYNC_SPIN_ROUNDS + && lock->lock_word <= 
0) { if (srv_spin_wait_delay) { ut_delay(ut_rnd_interval(0, srv_spin_wait_delay)); @@ -522,12 +597,13 @@ lock_loop: } if (i == SYNC_SPIN_ROUNDS) { os_thread_yield(); + } else { + goto lock_loop; } - } else { - i = 0; /* Eliminate a compiler warning */ - ut_error; } + rw_x_spin_round_count += i; + if (srv_print_latch_waits) { fprintf(stderr, "Thread %lu spin wait rw-x-lock at %p" @@ -536,39 +612,20 @@ lock_loop: lock->cfile_name, (ulong) lock->cline, (ulong) i); } - rw_x_spin_wait_count++; - - /* We try once again to obtain the lock. Acquire the mutex protecting - the rw-lock fields */ - - mutex_enter(rw_lock_get_mutex(lock)); - - state = rw_lock_x_lock_low(lock, pass, file_name, line); - - if (state == RW_LOCK_EX) { - mutex_exit(rw_lock_get_mutex(lock)); - - return; /* Locking succeeded */ - } - - rw_x_system_call_count++; - sync_array_reserve_cell(sync_primary_wait_array, lock, -#ifdef __WIN__ - /* On windows RW_LOCK_WAIT_EX signifies - that this thread should wait on the - special wait_ex_event. */ - (state == RW_LOCK_WAIT_EX) - ? RW_LOCK_WAIT_EX : -#endif RW_LOCK_EX, file_name, line, &index); - rw_lock_set_waiters(lock, 1); + /* Waiters must be set before checking lock_word, to ensure signal + is sent. This could lead to a few unnecessary wake-up signals. 
*/ + rw_lock_set_waiters(lock); - mutex_exit(rw_lock_get_mutex(lock)); + if (rw_lock_x_lock_low(lock, pass, file_name, line)) { + sync_array_free_cell(sync_primary_wait_array, index); + return; /* Locking succeeded */ + } if (srv_print_latch_waits) { fprintf(stderr, @@ -578,11 +635,13 @@ lock_loop: lock->cfile_name, (ulong) lock->cline); } - rw_x_system_call_count++; + /* these stats may not be accurate */ + lock->count_os_wait++; rw_x_os_wait_count++; sync_array_wait_event(sync_primary_wait_array, index); + i = 0; goto lock_loop; } @@ -730,7 +789,7 @@ rw_lock_own( ut_ad(lock); ut_ad(rw_lock_validate(lock)); - mutex_enter(&(lock->mutex)); + rw_lock_debug_mutex_enter(); info = UT_LIST_GET_FIRST(lock->debug_list); @@ -740,7 +799,7 @@ rw_lock_own( && (info->pass == 0) && (info->lock_type == lock_type)) { - mutex_exit(&(lock->mutex)); + rw_lock_debug_mutex_exit(); /* Found! */ return(TRUE); @@ -748,7 +807,7 @@ rw_lock_own( info = UT_LIST_GET_NEXT(list, info); } - mutex_exit(&(lock->mutex)); + rw_lock_debug_mutex_exit(); return(FALSE); } @@ -770,22 +829,18 @@ rw_lock_is_locked( ut_ad(lock); ut_ad(rw_lock_validate(lock)); - mutex_enter(&(lock->mutex)); - if (lock_type == RW_LOCK_SHARED) { - if (lock->reader_count > 0) { + if (rw_lock_get_reader_count(lock) > 0) { ret = TRUE; } } else if (lock_type == RW_LOCK_EX) { - if (lock->writer == RW_LOCK_EX) { + if (rw_lock_get_writer(lock) == RW_LOCK_EX) { ret = TRUE; } } else { ut_error; } - mutex_exit(&(lock->mutex)); - return(ret); } @@ -814,11 +869,10 @@ rw_lock_list_print_info( count++; +#ifndef UNIV_SYNC_ATOMIC mutex_enter(&(lock->mutex)); - - if ((rw_lock_get_writer(lock) != RW_LOCK_NOT_LOCKED) - || (rw_lock_get_reader_count(lock) != 0) - || (rw_lock_get_waiters(lock) != 0)) { +#endif + if (lock->lock_word != X_LOCK_DECR) { fprintf(file, "RW-LOCK: %p ", (void*) lock); @@ -834,8 +888,10 @@ rw_lock_list_print_info( info = UT_LIST_GET_NEXT(list, info); } } - +#ifndef UNIV_SYNC_ATOMIC mutex_exit(&(lock->mutex)); +#endif + lock 
= UT_LIST_GET_NEXT(list, lock); } @@ -858,9 +914,10 @@ rw_lock_print( "RW-LATCH INFO\n" "RW-LATCH: %p ", (void*) lock); - if ((rw_lock_get_writer(lock) != RW_LOCK_NOT_LOCKED) - || (rw_lock_get_reader_count(lock) != 0) - || (rw_lock_get_waiters(lock) != 0)) { +#ifndef UNIV_SYNC_ATOMIC + mutex_enter(&(lock->mutex)); +#endif + if (lock->lock_word != X_LOCK_DECR) { if (rw_lock_get_waiters(lock)) { fputs(" Waiters for the lock exist\n", stderr); @@ -874,6 +931,9 @@ rw_lock_print( info = UT_LIST_GET_NEXT(list, info); } } +#ifndef UNIV_SYNC_ATOMIC + mutex_exit(&(lock->mutex)); +#endif } /************************************************************************* @@ -922,14 +982,11 @@ rw_lock_n_locked(void) lock = UT_LIST_GET_FIRST(rw_lock_list); while (lock != NULL) { - mutex_enter(rw_lock_get_mutex(lock)); - if ((rw_lock_get_writer(lock) != RW_LOCK_NOT_LOCKED) - || (rw_lock_get_reader_count(lock) != 0)) { + if (lock->lock_word != X_LOCK_DECR) { count++; } - mutex_exit(rw_lock_get_mutex(lock)); lock = UT_LIST_GET_NEXT(list, lock); } diff --git a/storage/innobase/sync/sync0sync.c b/storage/innobase/sync/sync0sync.c index 944fd2a97fc..a8b1ac4926e 100644 --- a/storage/innobase/sync/sync0sync.c +++ b/storage/innobase/sync/sync0sync.c @@ -138,18 +138,13 @@ Therefore, this thread is guaranteed to catch the os_set_event() signalled unconditionally at the release of the lock. Q.E.D. */ -/* The number of system calls made in this module. Intended for performance -monitoring. 
*/ - -ulint mutex_system_call_count = 0; - /* Number of spin waits on mutexes: for performance monitoring */ /* round=one iteration of a spin loop */ -ulint mutex_spin_round_count = 0; -ulint mutex_spin_wait_count = 0; -ulint mutex_os_wait_count = 0; -ulint mutex_exit_count = 0; +ib_longlong mutex_spin_round_count = 0; +ib_longlong mutex_spin_wait_count = 0; +ib_longlong mutex_os_wait_count = 0; +ib_longlong mutex_exit_count = 0; /* The global array of wait cells for implementation of the database's own mutexes and read-write locks */ @@ -243,6 +238,8 @@ mutex_create_func( { #if defined(_WIN32) && defined(UNIV_CAN_USE_X86_ASSEMBLER) mutex_reset_lock_word(mutex); +#elif defined(MY_ATOMIC_NOLOCK) + mutex_reset_lock_word(mutex); #else os_fast_mutex_init(&(mutex->os_fast_mutex)); mutex->lock_word = 0; @@ -333,7 +330,9 @@ mutex_free( os_event_free(mutex->event); -#if !defined(_WIN32) || !defined(UNIV_CAN_USE_X86_ASSEMBLER) +#if defined(_WIN32) && defined(UNIV_CAN_USE_X86_ASSEMBLER) +#elif defined(MY_ATOMIC_NOLOCK) +#else os_fast_mutex_free(&(mutex->os_fast_mutex)); #endif /* If we free the mutex protecting the mutex list (freeing is @@ -450,6 +449,12 @@ mutex_spin_wait( #endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */ ut_ad(mutex); + /* This update is not thread safe, but we don't mind if the count + isn't exact. Moved out of ifdef that follows because we are willing + to sacrifice the cost of counting this as the data is valuable. + Count the number of calls to mutex_spin_wait. 
*/ + mutex_spin_wait_count++; + mutex_loop: i = 0; @@ -462,7 +467,6 @@ mutex_loop: spin_loop: #if defined UNIV_DEBUG && !defined UNIV_HOTBACKUP - mutex_spin_wait_count++; mutex->count_spin_loop++; #endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */ @@ -527,8 +531,6 @@ spin_loop: sync_array_reserve_cell(sync_primary_wait_array, mutex, SYNC_MUTEX, file_name, line, &index); - mutex_system_call_count++; - /* The memory order of the array reservation and the change in the waiters field is important: when we suspend a thread, we first reserve the cell and then set waiters field to 1. When threads are @@ -575,7 +577,6 @@ spin_loop: mutex->cfile_name, (ulong) mutex->cline, (ulong) i); #endif - mutex_system_call_count++; mutex_os_wait_count++; #ifndef UNIV_HOTBACKUP @@ -1377,21 +1378,31 @@ sync_print_wait_info( FILE* file) /* in: file where to print */ { #ifdef UNIV_SYNC_DEBUG - fprintf(file, "Mutex exits %lu, rws exits %lu, rwx exits %lu\n", + fprintf(file, "Mutex exits %llu, rws exits %llu, rwx exits %llu\n", mutex_exit_count, rw_s_exit_count, rw_x_exit_count); #endif fprintf(file, - "Mutex spin waits %lu, rounds %lu, OS waits %lu\n" - "RW-shared spins %lu, OS waits %lu;" - " RW-excl spins %lu, OS waits %lu\n", - (ulong) mutex_spin_wait_count, - (ulong) mutex_spin_round_count, - (ulong) mutex_os_wait_count, - (ulong) rw_s_spin_wait_count, - (ulong) rw_s_os_wait_count, - (ulong) rw_x_spin_wait_count, - (ulong) rw_x_os_wait_count); + "Mutex spin waits %llu, rounds %llu, OS waits %llu\n" + "RW-shared spins %llu, OS waits %llu;" + " RW-excl spins %llu, OS waits %llu\n", + mutex_spin_wait_count, + mutex_spin_round_count, + mutex_os_wait_count, + rw_s_spin_wait_count, + rw_s_os_wait_count, + rw_x_spin_wait_count, + rw_x_os_wait_count); + + fprintf(file, + "Spin rounds per wait: %.2f mutex, %.2f RW-shared, " + "%.2f RW-excl\n", + (double) mutex_spin_round_count / + (mutex_spin_wait_count ? mutex_spin_wait_count : 1), + (double) rw_s_spin_round_count / + (rw_s_spin_wait_count ? 
rw_s_spin_wait_count : 1), + (double) rw_x_spin_round_count / + (rw_x_spin_wait_count ? rw_x_spin_wait_count : 1)); } /*********************************************************************** diff --git a/storage/innobase/ut/ut0ut.c b/storage/innobase/ut/ut0ut.c index 6b5bcef1830..1ae43172894 100644 --- a/storage/innobase/ut/ut0ut.c +++ b/storage/innobase/ut/ut0ut.c @@ -154,6 +154,23 @@ ut_usectime( } /************************************************************** +Returns diff in microseconds (end_sec,end_ms) - (start_sec,start_ms) */ + +ib_longlong +ut_usecdiff( +/*========*/ + ulint end_sec, /* in: seconds since the Epoch */ + ulint end_ms, /* in: microseconds since the Epoch+*sec1 */ + ulint start_sec, /* in: seconds since the Epoch */ + ulint start_ms) /* in: microseconds since the Epoch+*sec2 */ +{ + ib_longlong end_mics = end_sec * 1000000LL + end_ms; + ib_longlong start_mics = start_sec * 1000000LL + start_ms; + + return end_mics - start_mics; +} + +/************************************************************** Returns the difference of two times in seconds. 
*/ double @@ -348,6 +365,7 @@ ut_delay( j = 0; for (i = 0; i < delay * 50; i++) { + PAUSE_INSTRUCTION(); j += i; } diff --git a/storage/myisam/Makefile.am b/storage/myisam/Makefile.am index f50c312b8e4..797d7523301 100644 --- a/storage/myisam/Makefile.am +++ b/storage/myisam/Makefile.am @@ -150,5 +150,20 @@ SUFFIXES = .sh @CHMOD@ +x $@-t @MV@ $@-t $@ +if HAVE_DTRACE_DASH_G +libmyisam_a_LIBADD = probes_mysql.o +libmyisam_a_DEPENDENCIES = probes_mysql.o dtrace_files dtrace_providers +CLEANFILES += probes_mysql.o dtrace_files dtrace_providers +DTRACEFILES = ha_myisam.o +DTRACEPROVIDER = $(abs_top_srcdir)/sql/probes_mysql.d + +dtrace_files: + echo $(DTRACEFILES) > $@ +dtrace_providers: + echo $(DTRACEPROVIDER) > $@ +probes_mysql.o: $(DTRACEPROVIDER) $(DTRACEFILES) + $(DTRACE) $(DTRACEFLAGS) -G -s $< $(DTRACEFILES) -o $@ +endif + # Don't update the files from bitkeeper %::SCCS/s.% diff --git a/storage/myisam/ha_myisam.cc b/storage/myisam/ha_myisam.cc index 4073044bf63..c6411a6a273 100644 --- a/storage/myisam/ha_myisam.cc +++ b/storage/myisam/ha_myisam.cc @@ -20,6 +20,7 @@ #define MYSQL_SERVER 1 #include "mysql_priv.h" +#include "probes_mysql.h" #include <mysql/plugin.h> #include <m_ctype.h> #include <my_bit.h> @@ -1081,6 +1082,9 @@ int ha_myisam::repair(THD *thd, MI_CHECK ¶m, bool do_optimize) param.out_flag= 0; strmov(fixed_name,file->filename); + // Release latches since this can take a long time + ha_release_temporary_latches(thd); + // Don't lock tables if we have used LOCK TABLE if (!thd->locked_tables && mi_lock_database(file, table->s->tmp_table ? F_EXTRA_LCK : F_WRLCK)) @@ -1602,10 +1606,12 @@ int ha_myisam::index_read_map(uchar *buf, const uchar *key, key_part_map keypart_map, enum ha_rkey_function find_flag) { + MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str); DBUG_ASSERT(inited==INDEX); ha_statistic_increment(&SSV::ha_read_key_count); int error=mi_rkey(file, buf, active_index, key, keypart_map, find_flag); table->status=error ? 
STATUS_NOT_FOUND: 0; + MYSQL_INDEX_READ_ROW_DONE(error); return error; } @@ -1613,57 +1619,69 @@ int ha_myisam::index_read_idx_map(uchar *buf, uint index, const uchar *key, key_part_map keypart_map, enum ha_rkey_function find_flag) { + MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str); ha_statistic_increment(&SSV::ha_read_key_count); int error=mi_rkey(file, buf, index, key, keypart_map, find_flag); table->status=error ? STATUS_NOT_FOUND: 0; + MYSQL_INDEX_READ_ROW_DONE(error); return error; } int ha_myisam::index_read_last_map(uchar *buf, const uchar *key, key_part_map keypart_map) { + MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str); DBUG_ENTER("ha_myisam::index_read_last"); DBUG_ASSERT(inited==INDEX); ha_statistic_increment(&SSV::ha_read_key_count); int error=mi_rkey(file, buf, active_index, key, keypart_map, HA_READ_PREFIX_LAST); table->status=error ? STATUS_NOT_FOUND: 0; + MYSQL_INDEX_READ_ROW_DONE(error); DBUG_RETURN(error); } int ha_myisam::index_next(uchar *buf) { + MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str); DBUG_ASSERT(inited==INDEX); ha_statistic_increment(&SSV::ha_read_next_count); int error=mi_rnext(file,buf,active_index); table->status=error ? STATUS_NOT_FOUND: 0; + MYSQL_INDEX_READ_ROW_DONE(error); return error; } int ha_myisam::index_prev(uchar *buf) { + MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str); DBUG_ASSERT(inited==INDEX); ha_statistic_increment(&SSV::ha_read_prev_count); int error=mi_rprev(file,buf, active_index); table->status=error ? STATUS_NOT_FOUND: 0; + MYSQL_INDEX_READ_ROW_DONE(error); return error; } int ha_myisam::index_first(uchar *buf) { + MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str); DBUG_ASSERT(inited==INDEX); ha_statistic_increment(&SSV::ha_read_first_count); int error=mi_rfirst(file, buf, active_index); table->status=error ? 
STATUS_NOT_FOUND: 0; + MYSQL_INDEX_READ_ROW_DONE(error); return error; } int ha_myisam::index_last(uchar *buf) { + MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str); DBUG_ASSERT(inited==INDEX); ha_statistic_increment(&SSV::ha_read_last_count); int error=mi_rlast(file, buf, active_index); table->status=error ? STATUS_NOT_FOUND: 0; + MYSQL_INDEX_READ_ROW_DONE(error); return error; } @@ -1673,12 +1691,14 @@ int ha_myisam::index_next_same(uchar *buf, { int error; DBUG_ASSERT(inited==INDEX); + MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str); ha_statistic_increment(&SSV::ha_read_next_count); do { error= mi_rnext_same(file,buf); } while (error == HA_ERR_RECORD_DELETED); table->status=error ? STATUS_NOT_FOUND: 0; + MYSQL_INDEX_READ_ROW_DONE(error); return error; } @@ -1692,9 +1712,12 @@ int ha_myisam::rnd_init(bool scan) int ha_myisam::rnd_next(uchar *buf) { + MYSQL_READ_ROW_START(table_share->db.str, table_share->table_name.str, + TRUE); ha_statistic_increment(&SSV::ha_read_rnd_next_count); int error=mi_scan(file, buf); table->status=error ? STATUS_NOT_FOUND: 0; + MYSQL_READ_ROW_DONE(error); return error; } @@ -1705,9 +1728,12 @@ int ha_myisam::restart_rnd_next(uchar *buf, uchar *pos) int ha_myisam::rnd_pos(uchar *buf, uchar *pos) { + MYSQL_READ_ROW_START(table_share->db.str, table_share->table_name.str, + FALSE); ha_statistic_increment(&SSV::ha_read_rnd_count); int error=mi_rrnd(file, buf, my_get_ptr(pos,ref_length)); table->status=error ? 
STATUS_NOT_FOUND: 0; + MYSQL_READ_ROW_DONE(error); return error; } diff --git a/storage/myisammrg/Makefile.am b/storage/myisammrg/Makefile.am index 1ca51bc9d03..2ae808c2011 100644 --- a/storage/myisammrg/Makefile.am +++ b/storage/myisammrg/Makefile.am @@ -40,5 +40,20 @@ libmyisammrg_a_SOURCES = myrg_open.c myrg_extra.c myrg_info.c myrg_locking.c \ EXTRA_DIST = CMakeLists.txt plug.in +if HAVE_DTRACE_DASH_G +libmyisammrg_a_LIBADD = probes_mysql.o +libmyisammrg_a_DEPENDENCIES = probes_mysql.o dtrace_files dtrace_providers +CLEANFILES = probes_mysql.o dtrace_files dtrace_providers +DTRACEFILES = ha_myisammrg.o +DTRACEPROVIDER = $(abs_top_srcdir)/sql/probes_mysql.d + +dtrace_files: + echo $(DTRACEFILES) > $@ +dtrace_providers: + echo $(DTRACEPROVIDER) > $@ +probes_mysql.o: $(DTRACEPROVIDER) $(DTRACEFILES) + $(DTRACE) $(DTRACEFLAGS) -G -s $< $(DTRACEFILES) -o $@ +endif + # Don't update the files from bitkeeper %::SCCS/s.% diff --git a/storage/myisammrg/ha_myisammrg.cc b/storage/myisammrg/ha_myisammrg.cc index 956f0e421cc..999dcf00b93 100644 --- a/storage/myisammrg/ha_myisammrg.cc +++ b/storage/myisammrg/ha_myisammrg.cc @@ -96,6 +96,7 @@ #define MYSQL_SERVER 1 #include "mysql_priv.h" +#include "probes_mysql.h" #include <mysql/plugin.h> #include <m_ctype.h> #include "../myisam/ha_myisam.h" @@ -662,9 +663,11 @@ int ha_myisammrg::index_read_map(uchar * buf, const uchar * key, enum ha_rkey_function find_flag) { DBUG_ASSERT(this->file->children_attached); + MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str); ha_statistic_increment(&SSV::ha_read_key_count); int error=myrg_rkey(file,buf,active_index, key, keypart_map, find_flag); table->status=error ? 
STATUS_NOT_FOUND: 0; + MYSQL_INDEX_READ_ROW_DONE(error); return error; } @@ -673,9 +676,11 @@ int ha_myisammrg::index_read_idx_map(uchar * buf, uint index, const uchar * key, enum ha_rkey_function find_flag) { DBUG_ASSERT(this->file->children_attached); + MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str); ha_statistic_increment(&SSV::ha_read_key_count); int error=myrg_rkey(file,buf,index, key, keypart_map, find_flag); table->status=error ? STATUS_NOT_FOUND: 0; + MYSQL_INDEX_READ_ROW_DONE(error); return error; } @@ -683,46 +688,56 @@ int ha_myisammrg::index_read_last_map(uchar *buf, const uchar *key, key_part_map keypart_map) { DBUG_ASSERT(this->file->children_attached); + MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str); ha_statistic_increment(&SSV::ha_read_key_count); int error=myrg_rkey(file,buf,active_index, key, keypart_map, HA_READ_PREFIX_LAST); table->status=error ? STATUS_NOT_FOUND: 0; + MYSQL_INDEX_READ_ROW_DONE(error); return error; } int ha_myisammrg::index_next(uchar * buf) { DBUG_ASSERT(this->file->children_attached); + MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str); ha_statistic_increment(&SSV::ha_read_next_count); int error=myrg_rnext(file,buf,active_index); table->status=error ? STATUS_NOT_FOUND: 0; + MYSQL_INDEX_READ_ROW_DONE(error); return error; } int ha_myisammrg::index_prev(uchar * buf) { DBUG_ASSERT(this->file->children_attached); + MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str); ha_statistic_increment(&SSV::ha_read_prev_count); int error=myrg_rprev(file,buf, active_index); table->status=error ? 
STATUS_NOT_FOUND: 0; + MYSQL_INDEX_READ_ROW_DONE(error); return error; } int ha_myisammrg::index_first(uchar * buf) { DBUG_ASSERT(this->file->children_attached); + MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str); ha_statistic_increment(&SSV::ha_read_first_count); int error=myrg_rfirst(file, buf, active_index); table->status=error ? STATUS_NOT_FOUND: 0; + MYSQL_INDEX_READ_ROW_DONE(error); return error; } int ha_myisammrg::index_last(uchar * buf) { DBUG_ASSERT(this->file->children_attached); + MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str); ha_statistic_increment(&SSV::ha_read_last_count); int error=myrg_rlast(file, buf, active_index); table->status=error ? STATUS_NOT_FOUND: 0; + MYSQL_INDEX_READ_ROW_DONE(error); return error; } @@ -732,12 +747,14 @@ int ha_myisammrg::index_next_same(uchar * buf, { int error; DBUG_ASSERT(this->file->children_attached); + MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str); ha_statistic_increment(&SSV::ha_read_next_count); do { error= myrg_rnext_same(file,buf); } while (error == HA_ERR_RECORD_DELETED); table->status=error ? STATUS_NOT_FOUND: 0; + MYSQL_INDEX_READ_ROW_DONE(error); return error; } @@ -752,9 +769,12 @@ int ha_myisammrg::rnd_init(bool scan) int ha_myisammrg::rnd_next(uchar *buf) { DBUG_ASSERT(this->file->children_attached); + MYSQL_READ_ROW_START(table_share->db.str, table_share->table_name.str, + TRUE); ha_statistic_increment(&SSV::ha_read_rnd_next_count); int error=myrg_rrnd(file, buf, HA_OFFSET_ERROR); table->status=error ? 
STATUS_NOT_FOUND: 0; + MYSQL_READ_ROW_DONE(error); return error; } @@ -762,9 +782,12 @@ int ha_myisammrg::rnd_next(uchar *buf) int ha_myisammrg::rnd_pos(uchar * buf, uchar *pos) { DBUG_ASSERT(this->file->children_attached); + MYSQL_READ_ROW_START(table_share->db.str, table_share->table_name.str, + TRUE); ha_statistic_increment(&SSV::ha_read_rnd_count); int error=myrg_rrnd(file, buf, my_get_ptr(pos,ref_length)); table->status=error ? STATUS_NOT_FOUND: 0; + MYSQL_READ_ROW_DONE(error); return error; } diff --git a/storage/ndb/include/portlib/prefetch.h b/storage/ndb/include/portlib/prefetch.h index f098c2ba6c0..fc4670115da 100644 --- a/storage/ndb/include/portlib/prefetch.h +++ b/storage/ndb/include/portlib/prefetch.h @@ -43,7 +43,7 @@ inline void prefetch(void* p) __asm(" ldl r31,0(a0);", p); #endif /* NDB_ALPHA */ #ifdef NDB_FORTE6 - sparc_prefetch_read_once(p); + sun_prefetch_read_once(p); #else (void)p; #endif @@ -55,7 +55,7 @@ inline void writehint(void* p) __asm(" wh64 (a0);", p); #endif /* NDB_ALPHA */ #ifdef NDB_FORTE6 - sparc_prefetch_write_once(p); + sun_prefetch_write_once(p); #else (void)p; #endif |