diff options
author | unknown <monty@bitch.mysql.fi> | 2001-10-11 19:55:38 +0300 |
---|---|---|
committer | unknown <monty@bitch.mysql.fi> | 2001-10-11 19:55:38 +0300 |
commit | 35b11f8345235f7232ba6256abdfa270b9d30f92 (patch) | |
tree | e7beca27a77a2c8731d8db0ffdb07f5e950ba4b3 | |
parent | a77ea9107793f57d3369e4f17205ab4b02537a43 (diff) | |
parent | dd94eb03890805bcf0679f42f3cd62d247c2538f (diff) | |
download | mariadb-git-35b11f8345235f7232ba6256abdfa270b9d30f92.tar.gz |
Merge hundin:/my/mysql-4.0 into bitch.mysql.fi:/my/mysql-4.0
BitKeeper/etc/ignore:
auto-union
144 files changed, 5766 insertions, 6641 deletions
diff --git a/.bzrignore b/.bzrignore index 2422a369b5d..e903e37c866 100644 --- a/.bzrignore +++ b/.bzrignore @@ -255,6 +255,7 @@ libmysqld/opt_sum.cc libmysqld/password.c libmysqld/procedure.cc libmysqld/records.cc +libmysqld/repl_failsafe.cc libmysqld/simple-test libmysqld/slave.cc libmysqld/sql_acl.cc diff --git a/BUILD/compile-pentium-pgcc b/BUILD/compile-pentium-pgcc index ee70fd2bde0..2d806009b21 100755 --- a/BUILD/compile-pentium-pgcc +++ b/BUILD/compile-pentium-pgcc @@ -1,10 +1,21 @@ AM_MAKEFLAGS="-j 2" -make -k clean +gmake -k clean || true /bin/rm -f */.deps/*.P config.cache - -aclocal; autoheader; aclocal; automake; autoconf - + +aclocal && autoheader && aclocal && automake && autoconf +(cd bdb/dist && sh s_all) +(cd innobase && aclocal && autoheader && aclocal && automake && autoconf) +if [ -d gemini ] +then + (cd gemini && aclocal && autoheader && aclocal && automake && autoconf) +fi + export PATH=/usr/local/pgcc/bin:$PATH -CFLAGS="-O6 -mpentiumpro -fomit-frame-pointer -mstack-align-double" CXX=gcc CXXFLAGS="-O6 -mpentiumpro -fomit-frame-pointer -felide-constructors -fno-exceptions -fno-rtti -mstack-align-double" ./configure --prefix=/usr/local/mysql --enable-assembler --with-mysqld-ldflags=-all-static --disable-shared --with-extra-charsets=complex --enable-thread-safe-client -make -j 2 +CFLAGS="-Wimplicit -Wreturn-type -Wid-clash-51 -Wswitch -Wtrigraphs -Wcomment -W -Wchar-subscripts -Wformat -Wimplicit-function-dec -Wimplicit-int -Wparentheses -Wsign-compare -Wwrite-strings -Wunused -O6 -mpentiumpro -fomit-frame-pointer -mstack-align-double" CXX=gcc CXXFLAGS="-Wimplicit -Wreturn-type -Wid-clash-51 -Wswitch -Wtrigraphs -Wcomment -W -Wchar-subscripts -Wformat -Wimplicit-function-dec -Wimplicit-int -Wparentheses -Wsign-compare -Wwrite-strings -Woverloaded-virtual -Wextern-inline -Wsign-promo -Wreorder -Wctor-dtor-privacy -Wnon-virtual-dtor -felide-constructors -fno-exceptions -fno-rtti -O6 -fomit-frame-pointer -mpentiumpro -mstack-align-double" 
./configure --prefix=/usr/local/mysql --enable-assembler --with-extra-charsets=complex --enable-thread-safe-client --with-mysqld-ldflags=-all-static --with-client-ldflags=-all-static + +gmake -j 4 + +mkdir -p tmp +nm --numeric-sort sql/mysqld > tmp/mysqld.sym +objdump -d sql/mysqld > tmp/mysqld.S strip sql/mysqld diff --git a/Build-tools/Do-compile b/Build-tools/Do-compile index 9409e3752df..10d97dfbe82 100755 --- a/Build-tools/Do-compile +++ b/Build-tools/Do-compile @@ -19,7 +19,7 @@ if ($opt_innodb || $opt_bdb) chomp($host=`hostname`); $full_host_name=$host; -info("Compiling MySQL$version_suffix at $host$suffix, stage: $opt_stage\n"); +info("Compiling MySQL$version_suffix at $host, stage: $opt_stage\n"); $connect_option= ($opt_tcpip ? "--host=$host" : ""); $host =~ /^([^.-]*)/; $host=$1 . $opt_suffix; @@ -65,10 +65,11 @@ $sendmail=find("/usr/lib/sendmail","/usr/sbin/sendmail"); $sur= $opt_sur ? "/my/local/bin/sur" : ""; delete $ENV{'MYSQL_PWD'}; # Reset possibly password delete $ENV{'MY_BASEDIR_VERSION'}; -$ENV{'MYSQL_TCP_PORT'}= $mysql_tcp_port= 3334 + $opt_build_thread; +$ENV{'MYSQL_TCP_PORT'}= $mysql_tcp_port= 3334 + $opt_build_thread*2; $ENV{'MYSQL_UNIX_PORT'}=$mysql_unix_port="$opt_tmp/mysql$opt_suffix.build"; $ENV{"PERL5LIB"}="$pwd/$host/perl5:$pwd/$host/perl5/site_perl"; $slave_port=$mysql_tcp_port+16; +$manager_port=$mysql_tcp_port+1; if (-x "$host/bin/mysqladmin") { @@ -78,6 +79,7 @@ if (-x "$host/bin/mysqladmin") log_system("$host/bin/mysqladmin --no-defaults -u root -P 9306 -h $host -s shutdown"); log_system("$host/bin/mysqladmin --no-defaults -u root -P 9307 -h $host -s shutdown"); } +kill_all("mysqlmanager"); if ($opt_stage == 0) { @@ -110,7 +112,7 @@ if ($opt_stage == 0 && ! $opt_use_old_distribution) # Fix file times; This is needed because the time for files may be # in the future - system("touch timestamp; find $var -newer timestamp -print | xargs touch; rm -f timestamp"); + system("touch timestamp; find . 
-newer timestamp -print | xargs touch; rm -f timestamp"); sleep(2); # Ensure that files we don't want to rebuild are newer than other files foreach $name ("configure", @@ -151,7 +153,7 @@ if ($opt_stage <= 1) { $opt_config_options.= " --with-innodb" } - check_system("$opt_config_env ./configure --prefix=/usr/local/mysql \"--with-comment=Official MySQL$version_suffix binary\" --with-extra-charsets=complex \"--with-server-suffix=$version_suffix\" $opt_config_options","Thank you for choosing MySQL"); + check_system("$opt_config_env ./configure --prefix=/usr/local/mysql \"--with-comment=Official MySQL$version_suffix binary\" --with-extra-charsets=complex \"--with-server-suffix=$version_suffix\" --enable-thread-safe-client $opt_config_options","Thank you for choosing MySQL"); if (-d "$pwd/$host/include-mysql") { safe_system("cp -r $pwd/$host/include-mysql/* $pwd/$host/$ver/include"); @@ -207,13 +209,13 @@ if ($opt_stage <= 4 && !$opt_no_test) $tar_file =~ /(mysql-[^\/]*)\.tar/; $ver=$1; $test_dir="$pwd/$host/test/$ver"; -$ENV{"LD_LIBRARY_PATH"}= "$testdir/lib:" . $ENV{"LD_LIBRARY_PATH"}; +$ENV{"LD_LIBRARY_PATH"}= "$test_dir/lib:" . $ENV{"LD_LIBRARY_PATH"}; if ($opt_stage <= 5 && !$opt_no_test && !$opt_no_mysqltest) { system("mkdir $bench_tmpdir") if (! -d $bench_tmpdir); safe_cd("${test_dir}/mysql-test"); - check_system("./mysql-test-run --tmpdir=$bench_tmpdir --master_port=$mysql_tcp_port --slave_port=$slave_port --sleep=10", "tests were successful"); + check_system("./mysql-test-run --tmpdir=$bench_tmpdir --master_port=$mysql_tcp_port --slave_port=$slave_port --manager-port=$manager_port --sleep=10", "tests were successful"); } # Start the server if we are going to run any of the benchmarks @@ -235,7 +237,7 @@ if (!$opt_no_test) { $extra.=" --innodb_data_file_path=ibdata1:100M"; } - safe_system("./bin/mysqld --no-defaults --basedir . --datadir ./data --skip-l\ocking $extra >> $log 2>&1 &"); + safe_system("./bin/mysqld --no-defaults --basedir . 
--datadir ./data --skip-locking $extra >> $log 2>&1 &"); sleep(2); } @@ -315,7 +317,7 @@ exit 0; sub usage { print <<EOF; -$0 version 1.2 +$0 version 1.4 $0 takes the following options: @@ -329,7 +331,7 @@ Compile with support for Innodb tables Compile with support for Berkeley DB tables --user 'user_name' -Mail 'user_name'\@analytikerna.se if something went wrong. +Mail 'user_name'\@mysql.com if something went wrong. If user is empty then no mail is sent. --distribution 'distribution_file' @@ -528,3 +530,44 @@ sub rm_all } } } + +sub kill_all +{ + my ($pattern) = @_; + my ($USER,$BSD,$LINUX,$pscmd, $user, $pid); + $user=$ENV{'USER'}; + $BSD = -f '/vmunix' || $ENV{"OS"} eq "SunOS4"; + $LINUX = $^O eq 'linux'; + $pscmd = $BSD ? "/bin/ps -auxww" : $LINUX ? "/bin/ps axuw" : "/bin/ps -ef"; + + open(PS, "$pscmd|") || die "can't run $pscmd: $!"; + + # Catch any errors with eval. A bad pattern, for instance. + process: + while ($cand = <PS>) + { + chop($cand); + ($pid_user, $pid) = split(' ', $cand); + next if $pid == $$; + next process if (! 
($cand =~ $pattern) || $pid_user ne $user); + print LOG "Killing $_\n"; + &killpid($pid); + } +} + +sub killpid +{ + local($pid) = @_; + kill 15, $pid; + for (1..5) + { + sleep 2; + return if kill(0, $pid) == 0; + } + kill 9, $pid; + for (1..5) { + sleep 2; + return if kill(0, $pid) == 0; + } + print LOG "$pid will not die!\n"; +} diff --git a/Build-tools/Do-rpm b/Build-tools/Do-rpm index 046ba93a1dd..6ce8c9c3e45 100755 --- a/Build-tools/Do-rpm +++ b/Build-tools/Do-rpm @@ -5,6 +5,7 @@ function copy_to_bmachine { if [ x$local_build = x1 ]; then + rm -f $2 cp $1 $2 else scp $1 $owner@$bmachine:$2 @@ -14,6 +15,7 @@ function copy_to_bmachine function copy_from_bmachine { if [ x$local_build = x1 ]; then + rm -f $2 cp $1 $2 else scp $owner@$bmachine:$1 $2 @@ -90,10 +92,7 @@ while test $# -gt 0; do done echo "Removing old MySQL packages" -rm -rf $rpmdir/BUILD/mysql-* -rm -f $rpmdir/SOURCES/mysql-* -rm -f $rpmdir/SRPMS/MySQL-* -rm -f $rpmdir/SPEC/mysql-* +rm -f $bpath/NEW-RPMS/MySQL-*rpm if [ ! -d "$logdir" ]; then echo "$logdir does not exist, creating" @@ -105,12 +104,20 @@ if [ ! -f "$tarball" ]; then exit 1 fi -echo "Building RPM for MySQL version $VER on $bmachine" - log=$logdir/Log-RPM-`date +%y%m%d-%H%M` +echo "Building RPM for MySQL version $VER on $bmachine" +echo "Details in $log" + ( set -x +# remove old stuff +rm -rf $rpmdir/BUILD/mysql-* +rm -f $rpmdir/SOURCES/mysql-* +rm -f $rpmdir/SRPMS/MySQL-* +rm -f $rpmdir/SPECS/mysql-* +rm -rf /var/tmp/mysql + # Copy MySQL source and spec files #Sasha: I left the scp stuff commented out instead of deleted to make it @@ -156,7 +163,7 @@ if [ ! 
x$skip_perl=x1 ]; then set -x # First clean up so we do not get old versions when wildcard matching - rm -f $rpmdir/SOURCES/DBI-*.spec + rm -f $rpmdir/SOURCES/DBI-*.spec $rpmdir/SOURCES/mysql* rm -f $rpmdir/RPMS/i386/Perl-*.rpm rm -f $rpmdir/SRPMS/Perl-*.rpm rm -f $rpmdir/RPMS/i386/MySQL*-$VER_NO_DASH*.rpm @@ -226,5 +233,9 @@ if [ x$local_build != x1 ]; then # And the perl ones #scp $owner@$bmachine:$rpmdir/RPMS/i386/Perl*-*.rpm $bpath/NEW-RPMS #scp $owner@$bmachine:$rpmdir/SRPMS/Perl*-*.rpm $bpath/NEW-RPMS + + #Remove some of the files that can interfere with future builds + + rm -rf /var/tmp/mysql fi ) > $log 2>&1 diff --git a/Docs/internals.texi b/Docs/internals.texi index 2195b42d9a0..f255a7ab6a6 100644 --- a/Docs/internals.texi +++ b/Docs/internals.texi @@ -481,6 +481,27 @@ Functions i mysys: (For flags se my_sys.h) void end_key_cache _A((void)); - End key-cacheing. +@node DBUG,,, +@chapter The DBUG tags to use: + +Here is some of the tags we now use: +(We should probably add a couple of new ones) + +"enter" Arguments to the function. +"exit" Results from the function. +"info" is something that may be interesting. +"warning" is when something doesn't go the usual route or may be wrong. +"error" when something went wrong. +"loop" write in a loop, that is probably only useful when debugging + the loop. These should normally be deleted when on is + satisfied with the code and it has been in real use for a while. + +Some specific to mysqld, because we want to watch these carefully: + +"trans" Starting/stopping transactions. +"quit" 'info' when mysqld is preparing to die. +"query" Print query + @node protocol,,, @chapter MySQL client/server protocol diff --git a/Docs/manual.texi b/Docs/manual.texi index f9f12d00d00..951c57931ba 100644 --- a/Docs/manual.texi +++ b/Docs/manual.texi @@ -3532,12 +3532,18 @@ an application when you delete records from a table that has a foreign key. 
In practice this is as quick (in some cases quicker) and much more portable than using foreign keys. +In MySQL 4.0 you can use multi-table delete to delete rows from many +tables with one command. @xref{DELETE}. + In the near future we will extend the @code{FOREIGN KEY} implementation so that at least the information will be saved in the table specification file and may be retrieved by @code{mysqldump} and ODBC. At a later stage we will implement the foreign key constraints for application that can't easily be coded to avoid them. +MySQL 3.23.44 and forwards, InnoDB tables supports checking of foreign +key constraints. @xref{InnoDB}. + @menu * Broken Foreign KEY:: Reasons NOT to use foreign keys constraints @end menu @@ -4033,8 +4039,13 @@ If the date is totally wrong, MySQL will store the special 0000-00-00 date value in the column. @item -If you set an @code{enum} to an unsupported value, it will be set to +If you set an @code{ENUM} column to an unsupported value, it will be set to the error value 'empty string', with numeric value 0. + +@item +If you set an @code{SET} column to an unsupported value, the value will +be ignored. @xref{Bugs}. + @end itemize @item @@ -4935,6 +4946,18 @@ Standard usage in PostgreSQL is closer to ANSI SQL in some cases. One can speed up PostgreSQL by coding things as stored procedures. @item +For geographical data, R-TREES makes PostgreSQL better than MySQL. + +@item +The PostgreSQL optimizer can do some optimization that the current MySQL +optimizer can't do. Most notable is doing joins when you don't have the +proper keys in place and doing a join where you are using different keys +combined with OR. The MySQL benchmark suite at +@uref{http://www.mysql.com/information/benchmarks.html} shows you what +kind of constructs you should watch out for when using different +databases. + +@item PostgreSQL has a bigger team of developers that contribute to the server. @end itemize @@ -7606,6 +7629,10 @@ warnings about @file{mysql.cc}. 
(If @code{make} stops, try @code{make -k} to tell it to continue with the rest of the build even if errors occur.) @item +If you want to get a embedded MySQL library (@code{libmysqld.a}) you should +use the @code{--with-embedded-server} option. + +@item If you don't want your log files and database directories located under @file{/usr/local/var}, use a @code{configure} command, something like one of these: @@ -7621,7 +7648,8 @@ installed under @file{/usr/local/mysql} rather than the default of @file{/usr/local}. The second command preserves the default installation prefix, but overrides the default location for database directories (normally @file{/usr/local/var}) and changes it to -@code{/usr/local/mysql/data}. +@code{/usr/local/mysql/data}. After you have compiled MySQL, you can +change these options with option files. @xref{Option files}. @cindex changing socket location @cindex socket location, changing @@ -24201,17 +24229,11 @@ for most systems, but one should be aware of it. @cindex design, limitations @cindex limitations, design -Because MySQL uses extremely fast table locking (multiple readers / -single writers) the biggest remaining problem is a mix of a steady stream of -inserts and slow selects on the same table. - -We believe that for a huge number of systems the extremely fast -performance in other cases make this choice a win. This case is usually -also possible to solve by having multiple copies of the table, but it -takes more effort and hardware. - -We are also working on some extensions to solve this problem for some -common application niches. +When using the MyISAM table handler, MySQL uses extremely fast table +locking (multiple readers / single writers). The biggest problem with +this table type is a if you have a mix of a steady stream of updates and +slow selects on the same table. If this is a problem with some tables, +you can use another table type for these. @xref{Table types}. 
MySQL can work with both transactional and not transactional tables. To be able to work smoothly with not transactional tables (which can't @@ -29147,6 +29169,9 @@ specified at table creation time. For example, if a column is specified as @code{SET("a","b","c","d")}, then @code{"a,d"}, @code{"d,a"}, and @code{"d,a,a,d,d"} will all appear as @code{"a,d"} when retrieved. +If you set a @code{SET} column to an unsupported value, the value will +be ignored. + @code{SET} values are sorted numerically. @code{NULL} values sort before non-@code{NULL} @code{SET} values. @@ -33791,8 +33816,10 @@ column in a table, the default value is the current date and time. @xref{Date and time types}. @item -For string types other than @code{ENUM}, the default value is the empty string. -For @code{ENUM}, the default is the first enumeration value. +For string types other than @code{ENUM}, the default value is the empty +string. For @code{ENUM}, the default is the first enumeration value (if +you haven't explicitely specified another default value with the +@code{DEFAULT} directive). @end itemize Default values must be constants. This means, for example, that you cannot @@ -42350,6 +42377,9 @@ library. @xref{mysql_server_init}. @node libmysqld compiling, libmysqld restrictions, libmysqld overview, libmysqld @subsubsection Compiling Programs with @code{libmysqld} +To get a @code{libmysqld} library you should configure MySQL with the +@code{--with-embedded-server} option. + When you link your program with @code{libmysqld}, you must also include the system specific @code{pthread} libraries and some libraries that @code{mysqld} uses. You can get the full list of libraries by executing @@ -47361,6 +47391,8 @@ Added documentation for @code{libmysqld}, the embedded MySQL server library. Also added example programs (a @code{mysql} client and @code{mysqltest} test program) which use @code{libmysqld}. @item +Removed all Gemini hooks from MySQL. 
+@item Removed @code{my_thread_init()} and @code{my_thread_end()} from mysql_com.h, and added @code{mysql_thread_init()} and @code{mysql_thread_end()} to mysql.h. diff --git a/acconfig.h b/acconfig.h index 341de4a2ceb..1e62d2a4248 100644 --- a/acconfig.h +++ b/acconfig.h @@ -81,6 +81,9 @@ /* Using Innobase DB */ #undef HAVE_INNOBASE_DB +/* Using old ISAM tables */ +#undef HAVE_ISAM + /* Define if we have GNU readline */ #undef HAVE_LIBREADLINE diff --git a/acinclude.m4 b/acinclude.m4 index 79caa4338ac..fe2ef07b61d 100644 --- a/acinclude.m4 +++ b/acinclude.m4 @@ -814,6 +814,23 @@ AC_SUBST(orbit_libs) AC_SUBST(orbit_idl) ]) +AC_DEFUN([MYSQL_CHECK_ISAM], [ + AC_ARG_WITH([isam], [\ + --without-isam Disable the ISAM table type], + [with_isam="$withval"], + [with_isam=yes]) + + isam_libs= + if test X"$with_isam" = X"yes" + then + AC_DEFINE(HAVE_ISAM) + isam_libs="\$(top_builddir)/isam/libnisam.a\ + \$(top_builddir)/merge/libmerge.a" + fi + AC_SUBST(isam_libs) +]) + + dnl --------------------------------------------------------------------------- dnl Macro: MYSQL_CHECK_BDB dnl Sets HAVE_BERKELEY_DB if inst library is found @@ -1159,48 +1176,6 @@ dnl END OF MYSQL_CHECK_INNODB SECTION dnl --------------------------------------------------------------------------- dnl --------------------------------------------------------------------------- -dnl Macro: MYSQL_CHECK_GEMINI -dnl Sets HAVE_GEMINI_DB if --with-gemini is used -dnl --------------------------------------------------------------------------- - -AC_DEFUN([MYSQL_CHECK_GEMINI], [ - AC_ARG_WITH([gemini], - [\ - --with-gemini[=DIR] Use Gemini DB located in DIR], - [gemini="$withval"], - [gemini=no]) - - AC_MSG_CHECKING([for Gemini DB]) - -dnl SORT OUT THE SUPPLIED ARGUMENTS TO DETERMINE WHAT TO DO -dnl echo "DBG_GEM1: gemini='$gemini'" - have_gemini_db=no - gemini_includes= - gemini_libs= - case "$gemini" in - no) - AC_MSG_RESULT([Not using Gemini DB]) - ;; - yes | default | *) - have_gemini_db="yes" - 
gemini_includes="-I../gemini/incl -I../gemini" - gemini_libs="\ - ../gemini/api/libapi.a\ - ../gemini/db/libdb.a\ - ../gemini/dbut/libdbut.a" - AC_MSG_RESULT([Using Gemini DB]) - ;; - esac - - AC_SUBST(gemini_includes) - AC_SUBST(gemini_libs) -]) - -dnl --------------------------------------------------------------------------- -dnl END OF MYSQL_CHECK_GEMINI SECTION -dnl --------------------------------------------------------------------------- - -dnl --------------------------------------------------------------------------- dnl Got this from the GNU tar 1.13.11 distribution dnl by Paul Eggert <eggert@twinsun.com> dnl --------------------------------------------------------------------------- diff --git a/client/mysqldump.c b/client/mysqldump.c index 894286f8896..64e7ae0fd82 100644 --- a/client/mysqldump.c +++ b/client/mysqldump.c @@ -1150,9 +1150,9 @@ static void dumpTable(uint numFields, char *table) } if (opt_lock) fputs("UNLOCK TABLES;\n", md_result_file); - mysql_free_result(res); if (opt_autocommit) fprintf(md_result_file, "commit;\n"); + mysql_free_result(res); } } /* dumpTable */ diff --git a/client/mysqltest.c b/client/mysqltest.c index 1374a276231..c6169540469 100644 --- a/client/mysqltest.c +++ b/client/mysqltest.c @@ -92,7 +92,7 @@ static char *db = 0, *pass=0; const char* user = 0, *host = 0, *unix_sock = 0, *opt_basedir="./"; static int port = 0, opt_big_test=0, opt_compress=0; static uint start_lineno, *lineno; -const char* manager_user="root",*manager_host="localhost"; +const char* manager_user="root",*manager_host=0; char *manager_pass=0; int manager_port=MYSQL_MANAGER_PORT; int manager_wait_timeout=3; @@ -655,6 +655,10 @@ int do_server_op(struct st_query* q,const char* op) { char* p=q->first_argument; char com_buf[256],*com_p; + if (!manager) + { + die("Manager is not initialized, manager commands are not possible"); + } com_p=strmov(com_buf,op); com_p=strmov(com_p,"_exec "); if (!*p) @@ -2195,8 +2199,9 @@ int main(int argc, char** argv) if 
(cur_file == file_stack) *++cur_file = stdin; *lineno=1; -#ifndef EMBEDDED_LIBRARY - init_manager(); +#ifndef EMBEDDED_LIBRARY + if (manager_host) + init_manager(); #endif if (!( mysql_init(&cur_con->mysql))) die("Failed in mysql_init()"); diff --git a/configure.in b/configure.in index ab2cc87d46f..4e22e4db664 100644 --- a/configure.in +++ b/configure.in @@ -2075,9 +2075,9 @@ EOF AC_MSG_RESULT([default: $default_charset; compiled in: $CHARSETS]) +MYSQL_CHECK_ISAM MYSQL_CHECK_BDB MYSQL_CHECK_INNODB -MYSQL_CHECK_GEMINI # If we have threads generate some library functions and test programs sql_server_dirs= @@ -2107,12 +2107,23 @@ then AC_SUBST(THREAD_LPROGRAMS) THREAD_LOBJECTS="thr_alarm.o thr_lock.o thr_mutex.o thr_rwlock.o my_pthread.o my_thr_init.o" AC_SUBST(THREAD_LOBJECTS) - sql_server_dirs="strings dbug mysys extra regex isam merge myisam myisammrg heap vio sql" server_scripts="mysqld_safe mysql_install_db" + sql_server_dirs="strings dbug mysys extra regex" + + + # + # Configuration for optional table handlers + # + + if test X"$have_isam" != Xno + then + sql_server_dirs="$sql_server_dirs isam merge" + fi + if test X"$have_berkeley_db" != Xno; then if test X"$have_berkeley_db" != Xyes; then # we must build berkeley db from source - sql_server_dirs="$have_berkeley_db $sql_server_dirs" + sql_server_dirs="$sql_server_dirs $have_berkeley_db" echo "CONFIGURING FOR BERKELEY DB" bdb_conf_flags= @@ -2179,7 +2190,7 @@ EOF if test X"$have_innodb" = Xyes then - sql_server_dirs="innobase $sql_server_dirs" + sql_server_dirs="$sql_server_dirs innobase" echo "CONFIGURING FOR INNODB" if test ! 
-d "innobase"; then # This should only happen when doing a VPATH build @@ -2196,17 +2207,10 @@ EOF echo "END OF INNODB CONFIGURATION" fi - - if test "X$have_gemini_db" = "Xyes"; then - sql_server_dirs="gemini $sql_server_dirs" - echo "CONFIGURING FOR GEMINI DB" - (cd gemini && sh ./configure) \ - || AC_MSG_ERROR([could not configure Gemini DB]) - - echo "END OF GEMINI DB CONFIGURATION" - - AC_DEFINE(HAVE_GEMINI_DB) - fi + # + # END of configuration for optional table handlers + # + sql_server_dirs="$sql_server_dirs myisam myisammrg heap vio sql" if test "$with_posix_threads" = "no" -o "$with_mit_threads" = "yes" then diff --git a/include/my_global.h b/include/my_global.h index 5fc575f7ada..09956254ac4 100644 --- a/include/my_global.h +++ b/include/my_global.h @@ -453,7 +453,6 @@ typedef SOCKET_SIZE_TYPE size_socket; /* Some things that this system doesn't have */ #define ONLY_OWN_DATABASES /* We are using only databases by monty */ -#define HAVE_ISAM /* TO BE DELETED */ #define NO_HASH /* Not needed anymore */ #ifdef __WIN__ #define NO_DIR_LIBRARY /* Not standar dir-library */ diff --git a/innobase/btr/btr0btr.c b/innobase/btr/btr0btr.c index 6da323867fb..e4e957ea7b6 100644 --- a/innobase/btr/btr0btr.c +++ b/innobase/btr/btr0btr.c @@ -2347,6 +2347,8 @@ btr_validate_level( mtr_start(&mtr); + mtr_x_lock(dict_tree_get_lock(tree), &mtr); + page = btr_root_get(tree, &mtr); space = buf_frame_get_space_id(page); diff --git a/innobase/btr/btr0cur.c b/innobase/btr/btr0cur.c index 7783f618d6d..a64ed8b6fe1 100644 --- a/innobase/btr/btr0cur.c +++ b/innobase/btr/btr0cur.c @@ -256,7 +256,8 @@ btr_cur_search_to_nth_level( #ifdef UNIV_SEARCH_PERF_STAT info->n_searches++; #endif - if (latch_mode <= BTR_MODIFY_LEAF && info->last_hash_succ + if (btr_search_latch.writer != RW_LOCK_NOT_LOCKED + && latch_mode <= BTR_MODIFY_LEAF && info->last_hash_succ && !estimate && btr_search_guess_on_hash(index, info, tuple, mode, latch_mode, cursor, @@ -344,9 +345,7 @@ btr_cur_search_to_nth_level( 
retry_page_get: page = buf_page_get_gen(space, page_no, rw_latch, guess, buf_mode, -#ifdef UNIV_SYNC_DEBUG IB__FILE__, __LINE__, -#endif mtr); if (page == NULL) { @@ -380,7 +379,7 @@ retry_page_get: } #endif ut_ad(0 == ut_dulint_cmp(tree->id, - btr_page_get_index_id(page))); + btr_page_get_index_id(page))); if (height == ULINT_UNDEFINED) { /* We are in the root node */ @@ -515,9 +514,7 @@ btr_cur_open_at_index_side( for (;;) { page = buf_page_get_gen(space, page_no, RW_NO_LATCH, NULL, BUF_GET, -#ifdef UNIV_SYNC_DEBUG IB__FILE__, __LINE__, -#endif mtr); ut_ad(0 == ut_dulint_cmp(tree->id, btr_page_get_index_id(page))); @@ -604,9 +601,7 @@ btr_cur_open_at_rnd_pos( for (;;) { page = buf_page_get_gen(space, page_no, RW_NO_LATCH, NULL, BUF_GET, -#ifdef UNIV_SYNC_DEBUG IB__FILE__, __LINE__, -#endif mtr); ut_ad(0 == ut_dulint_cmp(tree->id, btr_page_get_index_id(page))); @@ -1223,6 +1218,57 @@ btr_cur_parse_update_in_place( } /***************************************************************** +Updates a secondary index record when the update causes no size +changes in its fields. The only case when this function is currently +called is that in a char field characters change to others which +are identified in the collation order. 
*/ + +ulint +btr_cur_update_sec_rec_in_place( +/*============================*/ + /* out: DB_SUCCESS or error number */ + btr_cur_t* cursor, /* in: cursor on the record to update; + cursor stays valid and positioned on the + same record */ + upd_t* update, /* in: update vector */ + que_thr_t* thr, /* in: query thread */ + mtr_t* mtr) /* in: mtr */ +{ + dict_index_t* index = cursor->index; + dict_index_t* clust_index; + ulint err; + rec_t* rec; + dulint roll_ptr = ut_dulint_zero; + trx_t* trx = thr_get_trx(thr); + + /* Only secondary index records are updated using this function */ + ut_ad(0 == (index->type & DICT_CLUSTERED)); + + rec = btr_cur_get_rec(cursor); + + err = lock_sec_rec_modify_check_and_lock(0, rec, index, thr); + + if (err != DB_SUCCESS) { + + return(err); + } + + /* Remove possible hash index pointer to this record */ + btr_search_update_hash_on_delete(cursor); + + row_upd_rec_in_place(rec, update); + + clust_index = dict_table_get_first_index(index->table); + + /* Note that roll_ptr is really just a dummy value since + a secondary index record does not contain any sys columns */ + + btr_cur_update_in_place_log(BTR_KEEP_SYS_FLAG, rec, clust_index, + update, trx, roll_ptr, mtr); + return(DB_SUCCESS); +} + +/***************************************************************** Updates a record when the update causes no size changes in its fields. */ ulint @@ -1248,7 +1294,7 @@ btr_cur_update_in_place( ibool was_delete_marked; /* Only clustered index records are updated using this function */ - ut_ad((cursor->index)->type & DICT_CLUSTERED); + ut_ad(cursor->index->type & DICT_CLUSTERED); rec = btr_cur_get_rec(cursor); index = cursor->index; @@ -2477,27 +2523,33 @@ btr_estimate_n_rows_in_range( } /*********************************************************************** -Estimates the number of different key values in a given index. 
*/ +Estimates the number of different key values in a given index, for +each n-column prefix of the index where n <= dict_index_get_n_unique(index). +The estimates are stored in the array index->stat_n_diff_key_vals. */ -ulint +void btr_estimate_number_of_different_key_vals( /*======================================*/ - /* out: estimated number of key values */ dict_index_t* index) /* in: index */ { btr_cur_t cursor; page_t* page; rec_t* rec; - ulint total_n_recs = 0; - ulint n_diff_in_page; - ulint n_diff = 0; + ulint n_cols; ulint matched_fields; ulint matched_bytes; + ulint* n_diff; + ulint not_empty_flag = 0; ulint i; + ulint j; mtr_t mtr; - if (index->type & DICT_UNIQUE) { - return(index->table->stat_n_rows); + n_cols = dict_index_get_n_unique(index); + + n_diff = mem_alloc((n_cols + 1) * sizeof(ib_longlong)); + + for (j = 0; j <= n_cols; j++) { + n_diff[j] = 0; } /* We sample some pages in the index to get an estimate */ @@ -2507,17 +2559,19 @@ btr_estimate_number_of_different_key_vals( btr_cur_open_at_rnd_pos(index, BTR_SEARCH_LEAF, &cursor, &mtr); - /* Count the number of different key values minus one on this - index page: we subtract one because otherwise our algorithm - would give a wrong estimate for an index where there is - just one key value */ + /* Count the number of different key values minus one + for each prefix of the key on this index page: we subtract + one because otherwise our algorithm would give a wrong + estimate for an index where there is just one key value */ page = btr_cur_get_page(&cursor); rec = page_get_infimum_rec(page); rec = page_rec_get_next(rec); - n_diff_in_page = 0; + if (rec != page_get_supremum_rec(page)) { + not_empty_flag = 1; + } while (rec != page_get_supremum_rec(page) && page_rec_get_next(rec) @@ -2528,30 +2582,30 @@ btr_estimate_number_of_different_key_vals( cmp_rec_rec_with_match(rec, page_rec_get_next(rec), index, &matched_fields, &matched_bytes); - if (matched_fields < - dict_index_get_n_ordering_defined_by_user( 
- index)) { - n_diff_in_page++; - } + for (j = matched_fields + 1; j <= n_cols; j++) { + n_diff[j]++; + } + rec = page_rec_get_next(rec); } - - n_diff += n_diff_in_page; - - total_n_recs += page_get_n_recs(page); mtr_commit(&mtr); } - if (n_diff == 0) { - /* We play safe and assume that there are just two different - key values in the index */ - - return(2); + /* If we saw k borders between different key values on + BTR_KEY_VAL_ESTIMATE_N_PAGES leaf pages, we can estimate how many + there will be in index->stat_n_leaf_pages */ + + for (j = 0; j <= n_cols; j++) { + index->stat_n_diff_key_vals[j] = + (n_diff[j] * index->stat_n_leaf_pages + + BTR_KEY_VAL_ESTIMATE_N_PAGES - 1 + + not_empty_flag) + / BTR_KEY_VAL_ESTIMATE_N_PAGES; } - - return(index->table->stat_n_rows / (total_n_recs / n_diff)); + + mem_free(n_diff); } /*================== EXTERNAL STORAGE OF BIG FIELDS ===================*/ diff --git a/innobase/btr/btr0pcur.c b/innobase/btr/btr0pcur.c index 5e625553929..8ca3d41f7f9 100644 --- a/innobase/btr/btr0pcur.c +++ b/innobase/btr/btr0pcur.c @@ -62,8 +62,10 @@ btr_pcur_free_for_mysql( /****************************************************************** The position of the cursor is stored by taking an initial segment of the record the cursor is positioned on, before, or after, and copying it to the -cursor data structure. NOTE that the page where the cursor is positioned -must not be empty! */ +cursor data structure, or just setting a flag if the cursor id before the +first in an EMPTY tree, or after the last in an EMPTY tree. NOTE that the +page where the cursor is positioned must not be empty if the index tree is +not totally empty! */ void btr_pcur_store_position( @@ -93,9 +95,21 @@ btr_pcur_store_position( ut_a(cursor->latch_mode != BTR_NO_LATCHES); if (page_get_n_recs(page) == 0) { + /* It must be an empty index tree */ - /* Cannot store position! 
*/ - btr_pcur_close(cursor); + ut_a(btr_page_get_next(page, mtr) == FIL_NULL + && btr_page_get_prev(page, mtr) == FIL_NULL); + + if (rec == page_get_supremum_rec(page)) { + + cursor->rel_pos = BTR_PCUR_AFTER_LAST_IN_TREE; + cursor->old_stored = BTR_PCUR_OLD_STORED; + + return; + } + + cursor->rel_pos = BTR_PCUR_BEFORE_FIRST_IN_TREE; + cursor->old_stored = BTR_PCUR_OLD_STORED; return; } @@ -140,13 +154,15 @@ btr_pcur_copy_stored_position( ut_memcpy((byte*)pcur_receive, (byte*)pcur_donate, sizeof(btr_pcur_t)); - pcur_receive->old_rec_buf = mem_alloc(pcur_donate->buf_size); + if (pcur_donate->old_rec_buf) { + + pcur_receive->old_rec_buf = mem_alloc(pcur_donate->buf_size); - ut_memcpy(pcur_receive->old_rec_buf, pcur_donate->old_rec_buf, + ut_memcpy(pcur_receive->old_rec_buf, pcur_donate->old_rec_buf, pcur_donate->buf_size); - pcur_receive->old_rec = pcur_receive->old_rec_buf + pcur_receive->old_rec = pcur_receive->old_rec_buf + (pcur_donate->old_rec - pcur_donate->old_rec_buf); - + } } /****************************************************************** @@ -158,7 +174,9 @@ to the last record LESS OR EQUAL to the stored record; the last record LESS than the user record which was the successor of the page infimum; (3) cursor was positioned on the page supremum: restores to the first record -GREATER than the user record which was the predecessor of the supremum. */ +GREATER than the user record which was the predecessor of the supremum. +(4) cursor was positioned before the first or after the last in an empty tree: +restores to before first or after the last in the tree. 
*/ ibool btr_pcur_restore_position( @@ -177,17 +195,33 @@ btr_pcur_restore_position( dtuple_t* tuple; ulint mode; ulint old_mode; + ibool from_left; mem_heap_t* heap; - ut_a((cursor->pos_state == BTR_PCUR_WAS_POSITIONED) - || (cursor->pos_state == BTR_PCUR_IS_POSITIONED)); + ut_a(cursor->pos_state == BTR_PCUR_WAS_POSITIONED + || cursor->pos_state == BTR_PCUR_IS_POSITIONED); ut_a(cursor->old_stored == BTR_PCUR_OLD_STORED); + + if (cursor->rel_pos == BTR_PCUR_AFTER_LAST_IN_TREE + || cursor->rel_pos == BTR_PCUR_BEFORE_FIRST_IN_TREE) { + + if (cursor->rel_pos == BTR_PCUR_BEFORE_FIRST_IN_TREE) { + from_left = TRUE; + } else { + from_left = FALSE; + } + + btr_cur_open_at_index_side(from_left, + btr_pcur_get_btr_cur(cursor)->index, latch_mode, + btr_pcur_get_btr_cur(cursor), mtr); + return(FALSE); + } + ut_a(cursor->old_rec); page = btr_cur_get_page(btr_pcur_get_btr_cur(cursor)); - if ((latch_mode == BTR_SEARCH_LEAF) - || (latch_mode == BTR_MODIFY_LEAF)) { + if (latch_mode == BTR_SEARCH_LEAF || latch_mode == BTR_MODIFY_LEAF) { /* Try optimistic restoration */ if (buf_page_optimistic_get(latch_mode, page, @@ -242,16 +276,15 @@ btr_pcur_restore_position( /* Restore the old search mode */ cursor->search_mode = old_mode; - if ((cursor->rel_pos == BTR_PCUR_ON) - && btr_pcur_is_on_user_rec(cursor, mtr) - && (0 == cmp_dtuple_rec(tuple, btr_pcur_get_rec(cursor)))) { + if (cursor->rel_pos == BTR_PCUR_ON + && btr_pcur_is_on_user_rec(cursor, mtr) + && 0 == cmp_dtuple_rec(tuple, btr_pcur_get_rec(cursor))) { /* We have to store the NEW value for the modify clock, since the cursor can now be on a different page! 
*/ cursor->modify_clock = buf_frame_get_modify_clock( - buf_frame_align( - btr_pcur_get_rec(cursor))); + buf_frame_align(btr_pcur_get_rec(cursor))); mem_heap_free(heap); return(TRUE); @@ -366,6 +399,7 @@ btr_pcur_move_backward_from_page( latch_mode2 = BTR_MODIFY_PREV; } else { + latch_mode2 = 0; /* To eliminate compiler warning */ ut_error; } diff --git a/innobase/btr/btr0sea.c b/innobase/btr/btr0sea.c index 616f8911aba..31ef8ce573b 100644 --- a/innobase/btr/btr0sea.c +++ b/innobase/btr/btr0sea.c @@ -680,9 +680,7 @@ btr_search_guess_on_hash( success = buf_page_get_known_nowait(latch_mode, page, BUF_MAKE_YOUNG, -#ifdef UNIV_SYNC_DEBUG IB__FILE__, __LINE__, -#endif mtr); rw_lock_s_unlock(&btr_search_latch); diff --git a/innobase/buf/buf0buf.c b/innobase/buf/buf0buf.c index acf56ac4ddf..7d9cbf24948 100644 --- a/innobase/buf/buf0buf.c +++ b/innobase/buf/buf0buf.c @@ -34,6 +34,8 @@ Created 11/5/1995 Heikki Tuuri #include "ibuf0ibuf.h" #include "dict0dict.h" #include "log0recv.h" +#include "trx0undo.h" +#include "srv0srv.h" /* IMPLEMENTATION OF THE BUFFER POOL @@ -240,6 +242,11 @@ buf_page_is_corrupted( checksum = buf_calc_page_checksum(read_buf); + /* Note that InnoDB initializes empty pages to zero, and + early versions of InnoDB did not store page checksum to + the 4 most significant bytes of the page lsn field at the + end of a page: */ + if ((mach_read_from_4(read_buf + FIL_PAGE_LSN + 4) != mach_read_from_4(read_buf + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN + 4)) @@ -257,6 +264,71 @@ buf_page_is_corrupted( } /************************************************************************ +Prints a page to stderr. 
*/ + +void +buf_page_print( +/*===========*/ + byte* read_buf) /* in: a database page */ +{ + dict_index_t* index; + ulint checksum; + char* buf; + + buf = mem_alloc(4 * UNIV_PAGE_SIZE); + + ut_sprintf_buf(buf, read_buf, UNIV_PAGE_SIZE); + + fprintf(stderr, + "InnoDB: Page dump in ascii and hex (%lu bytes):\n%s", + UNIV_PAGE_SIZE, buf); + fprintf(stderr, "InnoDB: End of page dump\n"); + + mem_free(buf); + + checksum = buf_calc_page_checksum(read_buf); + + fprintf(stderr, "InnoDB: Page checksum %lu stored checksum %lu\n", + checksum, mach_read_from_4(read_buf + + UNIV_PAGE_SIZE + - FIL_PAGE_END_LSN)); + fprintf(stderr, + "InnoDB: Page lsn %lu %lu, low 4 bytes of lsn at page end %lu\n", + mach_read_from_4(read_buf + FIL_PAGE_LSN), + mach_read_from_4(read_buf + FIL_PAGE_LSN + 4), + mach_read_from_4(read_buf + UNIV_PAGE_SIZE + - FIL_PAGE_END_LSN + 4)); + if (mach_read_from_2(read_buf + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_TYPE) + == TRX_UNDO_INSERT) { + fprintf(stderr, + "InnoDB: Page may be an insert undo log page\n"); + } else if (mach_read_from_2(read_buf + TRX_UNDO_PAGE_HDR + + TRX_UNDO_PAGE_TYPE) + == TRX_UNDO_UPDATE) { + fprintf(stderr, + "InnoDB: Page may be an update undo log page\n"); + } + + if (fil_page_get_type(read_buf) == FIL_PAGE_INDEX) { + fprintf(stderr, + "InnoDB: Page may be an index page "); + + fprintf(stderr, + "where index id is %lu %lu\n", + ut_dulint_get_high(btr_page_get_index_id(read_buf)), + ut_dulint_get_low(btr_page_get_index_id(read_buf))); + + index = dict_index_find_on_id_low( + btr_page_get_index_id(read_buf)); + if (index) { + fprintf(stderr, "InnoDB: and table %s index %s\n", + index->table_name, + index->name); + } + } +} + +/************************************************************************ Initializes a buffer control block when the buf_pool is created. 
*/ static void @@ -334,6 +406,8 @@ buf_pool_create( frame = ut_align(buf_pool->frame_mem, UNIV_PAGE_SIZE); buf_pool->frame_zero = frame; + buf_pool->high_end = frame + UNIV_PAGE_SIZE * curr_size; + /* Init block structs and assign frames for them */ for (i = 0; i < max_size; i++) { @@ -345,6 +419,9 @@ buf_pool_create( buf_pool->page_hash = hash_create(2 * max_size); buf_pool->n_pend_reads = 0; + + buf_pool->last_printout_time = time(NULL); + buf_pool->n_pages_read = 0; buf_pool->n_pages_written = 0; buf_pool->n_pages_created = 0; @@ -352,6 +429,8 @@ buf_pool_create( buf_pool->n_page_gets = 0; buf_pool->n_page_gets_old = 0; buf_pool->n_pages_read_old = 0; + buf_pool->n_pages_written_old = 0; + buf_pool->n_pages_created_old = 0; /* 2. Initialize flushing fields ---------------------------- */ @@ -379,6 +458,10 @@ buf_pool_create( for (i = 0; i < curr_size; i++) { block = buf_pool_get_nth_block(buf_pool, i); + + /* Wipe contents of page to eliminate a Purify warning */ + memset(block->frame, '\0', UNIV_PAGE_SIZE); + UT_LIST_ADD_FIRST(free, buf_pool->free, block); } @@ -650,10 +733,8 @@ buf_page_get_gen( buf_frame_t* guess, /* in: guessed frame or NULL */ ulint mode, /* in: BUF_GET, BUF_GET_IF_IN_POOL, BUF_GET_NO_LATCH, BUF_GET_NOWAIT */ -#ifdef UNIV_SYNC_DEBUG char* file, /* in: file name */ ulint line, /* in: line where called */ -#endif mtr_t* mtr) /* in: mini-transaction */ { buf_block_t* block; @@ -759,19 +840,13 @@ loop: if (mode == BUF_GET_NOWAIT) { if (rw_latch == RW_S_LATCH) { - success = rw_lock_s_lock_func_nowait(&(block->lock) - #ifdef UNIV_SYNC_DEBUG - ,file, line - #endif - ); + success = rw_lock_s_lock_func_nowait(&(block->lock), + file, line); fix_type = MTR_MEMO_PAGE_S_FIX; } else { ut_ad(rw_latch == RW_X_LATCH); - success = rw_lock_x_lock_func_nowait(&(block->lock) - #ifdef UNIV_SYNC_DEBUG - ,file, line - #endif - ); + success = rw_lock_x_lock_func_nowait(&(block->lock), + file, line); fix_type = MTR_MEMO_PAGE_X_FIX; } @@ -796,18 +871,12 @@ loop: 
fix_type = MTR_MEMO_BUF_FIX; } else if (rw_latch == RW_S_LATCH) { - rw_lock_s_lock_func(&(block->lock) - #ifdef UNIV_SYNC_DEBUG - ,0, file, line - #endif - ); + rw_lock_s_lock_func(&(block->lock), 0, file, line); + fix_type = MTR_MEMO_PAGE_S_FIX; } else { - rw_lock_x_lock_func(&(block->lock), 0 - #ifdef UNIV_SYNC_DEBUG - , file, line - #endif - ); + rw_lock_x_lock_func(&(block->lock), 0, file, line); + fix_type = MTR_MEMO_PAGE_X_FIX; } @@ -838,10 +907,8 @@ buf_page_optimistic_get_func( buf_frame_t* guess, /* in: guessed frame */ dulint modify_clock,/* in: modify clock value if mode is ..._GUESS_ON_CLOCK */ -#ifdef UNIV_SYNC_DEBUG char* file, /* in: file name */ ulint line, /* in: line where called */ -#endif mtr_t* mtr) /* in: mini-transaction */ { buf_block_t* block; @@ -883,18 +950,12 @@ buf_page_optimistic_get_func( ut_ad(!ibuf_inside() || ibuf_page(block->space, block->offset)); if (rw_latch == RW_S_LATCH) { - success = rw_lock_s_lock_func_nowait(&(block->lock) - #ifdef UNIV_SYNC_DEBUG - , file, line - #endif - ); + success = rw_lock_s_lock_func_nowait(&(block->lock), + file, line); fix_type = MTR_MEMO_PAGE_S_FIX; } else { - success = rw_lock_x_lock_func_nowait(&(block->lock) - #ifdef UNIV_SYNC_DEBUG - , file, line - #endif - ); + success = rw_lock_x_lock_func_nowait(&(block->lock), + file, line); fix_type = MTR_MEMO_PAGE_X_FIX; } @@ -971,10 +1032,8 @@ buf_page_get_known_nowait( ulint rw_latch,/* in: RW_S_LATCH, RW_X_LATCH */ buf_frame_t* guess, /* in: the known page frame */ ulint mode, /* in: BUF_MAKE_YOUNG or BUF_KEEP_OLD */ -#ifdef UNIV_SYNC_DEBUG char* file, /* in: file name */ ulint line, /* in: line where called */ -#endif mtr_t* mtr) /* in: mini-transaction */ { buf_block_t* block; @@ -1017,18 +1076,12 @@ buf_page_get_known_nowait( ut_ad(!ibuf_inside() || (mode == BUF_KEEP_OLD)); if (rw_latch == RW_S_LATCH) { - success = rw_lock_s_lock_func_nowait(&(block->lock) - #ifdef UNIV_SYNC_DEBUG - , file, line - #endif - ); + success = 
rw_lock_s_lock_func_nowait(&(block->lock), + file, line); fix_type = MTR_MEMO_PAGE_S_FIX; } else { - success = rw_lock_x_lock_func_nowait(&(block->lock) - #ifdef UNIV_SYNC_DEBUG - , file, line - #endif - ); + success = rw_lock_x_lock_func_nowait(&(block->lock), + file, line); fix_type = MTR_MEMO_PAGE_X_FIX; } @@ -1318,9 +1371,26 @@ buf_page_io_complete( fprintf(stderr, "InnoDB: Database page corruption or a failed\n" "InnoDB: file read of page %lu.\n", block->offset); + fprintf(stderr, "InnoDB: You may have to recover from a backup.\n"); - exit(1); + + buf_page_print(block->frame); + + fprintf(stderr, + "InnoDB: Database page corruption or a failed\n" + "InnoDB: file read of page %lu.\n", block->offset); + fprintf(stderr, + "InnoDB: You may have to recover from a backup.\n"); + fprintf(stderr, + "InnoDB: It is also possible that your operating\n" + "InnoDB: system has corrupted its own file cache\n" + "InnoDB: and rebooting your computer removes the\n" + "InnoDB: error.\n"); + + if (srv_force_recovery < SRV_FORCE_IGNORE_CORRUPT) { + exit(1); + } } if (recv_recovery_is_on()) { @@ -1623,12 +1693,27 @@ buf_print(void) } /************************************************************************* +Returns the number of pending buf pool ios. */ + +ulint +buf_get_n_pending_ios(void) +/*=======================*/ +{ + return(buf_pool->n_pend_reads + + buf_pool->n_flush[BUF_FLUSH_LRU] + + buf_pool->n_flush[BUF_FLUSH_LIST] + + buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE]); +} + +/************************************************************************* Prints info of the buffer i/o. 
*/ void buf_print_io(void) /*==============*/ { + time_t current_time; + double time_elapsed; ulint size; ut_ad(buf_pool); @@ -1637,11 +1722,11 @@ buf_print_io(void) mutex_enter(&(buf_pool->mutex)); - printf("LRU list length %lu \n", UT_LIST_GET_LEN(buf_pool->LRU)); - printf("Free list length %lu \n", UT_LIST_GET_LEN(buf_pool->free)); + printf("Free list length %lu \n", UT_LIST_GET_LEN(buf_pool->free)); + printf("LRU list length %lu \n", UT_LIST_GET_LEN(buf_pool->LRU)); printf("Flush list length %lu \n", UT_LIST_GET_LEN(buf_pool->flush_list)); - printf("Buffer pool size in pages %lu\n", size); + printf("Buffer pool size %lu\n", size); printf("Pending reads %lu \n", buf_pool->n_pend_reads); @@ -1650,9 +1735,21 @@ buf_print_io(void) buf_pool->n_flush[BUF_FLUSH_LIST], buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE]); + current_time = time(NULL); + time_elapsed = difftime(current_time, buf_pool->last_printout_time); + + buf_pool->last_printout_time = current_time; + printf("Pages read %lu, created %lu, written %lu\n", buf_pool->n_pages_read, buf_pool->n_pages_created, buf_pool->n_pages_written); + printf("%.2f reads/s, %.2f creates/s, %.2f writes/s\n", + (buf_pool->n_pages_read - buf_pool->n_pages_read_old) + / time_elapsed, + (buf_pool->n_pages_created - buf_pool->n_pages_created_old) + / time_elapsed, + (buf_pool->n_pages_written - buf_pool->n_pages_written_old) + / time_elapsed); if (buf_pool->n_page_gets > buf_pool->n_page_gets_old) { printf("Buffer pool hit rate %lu / 1000\n", @@ -1660,10 +1757,14 @@ buf_print_io(void) - ((1000 * (buf_pool->n_pages_read - buf_pool->n_pages_read_old)) / (buf_pool->n_page_gets - buf_pool->n_page_gets_old))); + } else { + printf("No buffer pool activity since the last printout\n"); } buf_pool->n_page_gets_old = buf_pool->n_page_gets; buf_pool->n_pages_read_old = buf_pool->n_pages_read; + buf_pool->n_pages_created_old = buf_pool->n_pages_created; + buf_pool->n_pages_written_old = buf_pool->n_pages_written; mutex_exit(&(buf_pool->mutex)); } 
diff --git a/innobase/buf/buf0lru.c b/innobase/buf/buf0lru.c index 142beaaaa15..eb63fa99f4a 100644 --- a/innobase/buf/buf0lru.c +++ b/innobase/buf/buf0lru.c @@ -551,6 +551,10 @@ buf_LRU_block_free_non_file_page( block->state = BUF_BLOCK_NOT_USED; +#ifdef UNIV_DEBUG + /* Wipe contents of page to reveal possible stale pointers to it */ + memset(block->frame, '\0', UNIV_PAGE_SIZE); +#endif UT_LIST_ADD_FIRST(free, buf_pool->free, block); } diff --git a/innobase/configure.in b/innobase/configure.in index 48bb9504219..b606ecfffc0 100644 --- a/innobase/configure.in +++ b/innobase/configure.in @@ -38,7 +38,7 @@ AC_CHECK_HEADERS(aio.h sched.h) AC_CHECK_SIZEOF(int, 4) AC_CHECK_FUNCS(sched_yield) AC_CHECK_FUNCS(fdatasync) -AC_CHECK_FUNCS(localtime_r) +#AC_CHECK_FUNCS(localtime_r) # Already checked by MySQL #AC_C_INLINE Already checked in MySQL AC_C_BIGENDIAN diff --git a/innobase/data/data0data.c b/innobase/data/data0data.c index aecc56ec022..2254dcb6ae6 100644 --- a/innobase/data/data0data.c +++ b/innobase/data/data0data.c @@ -14,6 +14,7 @@ Created 5/30/1994 Heikki Tuuri #include "ut0rnd.h" #include "rem0rec.h" +#include "rem0cmp.h" #include "page0page.h" #include "dict0dict.h" #include "btr0cur.h" @@ -63,6 +64,53 @@ dtuple_get_nth_field_noninline( return(dtuple_get_nth_field(tuple, n)); } +/**************************************************************** +Returns TRUE if lengths of two dtuples are equal and respective data fields +in them are equal when compared with collation in char fields (not as binary +strings). */ + +ibool +dtuple_datas_are_ordering_equal( +/*============================*/ + /* out: TRUE if length and fieds are equal + when compared with cmp_data_data: + NOTE: in character type fields some letters + are identified with others! 
(collation) */ + dtuple_t* tuple1, /* in: tuple 1 */ + dtuple_t* tuple2) /* in: tuple 2 */ +{ + dfield_t* field1; + dfield_t* field2; + ulint n_fields; + ulint i; + + ut_ad(tuple1 && tuple2); + ut_ad(tuple1->magic_n = DATA_TUPLE_MAGIC_N); + ut_ad(tuple2->magic_n = DATA_TUPLE_MAGIC_N); + ut_ad(dtuple_check_typed(tuple1)); + ut_ad(dtuple_check_typed(tuple2)); + + n_fields = dtuple_get_n_fields(tuple1); + + if (n_fields != dtuple_get_n_fields(tuple2)) { + + return(FALSE); + } + + for (i = 0; i < n_fields; i++) { + + field1 = dtuple_get_nth_field(tuple1, i); + field2 = dtuple_get_nth_field(tuple2, i); + + if (0 != cmp_dfield_dfield(field1, field2)) { + + return(FALSE); + } + } + + return(TRUE); +} + /************************************************************************* Creates a dtuple for use in MySQL. */ @@ -408,7 +456,7 @@ dtuple_convert_big_rec( ulint size; ulint n_fields; ulint longest; - ulint longest_i; + ulint longest_i = ULINT_MAX; ibool is_externally_stored; ulint i; ulint j; diff --git a/innobase/data/data0type.c b/innobase/data/data0type.c index 82c00a83fb2..5d0ddf3e887 100644 --- a/innobase/data/data0type.c +++ b/innobase/data/data0type.c @@ -28,7 +28,6 @@ dtype_validate( ut_a((type->mtype >= DATA_VARCHAR) && (type->mtype <= DATA_MYSQL)); if (type->mtype == DATA_SYS) { - ut_a(type->prtype >= DATA_ROW_ID); ut_a(type->prtype <= DATA_MIX_ID); } @@ -45,11 +44,10 @@ dtype_print( { ulint mtype; ulint prtype; - + ulint len; + ut_a(type); - printf("DATA TYPE: "); - mtype = type->mtype; prtype = type->prtype; if (mtype == DATA_VARCHAR) { @@ -65,8 +63,10 @@ dtype_print( } else if (mtype == DATA_SYS) { printf("DATA_SYS"); } else { - printf("unknown type %lu", mtype); + printf("type %lu", mtype); } + + len = type->len; if ((type->mtype == DATA_SYS) || (type->mtype == DATA_VARCHAR) @@ -74,8 +74,13 @@ dtype_print( printf(" "); if (prtype == DATA_ROW_ID) { printf("DATA_ROW_ID"); + len = DATA_ROW_ID_LEN; } else if (prtype == DATA_ROLL_PTR) { printf("DATA_ROLL_PTR"); + 
len = DATA_ROLL_PTR_LEN; + } else if (prtype == DATA_TRX_ID) { + printf("DATA_TRX_ID"); + len = DATA_TRX_ID_LEN; } else if (prtype == DATA_MIX_ID) { printf("DATA_MIX_ID"); } else if (prtype == DATA_ENGLISH) { @@ -83,9 +88,9 @@ dtype_print( } else if (prtype == DATA_FINNISH) { printf("DATA_FINNISH"); } else { - printf("unknown prtype %lu", mtype); + printf("prtype %lu", mtype); } } - printf("; len %lu prec %lu\n", type->len, type->prec); + printf(" len %lu prec %lu", len, type->prec); } diff --git a/innobase/dict/dict0crea.c b/innobase/dict/dict0crea.c index 478364fba8a..9d79983c9e5 100644 --- a/innobase/dict/dict0crea.c +++ b/innobase/dict/dict0crea.c @@ -17,9 +17,13 @@ Created 1/8/1996 Heikki Tuuri #include "page0page.h" #include "mach0data.h" #include "dict0boot.h" +#include "dict0dict.h" #include "que0que.h" #include "row0ins.h" +#include "row0mysql.h" #include "pars0pars.h" +#include "trx0roll.h" +#include "usr0sess.h" /********************************************************************* Based on a table object, this function builds the entry to be inserted @@ -1019,3 +1023,228 @@ function_exit: return(thr); } + +/******************************************************************** +Creates the foreign key constraints system tables inside InnoDB +at database creation or database start if they are not found or are +not of the right form. 
*/ + +ulint +dict_create_or_check_foreign_constraint_tables(void) +/*================================================*/ + /* out: DB_SUCCESS or error code */ +{ + dict_table_t* table1; + dict_table_t* table2; + que_thr_t* thr; + que_t* graph; + ulint error; + trx_t* trx; + char* str; + + mutex_enter(&(dict_sys->mutex)); + + table1 = dict_table_get_low("SYS_FOREIGN"); + table2 = dict_table_get_low("SYS_FOREIGN_COLS"); + + if (table1 && table2 + && UT_LIST_GET_LEN(table1->indexes) == 3 + && UT_LIST_GET_LEN(table2->indexes) == 1) { + + /* Foreign constraint system tables have already been + created, and they are ok */ + + mutex_exit(&(dict_sys->mutex)); + + return(DB_SUCCESS); + } + + trx = trx_allocate_for_mysql(); + + trx->op_info = "creating foreign key sys tables"; + + if (table1) { + fprintf(stderr, + "InnoDB: dropping incompletely created SYS_FOREIGN table\n"); + row_drop_table_for_mysql("SYS_FOREIGN", trx, TRUE); + } + + if (table2) { + fprintf(stderr, + "InnoDB: dropping incompletely created SYS_FOREIGN_COLS table\n"); + row_drop_table_for_mysql("SYS_FOREIGN_COLS", trx, TRUE); + } + + fprintf(stderr, + "InnoDB: creating foreign key constraint system tables\n"); + + /* NOTE: in dict_load_foreigns we use the fact that + there are 2 secondary indexes on SYS_FOREIGN, and they + are defined just like below */ + + str = + "PROCEDURE CREATE_FOREIGN_SYS_TABLES_PROC () IS\n" + "BEGIN\n" + "CREATE TABLE\n" + "SYS_FOREIGN(ID CHAR, FOR_NAME CHAR, REF_NAME CHAR, N_COLS INT);\n" + "CREATE UNIQUE CLUSTERED INDEX ID_IND ON SYS_FOREIGN (ID);\n" + "CREATE INDEX FOR_IND ON SYS_FOREIGN (FOR_NAME);\n" + "CREATE INDEX REF_IND ON SYS_FOREIGN (REF_NAME);\n" + "CREATE TABLE\n" + "SYS_FOREIGN_COLS(ID CHAR, POS INT, FOR_COL_NAME CHAR, REF_COL_NAME CHAR);\n" + "CREATE UNIQUE CLUSTERED INDEX ID_IND ON SYS_FOREIGN_COLS (ID, POS);\n" + "COMMIT WORK;\n" + "END;\n"; + + graph = pars_sql(str); + + ut_a(graph); + + graph->trx = trx; + trx->graph = NULL; + + graph->fork_type = 
QUE_FORK_MYSQL_INTERFACE; + + ut_a(thr = que_fork_start_command(graph, SESS_COMM_EXECUTE, 0)); + + que_run_threads(thr); + + error = trx->error_state; + + if (error != DB_SUCCESS) { + ut_a(error == DB_OUT_OF_FILE_SPACE); + + fprintf(stderr, "InnoDB: creation failed\n"); + fprintf(stderr, "InnoDB: tablespace is full\n"); + fprintf(stderr, + "InnoDB: dropping incompletely created SYS_FOREIGN tables\n"); + + row_drop_table_for_mysql("SYS_FOREIGN", trx, TRUE); + row_drop_table_for_mysql("SYS_FOREIGN_COLS", trx, TRUE); + + error = DB_MUST_GET_MORE_FILE_SPACE; + } + + que_graph_free(graph); + + trx->op_info = ""; + + trx_free_for_mysql(trx); + + if (error == DB_SUCCESS) { + fprintf(stderr, + "InnoDB: foreign key constraint system tables created\n"); + } + + mutex_exit(&(dict_sys->mutex)); + + return(error); +} + +/************************************************************************ +Adds foreign key definitions to data dictionary tables in the database. */ + +ulint +dict_create_add_foreigns_to_dictionary( +/*===================================*/ + /* out: error code or DB_SUCCESS */ + dict_table_t* table, /* in: table */ + trx_t* trx) /* in: transaction */ +{ + dict_foreign_t* foreign; + que_thr_t* thr; + que_t* graph; + dulint id; + ulint len; + ulint error; + ulint i; + char buf2[50]; + char buf[10000]; + + ut_ad(mutex_own(&(dict_sys->mutex))); + + if (NULL == dict_table_get_low("SYS_FOREIGN")) { + fprintf(stderr, + "InnoDB: table SYS_FOREIGN not found from internal data dictionary\n"); + return(DB_ERROR); + } + + foreign = UT_LIST_GET_FIRST(table->foreign_list); +loop: + if (foreign == NULL) { + + return(DB_SUCCESS); + } + + /* Build an InnoDB stored procedure which will insert the necessary + rows to SYS_FOREIGN and SYS_FOREIGN_COLS */ + + len = 0; + + len += sprintf(buf, + "PROCEDURE ADD_FOREIGN_DEFS_PROC () IS\n" + "BEGIN\n"); + + /* We allocate the new id from the sequence of table id's */ + id = dict_hdr_get_new_id(DICT_HDR_TABLE_ID); + + sprintf(buf2, 
"%lu_%lu", ut_dulint_get_high(id), + ut_dulint_get_low(id)); + foreign->id = mem_heap_alloc(foreign->heap, ut_strlen(buf2) + 1); + ut_memcpy(foreign->id, buf2, ut_strlen(buf2) + 1); + + len += sprintf(buf + len, + "INSERT INTO SYS_FOREIGN VALUES('%lu_%lu', '%s', '%s', %lu);\n", + ut_dulint_get_high(id), + ut_dulint_get_low(id), + table->name, + foreign->referenced_table_name, + foreign->n_fields); + + for (i = 0; i < foreign->n_fields; i++) { + + len += sprintf(buf + len, + "INSERT INTO SYS_FOREIGN_COLS VALUES('%lu_%lu', %lu, '%s', '%s');\n", + ut_dulint_get_high(id), + ut_dulint_get_low(id), + i, + foreign->foreign_col_names[i], + foreign->referenced_col_names[i]); + } + + len += sprintf(buf + len,"COMMIT WORK;\nEND;\n"); + + graph = pars_sql(buf); + + ut_a(graph); + + graph->trx = trx; + trx->graph = NULL; + + graph->fork_type = QUE_FORK_MYSQL_INTERFACE; + + ut_a(thr = que_fork_start_command(graph, SESS_COMM_EXECUTE, 0)); + + que_run_threads(thr); + + error = trx->error_state; + + que_graph_free(graph); + + if (error != DB_SUCCESS) { + ut_a(error == DB_OUT_OF_FILE_SPACE); + + fprintf(stderr, "InnoDB: foreign constraint creation failed\n"); + fprintf(stderr, "InnoDB: tablespace is full\n"); + + trx_general_rollback_for_mysql(trx, FALSE, NULL); + + error = DB_MUST_GET_MORE_FILE_SPACE; + + return(error); + } + + foreign = UT_LIST_GET_NEXT(foreign_list, foreign); + + goto loop; +} diff --git a/innobase/dict/dict0dict.c b/innobase/dict/dict0dict.c index 10d93fc6ecf..e0a7fd327a5 100644 --- a/innobase/dict/dict0dict.c +++ b/innobase/dict/dict0dict.c @@ -17,6 +17,7 @@ Created 1/8/1996 Heikki Tuuri #include "mach0data.h" #include "dict0boot.h" #include "dict0mem.h" +#include "dict0crea.h" #include "trx0undo.h" #include "btr0btr.h" #include "btr0cur.h" @@ -24,10 +25,12 @@ Created 1/8/1996 Heikki Tuuri #include "pars0pars.h" #include "pars0sym.h" #include "que0que.h" - +#include "rem0cmp.h" dict_sys_t* dict_sys = NULL; /* the dictionary system */ +rw_lock_t 
dict_foreign_key_check_lock; + #define DICT_HEAP_SIZE 100 /* initial memory heap size when creating a table or index object */ #define DICT_POOL_PER_PROCEDURE_HASH 512 /* buffer pool max size per stored @@ -137,12 +140,12 @@ dict_tree_find_index_low( dict_tree_t* tree, /* in: index tree */ rec_t* rec); /* in: record for which to find correct index */ /************************************************************************** -Prints a table data. */ +Removes a foreign constraint struct from the dictionet cache. */ static void -dict_table_print_low( -/*=================*/ - dict_table_t* table); /* in: table */ +dict_foreign_remove_from_cache( +/*===========================*/ + dict_foreign_t* foreign); /* in, own: foreign constraint */ /************************************************************************** Prints a column data. */ static @@ -164,6 +167,13 @@ void dict_field_print_low( /*=================*/ dict_field_t* field); /* in: field */ +/************************************************************************* +Frees a foreign key struct. */ +static +void +dict_foreign_free( +/*==============*/ + dict_foreign_t* foreign); /* in, own: foreign key struct */ /************************************************************************ Reserves the dictionary system mutex for MySQL. 
*/ @@ -353,7 +363,8 @@ dict_table_get_on_id( { dict_table_t* table; - if (ut_dulint_cmp(table_id, DICT_FIELDS_ID) <= 0) { + if (ut_dulint_cmp(table_id, DICT_FIELDS_ID) <= 0 + || trx->dict_operation) { /* It is a system table which will always exist in the table cache: we avoid acquiring the dictionary mutex, because if we are doing a rollback to handle an error in TABLE @@ -415,6 +426,10 @@ dict_init(void) dict_sys->size = 0; UT_LIST_INIT(dict_sys->table_LRU); + + rw_lock_create(&dict_foreign_key_check_lock); + rw_lock_set_level(&dict_foreign_key_check_lock, + SYNC_FOREIGN_KEY_CHECK); } /************************************************************************** @@ -535,6 +550,41 @@ dict_table_add_to_cache( } /************************************************************************** +Looks for an index with the given id. NOTE that we do not reserve +the dictionary mutex: this function is for emergency purposes like +printing info of a corrupt database page! */ + +dict_index_t* +dict_index_find_on_id_low( +/*======================*/ + /* out: index or NULL if not found from cache */ + dulint id) /* in: index id */ +{ + dict_table_t* table; + dict_index_t* index; + + table = UT_LIST_GET_FIRST(dict_sys->table_LRU); + + while (table) { + index = dict_table_get_first_index(table); + + while (index) { + if (0 == ut_dulint_cmp(id, index->tree->id)) { + /* Found */ + + return(index); + } + + index = dict_table_get_next_index(index); + } + + table = UT_LIST_GET_NEXT(table_LRU, table); + } + + return(NULL); +} + +/************************************************************************** Renames a table object. 
*/ ibool @@ -544,10 +594,12 @@ dict_table_rename_in_cache( dict_table_t* table, /* in: table */ char* new_name) /* in: new name */ { - ulint fold; - ulint old_size; - char* name_buf; - ulint i; + dict_foreign_t* foreign; + dict_index_t* index; + ulint fold; + ulint old_size; + char* name_buf; + ulint i; ut_ad(table); ut_ad(mutex_own(&(dict_sys->mutex))); @@ -589,6 +641,55 @@ dict_table_rename_in_cache( dict_sys->size += (mem_heap_get_size(table->heap) - old_size); + /* Update the table_name field in indexes */ + index = dict_table_get_first_index(table); + + while (index != NULL) { + index->table_name = table->name; + + index = dict_table_get_next_index(index); + } + + /* Update the table name fields in foreign constraints */ + + foreign = UT_LIST_GET_FIRST(table->foreign_list); + + while (foreign != NULL) { + if (ut_strlen(foreign->foreign_table_name) < + ut_strlen(table->name)) { + /* Allocate a longer name buffer; + TODO: store buf len to save memory */ + foreign->foreign_table_name = mem_heap_alloc( + foreign->heap, + ut_strlen(table->name) + 1); + } + + ut_memcpy(foreign->foreign_table_name, table->name, + ut_strlen(table->name) + 1); + foreign->foreign_table_name[ut_strlen(table->name)] = '\0'; + + foreign = UT_LIST_GET_NEXT(foreign_list, foreign); + } + + foreign = UT_LIST_GET_FIRST(table->referenced_list); + + while (foreign != NULL) { + if (ut_strlen(foreign->referenced_table_name) < + ut_strlen(table->name)) { + /* Allocate a longer name buffer; + TODO: store buf len to save memory */ + foreign->referenced_table_name = mem_heap_alloc( + foreign->heap, + ut_strlen(table->name) + 1); + } + + ut_memcpy(foreign->referenced_table_name, table->name, + ut_strlen(table->name) + 1); + foreign->referenced_table_name[ut_strlen(table->name)] = '\0'; + + foreign = UT_LIST_GET_NEXT(referenced_list, foreign); + } + return(TRUE); } @@ -600,6 +701,7 @@ dict_table_remove_from_cache( /*=========================*/ dict_table_t* table) /* in, own: table */ { + dict_foreign_t* 
foreign; dict_index_t* index; ulint size; ulint i; @@ -610,6 +712,29 @@ dict_table_remove_from_cache( /* printf("Removing table %s from dictionary cache\n", table->name); */ + /* Remove the foreign constraints from the cache */ + foreign = UT_LIST_GET_LAST(table->foreign_list); + + while (foreign != NULL) { + ut_a(0 == ut_strcmp(foreign->foreign_table_name, table->name)); + + dict_foreign_remove_from_cache(foreign); + foreign = UT_LIST_GET_LAST(table->foreign_list); + } + + /* Reset table field in referencing constraints */ + + foreign = UT_LIST_GET_FIRST(table->referenced_list); + + while (foreign != NULL) { + ut_a(0 == ut_strcmp(foreign->referenced_table_name, + table->name)); + foreign->referenced_table = NULL; + foreign->referenced_index = NULL; + + foreign = UT_LIST_GET_NEXT(referenced_list, foreign); + } + /* Remove the indexes from the cache */ index = UT_LIST_GET_LAST(table->indexes); @@ -856,6 +981,21 @@ dict_index_add_to_cache( new_index->tree = tree; } + if (!(new_index->type & DICT_UNIVERSAL)) { + + new_index->stat_n_diff_key_vals = + mem_heap_alloc(new_index->heap, + (1 + dict_index_get_n_unique(new_index)) + * sizeof(ib_longlong)); + /* Give some sensible values to stat_n_... in case we do + not calculate statistics quickly enough */ + + for (i = 0; i <= dict_index_get_n_unique(new_index); i++) { + + new_index->stat_n_diff_key_vals[i] = 100; + } + } + /* Add the index to the list of indexes stored in the tree */ UT_LIST_ADD_LAST(tree_indexes, tree->tree_indexes, new_index); @@ -1290,6 +1430,654 @@ dict_index_build_internal_non_clust( return(new_index); } +/*====================== FOREIGN KEY PROCESSING ========================*/ + +/************************************************************************* +Frees a foreign key struct. 
*/ +static +void +dict_foreign_free( +/*==============*/ + dict_foreign_t* foreign) /* in, own: foreign key struct */ +{ + mem_heap_free(foreign->heap); +} + +/************************************************************************** +Removes a foreign constraint struct from the dictionary cache. */ +static +void +dict_foreign_remove_from_cache( +/*===========================*/ + dict_foreign_t* foreign) /* in, own: foreign constraint */ +{ + ut_ad(mutex_own(&(dict_sys->mutex))); + ut_a(foreign); + + if (foreign->referenced_table) { + UT_LIST_REMOVE(referenced_list, + foreign->referenced_table->referenced_list, foreign); + } + + if (foreign->foreign_table) { + UT_LIST_REMOVE(foreign_list, + foreign->foreign_table->foreign_list, foreign); + } + + dict_foreign_free(foreign); +} + +/************************************************************************** +Looks for the foreign constraint from the foreign and referenced lists +of a table. */ +static +dict_foreign_t* +dict_foreign_find( +/*==============*/ + /* out: foreign constraint */ + dict_table_t* table, /* in: table object */ + char* id) /* in: foreign constraint id */ +{ + dict_foreign_t* foreign; + + ut_ad(mutex_own(&(dict_sys->mutex))); + + foreign = UT_LIST_GET_FIRST(table->foreign_list); + + while (foreign) { + if (ut_strcmp(id, foreign->id) == 0) { + + return(foreign); + } + + foreign = UT_LIST_GET_NEXT(foreign_list, foreign); + } + + foreign = UT_LIST_GET_FIRST(table->referenced_list); + + while (foreign) { + if (ut_strcmp(id, foreign->id) == 0) { + + return(foreign); + } + + foreign = UT_LIST_GET_NEXT(referenced_list, foreign); + } + + return(NULL); +} + +/************************************************************************* +Tries to find an index whose first fields are the columns in the array, +in the same order. 
*/
static
dict_index_t*
dict_foreign_find_index(
/*====================*/
				/* out: matching index, NULL if not found */
	dict_table_t*	table,	/* in: table */
	char**		columns,/* in: array of column names */
	ulint		n_cols,	/* in: number of columns */
	dict_index_t*	types_idx)/* in: NULL or an index to whose types the
				column types must match */
{
	dict_index_t*	candidate;
	char*		field_name;
	ulint		n_matched;

	for (candidate = dict_table_get_first_index(table);
	     candidate != NULL;
	     candidate = dict_table_get_next_index(candidate)) {

		if (dict_index_get_n_fields(candidate) < n_cols) {
			/* Too few fields to hold all the columns */

			continue;
		}

		/* Count how many leading fields agree with the column
		array; stop at the first field that does not */

		for (n_matched = 0; n_matched < n_cols; n_matched++) {

			field_name = dict_index_get_nth_field(candidate,
						n_matched)->col->name;

			if (ut_strlen(columns[n_matched])
			    != ut_strlen(field_name)) {

				break;
			}

			if (0 != ut_memcmp(columns[n_matched], field_name,
						ut_strlen(field_name))) {

				break;
			}

			if (types_idx != NULL
			    && !cmp_types_are_equal(
				dict_index_get_nth_type(candidate, n_matched),
				dict_index_get_nth_type(types_idx,
							n_matched))) {

				break;
			}
		}

		if (n_matched == n_cols) {
			/* Every column matched: this index qualifies */

			return(candidate);
		}
	}

	return(NULL);
}

/**************************************************************************
Adds a foreign key constraint object to the dictionary cache. May free
the object if there already is an object with the same identifier in.
At least one of the foreign table and the referenced table must already
be in the dictionary cache!
*/

ulint
dict_foreign_add_to_cache(
/*======================*/
					/* out: DB_SUCCESS or error code */
	dict_foreign_t*	foreign)	/* in, own: foreign key constraint */
{
	dict_table_t*	for_table;
	dict_table_t*	ref_table;
	dict_foreign_t*	for_in_cache			= NULL;
	dict_index_t*	index;
	ibool		added_to_referenced_list	= FALSE;

	ut_ad(mutex_own(&(dict_sys->mutex)));

	for_table = dict_table_check_if_in_cache_low(
					foreign->foreign_table_name);

	ref_table = dict_table_check_if_in_cache_low(
					foreign->referenced_table_name);
	ut_a(for_table || ref_table);

	/* Look for an object with the same id that is already attached
	to either of the tables */

	if (for_table) {
		for_in_cache = dict_foreign_find(for_table, foreign->id);
	}

	if (!for_in_cache && ref_table) {
		for_in_cache = dict_foreign_find(ref_table, foreign->id);
	}

	if (for_in_cache) {
		/* Free the foreign object */
		mem_heap_free(foreign->heap);
	} else {
		for_in_cache = foreign;
	}

	if (for_in_cache->referenced_table == NULL && ref_table) {
		index = dict_foreign_find_index(ref_table,
					for_in_cache->referenced_col_names,
					for_in_cache->n_fields,
					for_in_cache->foreign_index);

		if (index == NULL) {
			if (for_in_cache == foreign) {
				mem_heap_free(foreign->heap);
			}

			return(DB_CANNOT_ADD_CONSTRAINT);
		}

		for_in_cache->referenced_table = ref_table;
		for_in_cache->referenced_index = index;
		UT_LIST_ADD_LAST(referenced_list,
					ref_table->referenced_list,
					for_in_cache);
		added_to_referenced_list = TRUE;
	}

	if (for_in_cache->foreign_table == NULL && for_table) {
		index = dict_foreign_find_index(for_table,
					for_in_cache->foreign_col_names,
					for_in_cache->n_fields,
					for_in_cache->referenced_index);

		if (index == NULL) {
			if (for_in_cache == foreign) {
				/* The new object may have been linked to
				the referenced list a few lines above:
				unlink it before freeing, or the list would
				keep a pointer to freed memory */

				if (added_to_referenced_list) {
					UT_LIST_REMOVE(referenced_list,
						ref_table->referenced_list,
						for_in_cache);
				}

				mem_heap_free(foreign->heap);
			}

			return(DB_CANNOT_ADD_CONSTRAINT);
		}

		for_in_cache->foreign_table = for_table;
		for_in_cache->foreign_index = index;
		UT_LIST_ADD_LAST(foreign_list,
					for_table->foreign_list,
					for_in_cache);
	}

	return(DB_SUCCESS);
}

/*************************************************************************
Scans from pointer onwards.
Stops if is at the start of a copy of +'string' where characters are compared without case sensitivity. Stops +also at '\0'. */ +static +char* +dict_scan_to( +/*=========*/ + + char* ptr, /* in: scan from */ + char* string) /* in: look for this */ +{ + ibool success; + ulint i; +loop: + if (*ptr == '\0') { + return(ptr); + } + + success = TRUE; + + for (i = 0; i < ut_strlen(string); i++) { + if (toupper((ulint)(ptr[i])) != toupper((ulint)(string[i]))) { + success = FALSE; + + break; + } + } + + if (success) { + + return(ptr); + } + + ptr++; + + goto loop; +} + +/************************************************************************* +Accepts a specified string. Comparisons are case-insensitive. */ +static +char* +dict_accept( +/*========*/ + /* out: if string was accepted, the pointer + is moved after that, else ptr is returned */ + char* ptr, /* in: scan from this */ + char* string, /* in: accept only this string as the next + non-whitespace string */ + ibool* success)/* out: TRUE if accepted */ +{ + char* old_ptr = ptr; + char* old_ptr2; + + *success = FALSE; + + while (isspace(*ptr)) { + ptr++; + } + + old_ptr2 = ptr; + + ptr = dict_scan_to(ptr, string); + + if (*ptr == '\0' || old_ptr2 != ptr) { + return(old_ptr); + } + + *success = TRUE; + + return(ptr + ut_strlen(string)); +} + +/************************************************************************* +Tries to scan a column name. 
*/
static
char*
dict_scan_col(
/*==========*/
				/* out: scanned to */
	char*		ptr,	/* in: scanned to */
	ibool*		success,/* out: TRUE if success */
	dict_table_t*	table,	/* in: table in which the column is */
	dict_col_t**	column,	/* out: pointer to column if success */
	char**		column_name)/* out: pointer to column->name if
				success */
{
	dict_col_t*	col;
	char*		old_ptr;
	ulint		name_len;
	ulint		i;

	*success = FALSE;

	/* The <ctype.h> functions require an argument that is
	representable as unsigned char, hence the casts */

	while (isspace((unsigned char) *ptr)) {
		ptr++;
	}

	if (*ptr == '\0') {

		return(ptr);
	}

	old_ptr = ptr;

	/* The name extends to the next whitespace, ',' or ')'. Stop also
	at the end of the string so that a truncated statement cannot make
	us read past the terminating '\0' */

	while (*ptr != '\0' && !isspace((unsigned char) *ptr)
	       && *ptr != ',' && *ptr != ')') {
		ptr++;
	}

	name_len = (ulint)(ptr - old_ptr);

	for (i = 0; i < dict_table_get_n_cols(table); i++) {

		col = dict_table_get_nth_col(table, i);

		if (ut_strlen(col->name) == name_len
		    && 0 == ut_memcmp(col->name, old_ptr, name_len)) {

			/* Found */

			*success = TRUE;
			*column = col;
			*column_name = col->name;

			break;
		}
	}

	return(ptr);
}

/*************************************************************************
Scans the referenced table name from an SQL string.
*/ +static +char* +dict_scan_table_name( +/*=================*/ + /* out: scanned to */ + char* ptr, /* in: scanned to */ + dict_table_t** table, /* out: table object or NULL if error */ + char* name) /* in: foreign key table name */ +{ + char* dot_ptr = NULL; + char* old_ptr; + ulint i; + char second_table_name[10000]; + + *table = NULL; + + while (isspace(*ptr)) { + ptr++; + } + + if (*ptr == '\0') { + + return(ptr); + } + + old_ptr = ptr; + + while (!isspace(*ptr) && *ptr != '(') { + if (*ptr == '.') { + dot_ptr = ptr; + } + + ptr++; + } + + if (ptr - old_ptr > 9000) { + return(old_ptr); + } + + if (dot_ptr == NULL) { + /* Copy the database name from 'name' to the start */ + for (i = 0;; i++) { + second_table_name[i] = name[i]; + if (name[i] == '/') { + i++; + break; + } + } + + ut_memcpy(second_table_name + i, old_ptr, ptr - old_ptr); + second_table_name[i + (ptr - old_ptr)] = '\0'; + } else { + ut_memcpy(second_table_name, old_ptr, ptr - old_ptr); + second_table_name[dot_ptr - old_ptr] = '/'; + second_table_name[ptr - old_ptr] = '\0'; + } + + *table = dict_table_get_low(second_table_name); + + return(ptr); +} + +/************************************************************************* +Returns the number of opening brackets '(' subtracted by the number +of closing brackets ')' between string and ptr. */ +static +int +dict_bracket_count( +/*===============*/ + /* out: bracket count */ + char* string, /* in: start of string */ + char* ptr) /* in: end of string */ +{ + int count = 0; + + while (string != ptr) { + if (*string == '(') { + count++; + } + if (*string == ')') { + count--; + } + + string++; + } + + return(count); +} + +/************************************************************************* +Scans a table create SQL string and adds to the data dictionary the foreign +key constraints declared in the string. This function should be called after +the indexes for a table have been created. 
Each foreign key constraint must +be accompanied with indexes in both participating tables. The indexes are +allowed to contain more fields than mentioned in the constraint. */ + +ulint +dict_create_foreign_constraints( +/*============================*/ + /* out: error code or DB_SUCCESS */ + trx_t* trx, /* in: transaction */ + char* sql_string, /* in: table create statement where + foreign keys are declared like: + FOREIGN KEY (a, b) REFERENCES table2(c, d), + table2 can be written also with the database + name before it: test.table2; the default + database id the database of parameter name */ + char* name) /* in: table full name in the normalized form + database_name/table_name */ +{ + dict_table_t* table; + dict_table_t* referenced_table; + dict_index_t* index; + dict_foreign_t* foreign; + char* ptr = sql_string; + ibool success; + ulint error; + ulint i; + dict_col_t* columns[1000]; + char* column_names[1000]; + + ut_ad(mutex_own(&(dict_sys->mutex))); + + table = dict_table_get_low(name); + + if (table == NULL) { + return(DB_ERROR); + } +loop: + ptr = dict_scan_to(ptr, "FOREIGN"); + + if (*ptr == '\0' || dict_bracket_count(sql_string, ptr) != 1) { + + /* The following call adds the foreign key constraints + to the data dictionary system tables on disk */ + + error = dict_create_add_foreigns_to_dictionary(table, trx); + + return(error); + } + + ptr = dict_accept(ptr, "FOREIGN", &success); + + if (!isspace(*ptr)) { + return(DB_CANNOT_ADD_CONSTRAINT); + } + + ptr = dict_accept(ptr, "KEY", &success); + + if (!success) { + goto loop; + } + + ptr = dict_accept(ptr, "(", &success); + + if (!success) { + goto loop; + } + + i = 0; + + /* Scan the columns in the first list */ +col_loop1: + ptr = dict_scan_col(ptr, &success, table, columns + i, + column_names + i); + if (!success) { + return(DB_CANNOT_ADD_CONSTRAINT); + } + + i++; + + ptr = dict_accept(ptr, ",", &success); + + if (success) { + goto col_loop1; + } + + ptr = dict_accept(ptr, ")", &success); + + if 
(!success) { + return(DB_CANNOT_ADD_CONSTRAINT); + } + + /* Try to find an index which contains the columns + as the first fields and in the right order */ + + index = dict_foreign_find_index(table, column_names, i, NULL); + + if (!index) { + return(DB_CANNOT_ADD_CONSTRAINT); + } + + ptr = dict_accept(ptr, "REFERENCES", &success); + + if (!success || !isspace(*ptr)) { + return(DB_CANNOT_ADD_CONSTRAINT); + } + + /* Let us create a constraint struct */ + + foreign = dict_mem_foreign_create(); + + foreign->foreign_table = table; + foreign->foreign_table_name = table->name; + foreign->foreign_index = index; + foreign->n_fields = i; + foreign->foreign_col_names = mem_heap_alloc(foreign->heap, + i * sizeof(void*)); + for (i = 0; i < foreign->n_fields; i++) { + foreign->foreign_col_names[i] = mem_heap_alloc(foreign->heap, + 1 + ut_strlen(columns[i]->name)); + ut_memcpy(foreign->foreign_col_names[i], columns[i]->name, + 1 + ut_strlen(columns[i]->name)); + } + + ptr = dict_scan_table_name(ptr, &referenced_table, name); + + if (!referenced_table) { + dict_foreign_free(foreign); + + return(DB_CANNOT_ADD_CONSTRAINT); + } + + ptr = dict_accept(ptr, "(", &success); + + if (!success) { + dict_foreign_free(foreign); + return(DB_CANNOT_ADD_CONSTRAINT); + } + + /* Scan the columns in the second list */ + i = 0; + +col_loop2: + ptr = dict_scan_col(ptr, &success, referenced_table, columns + i, + column_names + i); + i++; + + if (!success) { + dict_foreign_free(foreign); + return(DB_CANNOT_ADD_CONSTRAINT); + } + + ptr = dict_accept(ptr, ",", &success); + + if (success) { + goto col_loop2; + } + + ptr = dict_accept(ptr, ")", &success); + + if (!success || foreign->n_fields != i) { + dict_foreign_free(foreign); + + return(DB_CANNOT_ADD_CONSTRAINT); + } + + /* Try to find an index which contains the columns as the first fields + and in the right order, and the types are the same as in + foreign->foreign_index */ + + index = dict_foreign_find_index(referenced_table, column_names, i, + 
foreign->foreign_index); + + if (!index) { + dict_foreign_free(foreign); + return(DB_CANNOT_ADD_CONSTRAINT); + } + + foreign->referenced_index = index; + foreign->referenced_table = referenced_table; + + foreign->referenced_table_name = mem_heap_alloc(foreign->heap, + 1 + ut_strlen(referenced_table->name)); + + ut_memcpy(foreign->referenced_table_name, referenced_table->name, + 1 + ut_strlen(referenced_table->name)); + + foreign->referenced_col_names = mem_heap_alloc(foreign->heap, + i * sizeof(void*)); + for (i = 0; i < foreign->n_fields; i++) { + foreign->referenced_col_names[i] + = mem_heap_alloc(foreign->heap, + 1 + ut_strlen(columns[i]->name)); + ut_memcpy( + foreign->referenced_col_names[i], columns[i]->name, + 1 + ut_strlen(columns[i]->name)); + } + + /* We found an ok constraint definition: add to the lists */ + + UT_LIST_ADD_LAST(foreign_list, table->foreign_list, foreign); + UT_LIST_ADD_LAST(referenced_list, referenced_table->referenced_list, + foreign); + goto loop; +} + +/*==================== END OF FOREIGN KEY PROCESSING ====================*/ + /************************************************************************** Adds a stored procedure object to the dictionary cache. */ @@ -1733,77 +2521,127 @@ dict_tree_build_data_tuple( } /************************************************************************* -Calculates new estimates for table and index statistics. The statistics -are used in query optimization. */ +Calculates the minimum record length in an index. 
*/ -void -dict_update_statistics( -/*===================*/ - dict_table_t* table) /* in: table */ +ulint +dict_index_calc_min_rec_len( +/*========================*/ + dict_index_t* index) /* in: index */ { - mem_heap_t* heap; - dict_index_t* index; - dtuple_t* start; - dtuple_t* end; - ulint n_rows; - ulint n_vals; - ulint size; - ulint sum_of_index_sizes = 0; - - /* Estimate the number of records in the clustered index */ - index = dict_table_get_first_index(table); - - heap = mem_heap_create(500); - - start = dtuple_create(heap, 0); - end = dtuple_create(heap, 0); + ulint sum = 0; + ulint i; - n_rows = btr_estimate_n_rows_in_range(index, start, PAGE_CUR_G, - end, PAGE_CUR_L); - mem_heap_free(heap); + for (i = 0; i < dict_index_get_n_fields(index); i++) { + sum += dtype_get_fixed_size(dict_index_get_nth_type(index, i)); + } - if (n_rows > 0) { - /* For small tables our estimate function tends to give - values 1 too big */ - n_rows--; + if (sum > 127) { + sum += 2 * dict_index_get_n_fields(index); + } else { + sum += dict_index_get_n_fields(index); } - mutex_enter(&(dict_sys->mutex)); + sum += REC_N_EXTRA_BYTES; - table->stat_last_estimate_counter = table->stat_modif_counter; - table->stat_n_rows = n_rows; + return(sum); +} - mutex_exit(&(dict_sys->mutex)); +/************************************************************************* +Calculates new estimates for table and index statistics. The statistics +are used in query optimization. 
*/ + +void +dict_update_statistics_low( +/*=======================*/ + dict_table_t* table, /* in: table */ + ibool has_dict_mutex) /* in: TRUE if the caller has the + dictionary mutex */ +{ + dict_index_t* index; + ulint size; + ulint sum_of_index_sizes = 0; /* Find out the sizes of the indexes and how many different values for the key they approximately have */ - + + index = dict_table_get_first_index(table); + while (index) { - n_vals = btr_estimate_number_of_different_key_vals(index); size = btr_get_size(index, BTR_TOTAL_SIZE); + index->stat_index_size = size; + sum_of_index_sizes += size; - mutex_enter(&(dict_sys->mutex)); + size = btr_get_size(index, BTR_N_LEAF_PAGES); - index->stat_n_diff_key_vals = n_vals; - index->stat_index_size = size; + if (size == 0) { + /* The root node of the tree is a leaf */ + size = 1; + } - mutex_exit(&(dict_sys->mutex)); + index->stat_n_leaf_pages = size; + + btr_estimate_number_of_different_key_vals(index); index = dict_table_get_next_index(index); } index = dict_table_get_first_index(table); + table->stat_n_rows = index->stat_n_diff_key_vals[ + dict_index_get_n_unique(index)]; + table->stat_clustered_index_size = index->stat_index_size; table->stat_sum_of_other_index_sizes = sum_of_index_sizes - - index->stat_index_size; + - index->stat_index_size; table->stat_last_estimate_counter = table->stat_modif_counter; } +/************************************************************************* +Calculates new estimates for table and index statistics. The statistics +are used in query optimization. */ + +void +dict_update_statistics( +/*===================*/ + dict_table_t* table) /* in: table */ +{ + dict_update_statistics_low(table, FALSE); +} + +/************************************************************************** +Prints info of a foreign key constraint. 
*/ +static +void +dict_foreign_print_low( +/*===================*/ + dict_foreign_t* foreign) /* in: foreign key constraint */ +{ + ulint i; + + ut_ad(mutex_own(&(dict_sys->mutex))); + + printf(" FOREIGN KEY CONSTRAINT %s: %s (", foreign->id, + foreign->foreign_table_name); + + for (i = 0; i < foreign->n_fields; i++) { + printf(" %s", foreign->foreign_col_names[i]); + } + + printf(" )\n"); + + printf(" REFERENCES %s (", foreign->referenced_table_name); + + for (i = 0; i < foreign->n_fields; i++) { + printf(" %s", foreign->referenced_col_names[i]); + } + + printf(" )\n"); +} + /************************************************************************** Prints a table data. */ @@ -1839,31 +2677,57 @@ dict_table_print_by_name( /************************************************************************** Prints a table data. */ -static + void dict_table_print_low( /*=================*/ dict_table_t* table) /* in: table */ { - ulint i; dict_index_t* index; + dict_foreign_t* foreign; + ulint i; ut_ad(mutex_own(&(dict_sys->mutex))); + dict_update_statistics_low(table, TRUE); + printf("--------------------------------------\n"); - printf("TABLE INFO: name %s, columns %lu, indexes %lu\n", table->name, - table->n_cols, UT_LIST_GET_LEN(table->indexes)); - for (i = 0; i < table->n_cols; i++) { - printf(" "); + printf( + "TABLE: name %s, id %lu %lu, columns %lu, indexes %lu, appr.rows %lu\n", + table->name, + ut_dulint_get_high(table->id), + ut_dulint_get_low(table->id), + table->n_cols, UT_LIST_GET_LEN(table->indexes), + (ulint)table->stat_n_rows); + printf(" COLUMNS: "); + + for (i = 0; i < table->n_cols - 1; i++) { dict_col_print_low(dict_table_get_nth_col(table, i)); + printf("; "); } + printf("\n"); + index = UT_LIST_GET_FIRST(table->indexes); while (index != NULL) { dict_index_print_low(index); index = UT_LIST_GET_NEXT(indexes, index); } + + foreign = UT_LIST_GET_FIRST(table->foreign_list); + + while (foreign != NULL) { + dict_foreign_print_low(foreign); + foreign = 
UT_LIST_GET_NEXT(foreign_list, foreign); + } + + foreign = UT_LIST_GET_FIRST(table->referenced_list); + + while (foreign != NULL) { + dict_foreign_print_low(foreign); + foreign = UT_LIST_GET_NEXT(referenced_list, foreign); + } } /************************************************************************** @@ -1879,7 +2743,7 @@ dict_col_print_low( ut_ad(mutex_own(&(dict_sys->mutex))); type = dict_col_get_type(col); - printf("COLUMN: name %s; ", col->name); + printf("%s: ", col->name); dtype_print(type); } @@ -1892,28 +2756,47 @@ dict_index_print_low( /*=================*/ dict_index_t* index) /* in: index */ { - ulint i; dict_tree_t* tree; + ib_longlong n_vals; + ulint i; ut_ad(mutex_own(&(dict_sys->mutex))); tree = index->tree; - + + if (index->n_user_defined_cols > 0) { + n_vals = index->stat_n_diff_key_vals[ + index->n_user_defined_cols]; + } else { + n_vals = index->stat_n_diff_key_vals[1]; + } + + printf( - "INDEX INFO: name %s, table name %s, fields %lu, type %lu\n", - index->name, index->table_name, index->n_fields, - index->type); - printf(" root node: space %lu, page number %lu\n", - tree->space, tree->page); + " INDEX: name %s, table name %s, id %lu %lu, fields %lu/%lu, type %lu\n", + index->name, index->table_name, + ut_dulint_get_high(tree->id), + ut_dulint_get_low(tree->id), + index->n_user_defined_cols, + index->n_fields, index->type); + printf( + " root page %lu, appr.key vals %lu, leaf pages %lu, size pages %lu\n", + tree->page, + (ulint)n_vals, + index->stat_n_leaf_pages, + index->stat_index_size); + printf(" FIELDS: "); + for (i = 0; i < index->n_fields; i++) { - printf(" "); dict_field_print_low(dict_index_get_nth_field(index, i)); } - btr_print_size(tree); + printf("\n"); + +/* btr_print_size(tree); */ - btr_print_tree(tree, 7); +/* btr_print_tree(tree, 7); */ } /************************************************************************** @@ -1926,6 +2809,5 @@ dict_field_print_low( { ut_ad(mutex_own(&(dict_sys->mutex))); - printf("FIELD: column name 
%s, order criterion %lu\n", field->name, - field->order); + printf(" %s", field->name); } diff --git a/innobase/dict/dict0load.c b/innobase/dict/dict0load.c index be16988086a..dcdc9ee01cd 100644 --- a/innobase/dict/dict0load.c +++ b/innobase/dict/dict0load.c @@ -48,8 +48,171 @@ dict_load_fields( /************************************************************************ +Finds the first table name in the given database. */ + +char* +dict_get_first_table_name_in_db( +/*============================*/ + /* out, own: table name, NULL if does not exist; + the caller must free the memory in the string! */ + char* name) /* in: database name which ends to '/' */ +{ + dict_table_t* sys_tables; + btr_pcur_t pcur; + dict_index_t* sys_index; + dtuple_t* tuple; + mem_heap_t* heap; + dfield_t* dfield; + rec_t* rec; + byte* field; + ulint len; + char* table_name; + mtr_t mtr; + + ut_ad(mutex_own(&(dict_sys->mutex))); + + heap = mem_heap_create(1000); + + mtr_start(&mtr); + + sys_tables = dict_table_get_low("SYS_TABLES"); + sys_index = UT_LIST_GET_FIRST(sys_tables->indexes); + + tuple = dtuple_create(heap, 1); + dfield = dtuple_get_nth_field(tuple, 0); + + dfield_set_data(dfield, name, ut_strlen(name)); + dict_index_copy_types(tuple, sys_index, 1); + + btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE, + BTR_SEARCH_LEAF, &pcur, &mtr); +loop: + rec = btr_pcur_get_rec(&pcur); + + if (!btr_pcur_is_on_user_rec(&pcur, &mtr)) { + /* Not found */ + + btr_pcur_close(&pcur); + mtr_commit(&mtr); + mem_heap_free(heap); + + return(NULL); + } + + field = rec_get_nth_field(rec, 0, &len); + + if (len < strlen(name) + || ut_memcmp(name, field, strlen(name)) != 0) { + /* Not found */ + + btr_pcur_close(&pcur); + mtr_commit(&mtr); + mem_heap_free(heap); + + return(NULL); + } + + if (!rec_get_deleted_flag(rec)) { + + /* We found one */ + + table_name = mem_alloc(len + 1); + ut_memcpy(table_name, field, len); + table_name[len] = '\0'; + + btr_pcur_close(&pcur); + mtr_commit(&mtr); + 
mem_heap_free(heap); + + return(table_name); + } + + btr_pcur_move_to_next_user_rec(&pcur, &mtr); + + goto loop; +} + +/************************************************************************ +Prints to the standard output information on all tables found in the data +dictionary system table. */ + +void +dict_print(void) +/*============*/ +{ + dict_table_t* sys_tables; + dict_index_t* sys_index; + dict_table_t* table; + btr_pcur_t pcur; + rec_t* rec; + byte* field; + ulint len; + char table_name[10000]; + mtr_t mtr; + + mutex_enter(&(dict_sys->mutex)); + + mtr_start(&mtr); + + sys_tables = dict_table_get_low("SYS_TABLES"); + sys_index = UT_LIST_GET_FIRST(sys_tables->indexes); + + btr_pcur_open_at_index_side(TRUE, sys_index, BTR_SEARCH_LEAF, &pcur, + TRUE, &mtr); +loop: + btr_pcur_move_to_next_user_rec(&pcur, &mtr); + + rec = btr_pcur_get_rec(&pcur); + + if (!btr_pcur_is_on_user_rec(&pcur, &mtr)) { + /* end of index */ + + btr_pcur_close(&pcur); + mtr_commit(&mtr); + + mutex_exit(&(dict_sys->mutex)); + + return; + } + + field = rec_get_nth_field(rec, 0, &len); + + if (!rec_get_deleted_flag(rec)) { + + /* We found one */ + + ut_memcpy(table_name, field, len); + table_name[len] = '\0'; + + btr_pcur_store_position(&pcur, &mtr); + + mtr_commit(&mtr); + + table = dict_table_get_low(table_name); + + if (table == NULL) { + fprintf(stderr, "InnoDB: Failed to load table %s\n", + table_name); + } else { + dict_update_statistics_low(table, TRUE); + + dict_table_print_low(table); + } + + mtr_start(&mtr); + + btr_pcur_restore_position(BTR_SEARCH_LEAF, &pcur, &mtr); + } + + goto loop; +} + +/************************************************************************ Loads a table definition and also all its index definitions, and also -the cluster definition if the table is a member in a cluster. */ +the cluster definition if the table is a member in a cluster. 
Also loads +all foreign key constraints where the foreign key is in the table or where +a foreign key references columns in this table. Adds all these to the data +dictionary cache. */ dict_table_t* dict_load_table( @@ -59,7 +222,6 @@ dict_load_table( { dict_table_t* table; dict_table_t* sys_tables; - mtr_t mtr; btr_pcur_t pcur; dict_index_t* sys_index; dtuple_t* tuple; @@ -71,6 +233,7 @@ dict_load_table( char* buf; ulint space; ulint n_cols; + mtr_t mtr; ut_ad(mutex_own(&(dict_sys->mutex))); @@ -178,6 +341,106 @@ dict_load_table( dict_load_indexes(table, heap); + ut_a(DB_SUCCESS == dict_load_foreigns(table->name)); + + mem_heap_free(heap); + + return(table); +} + +/*************************************************************************** +Loads a table object based on the table id. */ + +dict_table_t* +dict_load_table_on_id( +/*==================*/ + /* out: table; NULL if table does not exist */ + dulint table_id) /* in: table id */ +{ + byte id_buf[8]; + btr_pcur_t pcur; + mem_heap_t* heap; + dtuple_t* tuple; + dfield_t* dfield; + dict_index_t* sys_table_ids; + dict_table_t* sys_tables; + rec_t* rec; + byte* field; + ulint len; + dict_table_t* table; + char* name; + mtr_t mtr; + + ut_ad(mutex_own(&(dict_sys->mutex))); + + /* NOTE that the operation of this function is protected by + the dictionary mutex, and therefore no deadlocks can occur + with other dictionary operations. 
*/ + + mtr_start(&mtr); + /*---------------------------------------------------*/ + /* Get the secondary index based on ID for table SYS_TABLES */ + sys_tables = dict_sys->sys_tables; + sys_table_ids = dict_table_get_next_index( + dict_table_get_first_index(sys_tables)); + heap = mem_heap_create(256); + + tuple = dtuple_create(heap, 1); + dfield = dtuple_get_nth_field(tuple, 0); + + /* Write the table id in byte format to id_buf */ + mach_write_to_8(id_buf, table_id); + + dfield_set_data(dfield, id_buf, 8); + dict_index_copy_types(tuple, sys_table_ids, 1); + + btr_pcur_open_on_user_rec(sys_table_ids, tuple, PAGE_CUR_GE, + BTR_SEARCH_LEAF, &pcur, &mtr); + rec = btr_pcur_get_rec(&pcur); + + if (!btr_pcur_is_on_user_rec(&pcur, &mtr) + || rec_get_deleted_flag(rec)) { + /* Not found */ + + btr_pcur_close(&pcur); + mtr_commit(&mtr); + mem_heap_free(heap); + + return(NULL); + } + + /*---------------------------------------------------*/ + /* Now we have the record in the secondary index containing the + table ID and NAME */ + + rec = btr_pcur_get_rec(&pcur); + field = rec_get_nth_field(rec, 0, &len); + ut_ad(len == 8); + + /* Check if the table id in record is the one searched for */ + if (ut_dulint_cmp(table_id, mach_read_from_8(field)) != 0) { + + btr_pcur_close(&pcur); + mtr_commit(&mtr); + mem_heap_free(heap); + + return(NULL); + } + + /* Now we get the table name from the record */ + field = rec_get_nth_field(rec, 1, &len); + + name = mem_heap_alloc(heap, len + 1); + ut_memcpy(name, field, len); + name[len] = '\0'; + + /* Load the table definition to memory */ + table = dict_load_table(name); + + ut_a(table); + + btr_pcur_close(&pcur); + mtr_commit(&mtr); mem_heap_free(heap); return(table); @@ -305,7 +568,8 @@ dict_load_columns( } /************************************************************************ -Loads definitions for table indexes. */ +Loads definitions for table indexes. Adds them to the data dictionary cache. 
+*/ static void dict_load_indexes( @@ -446,7 +710,6 @@ dict_load_fields( { dict_table_t* sys_fields; dict_index_t* sys_index; - mtr_t mtr; btr_pcur_t pcur; dtuple_t* tuple; dfield_t* dfield; @@ -456,6 +719,7 @@ dict_load_fields( ulint len; byte* buf; ulint i; + mtr_t mtr; ut_ad(mutex_own(&(dict_sys->mutex))); @@ -512,100 +776,328 @@ dict_load_fields( mtr_commit(&mtr); } +/************************************************************************ +Loads foreign key constraint col names (also for the referenced table). */ +static +void +dict_load_foreign_cols( +/*===================*/ + char* id, /* in: foreign constraint id as a null- + terminated string */ + dict_foreign_t* foreign)/* in: foreign constraint object */ +{ + dict_table_t* sys_foreign_cols; + dict_index_t* sys_index; + btr_pcur_t pcur; + dtuple_t* tuple; + dfield_t* dfield; + char* col_name; + rec_t* rec; + byte* field; + ulint len; + ulint i; + mtr_t mtr; + + ut_ad(mutex_own(&(dict_sys->mutex))); + + foreign->foreign_col_names = mem_heap_alloc(foreign->heap, + foreign->n_fields * sizeof(void*)); + + foreign->referenced_col_names = mem_heap_alloc(foreign->heap, + foreign->n_fields * sizeof(void*)); + mtr_start(&mtr); + + sys_foreign_cols = dict_table_get_low("SYS_FOREIGN_COLS"); + sys_index = UT_LIST_GET_FIRST(sys_foreign_cols->indexes); + + tuple = dtuple_create(foreign->heap, 1); + dfield = dtuple_get_nth_field(tuple, 0); + + dfield_set_data(dfield, id, ut_strlen(id)); + dict_index_copy_types(tuple, sys_index, 1); + + btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE, + BTR_SEARCH_LEAF, &pcur, &mtr); + for (i = 0; i < foreign->n_fields; i++) { + + rec = btr_pcur_get_rec(&pcur); + + ut_a(btr_pcur_is_on_user_rec(&pcur, &mtr)); + ut_a(!rec_get_deleted_flag(rec)); + + field = rec_get_nth_field(rec, 0, &len); + ut_a(len == ut_strlen(id)); + ut_a(ut_memcmp(id, field, len) == 0); + + field = rec_get_nth_field(rec, 1, &len); + ut_a(len == 4); + ut_a(i == mach_read_from_4(field)); + + field = 
rec_get_nth_field(rec, 4, &len); + + col_name = mem_heap_alloc(foreign->heap, len + 1); + ut_memcpy(col_name, field, len); + col_name[len] = '\0'; + + foreign->foreign_col_names[i] = col_name; + + field = rec_get_nth_field(rec, 5, &len); + + col_name = mem_heap_alloc(foreign->heap, len + 1); + ut_memcpy(col_name, field, len); + col_name[len] = '\0'; + + foreign->referenced_col_names[i] = col_name; + + btr_pcur_move_to_next_user_rec(&pcur, &mtr); + } + + btr_pcur_close(&pcur); + mtr_commit(&mtr); +} + /*************************************************************************** -Loads a table object based on the table id. */ +Loads a foreign key constraint to the dictionary cache. */ +static +ulint +dict_load_foreign( +/*==============*/ + /* out: DB_SUCCESS or error code */ + char* id) /* in: foreign constraint id as a null-terminated + string */ +{ + dict_foreign_t* foreign; + dict_table_t* sys_foreign; + btr_pcur_t pcur; + dict_index_t* sys_index; + dtuple_t* tuple; + mem_heap_t* heap2; + dfield_t* dfield; + rec_t* rec; + byte* field; + ulint len; + ulint err; + mtr_t mtr; + + ut_ad(mutex_own(&(dict_sys->mutex))); -dict_table_t* -dict_load_table_on_id( -/*==================*/ - /* out: table; NULL if table does not exist */ - dulint table_id) /* in: table id */ + heap2 = mem_heap_create(1000); + + mtr_start(&mtr); + + sys_foreign = dict_table_get_low("SYS_FOREIGN"); + sys_index = UT_LIST_GET_FIRST(sys_foreign->indexes); + + tuple = dtuple_create(heap2, 1); + dfield = dtuple_get_nth_field(tuple, 0); + + dfield_set_data(dfield, id, ut_strlen(id)); + dict_index_copy_types(tuple, sys_index, 1); + + btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE, + BTR_SEARCH_LEAF, &pcur, &mtr); + rec = btr_pcur_get_rec(&pcur); + + if (!btr_pcur_is_on_user_rec(&pcur, &mtr) + || rec_get_deleted_flag(rec)) { + /* Not found */ + + fprintf(stderr, + "InnoDB: Error A: cannot load foreign constraint %s\n", id); + + btr_pcur_close(&pcur); + mtr_commit(&mtr); + mem_heap_free(heap2); + 
+ return(DB_ERROR); + } + + field = rec_get_nth_field(rec, 0, &len); + + /* Check if the id in record is the searched one */ + if (len != ut_strlen(id) || ut_memcmp(id, field, len) != 0) { + + fprintf(stderr, + "InnoDB: Error B: cannot load foreign constraint %s\n", id); + + btr_pcur_close(&pcur); + mtr_commit(&mtr); + mem_heap_free(heap2); + + return(DB_ERROR); + } + + /* Read the table names and the number of columns associated + with the constraint */ + + mem_heap_free(heap2); + + foreign = dict_mem_foreign_create(); + + foreign->n_fields = mach_read_from_4(rec_get_nth_field(rec, 5, &len)); + + ut_a(len == 4); + + foreign->id = mem_heap_alloc(foreign->heap, ut_strlen(id) + 1); + + ut_memcpy(foreign->id, id, ut_strlen(id) + 1); + + field = rec_get_nth_field(rec, 3, &len); + + foreign->foreign_table_name = mem_heap_alloc(foreign->heap, 1 + len); + + ut_memcpy(foreign->foreign_table_name, field, len); + foreign->foreign_table_name[len] = '\0'; + + field = rec_get_nth_field(rec, 4, &len); + + foreign->referenced_table_name = mem_heap_alloc(foreign->heap, 1 + len); + + ut_memcpy(foreign->referenced_table_name, field, len); + foreign->referenced_table_name[len] = '\0'; + + btr_pcur_close(&pcur); + mtr_commit(&mtr); + + dict_load_foreign_cols(id, foreign); + + /* Note that there may already be a foreign constraint object in + the dictionary cache for this constraint: then the following + call only sets the pointers in it to point to the appropriate table + and index objects and frees the newly created object foreign. */ + + err = dict_foreign_add_to_cache(foreign); + + return(err); +} + +/*************************************************************************** +Loads foreign key constraints where the table is either the foreign key +holder or where the table is referenced by a foreign key. Adds these +constraints to the data dictionary. 
Note that we know that the dictionary +cache already contains all constraints where the other relevant table is +already in the dictionary cache. */ + +ulint +dict_load_foreigns( +/*===============*/ + /* out: DB_SUCCESS or error code */ + char* table_name) /* in: table name */ { - mtr_t mtr; - byte id_buf[8]; btr_pcur_t pcur; mem_heap_t* heap; dtuple_t* tuple; dfield_t* dfield; - dict_index_t* sys_table_ids; - dict_table_t* sys_tables; + dict_index_t* sec_index; + dict_table_t* sys_foreign; rec_t* rec; byte* field; ulint len; - dict_table_t* table; - char* name; + char* id ; + ulint err; + mtr_t mtr; ut_ad(mutex_own(&(dict_sys->mutex))); - /* NOTE that the operation of this function is protected by - the dictionary mutex, and therefore no deadlocks can occur - with other dictionary operations. */ + sys_foreign = dict_table_get_low("SYS_FOREIGN"); + + if (sys_foreign == NULL) { + /* No foreign keys defined yet in this database */ + + fprintf(stderr, + "InnoDB: Error: no foreign key system tables in the database\n"); + + return(DB_ERROR); + } mtr_start(&mtr); - /*---------------------------------------------------*/ - /* Get the secondary index based on ID for table SYS_TABLES */ - sys_tables = dict_sys->sys_tables; - sys_table_ids = dict_table_get_next_index( - dict_table_get_first_index(sys_tables)); + + /* Get the secondary index based on FOR_NAME from table + SYS_FOREIGN */ + + sec_index = dict_table_get_next_index( + dict_table_get_first_index(sys_foreign)); +start_load: heap = mem_heap_create(256); tuple = dtuple_create(heap, 1); dfield = dtuple_get_nth_field(tuple, 0); - /* Write the table id in byte format to id_buf */ - mach_write_to_8(id_buf, table_id); - - dfield_set_data(dfield, id_buf, 8); - dict_index_copy_types(tuple, sys_table_ids, 1); + dfield_set_data(dfield, table_name, ut_strlen(table_name)); + dict_index_copy_types(tuple, sec_index, 1); - btr_pcur_open_on_user_rec(sys_table_ids, tuple, PAGE_CUR_GE, + btr_pcur_open_on_user_rec(sec_index, tuple, 
PAGE_CUR_GE, BTR_SEARCH_LEAF, &pcur, &mtr); +loop: rec = btr_pcur_get_rec(&pcur); - if (!btr_pcur_is_on_user_rec(&pcur, &mtr) - || rec_get_deleted_flag(rec)) { - /* Not found */ + if (!btr_pcur_is_on_user_rec(&pcur, &mtr)) { + /* End of index */ - btr_pcur_close(&pcur); - mtr_commit(&mtr); - mem_heap_free(heap); - - return(NULL); + goto load_next_index; } - /*---------------------------------------------------*/ - /* Now we have the record in the secondary index containing the - table ID and NAME */ + /* Now we have the record in the secondary index containing a table + name and a foreign constraint ID */ rec = btr_pcur_get_rec(&pcur); field = rec_get_nth_field(rec, 0, &len); - ut_ad(len == 8); - /* Check if the table id in record is the one searched for */ - if (ut_dulint_cmp(table_id, mach_read_from_8(field)) != 0) { + /* Check if the table name in record is the one searched for */ + if (len != ut_strlen(table_name) + || 0 != ut_memcmp(field, table_name, len)) { - btr_pcur_close(&pcur); - mtr_commit(&mtr); - mem_heap_free(heap); - - return(NULL); + goto load_next_index; } - /* Now we get the table name from the record */ + if (rec_get_deleted_flag(rec)) { + + goto next_rec; + } + + /* Now we get a foreign key constraint id */ field = rec_get_nth_field(rec, 1, &len); - name = mem_heap_alloc(heap, len + 1); - ut_memcpy(name, field, len); - name[len] = '\0'; + id = mem_heap_alloc(heap, len + 1); + ut_memcpy(id, field, len); + id[len] = '\0'; - /* Load the table definition to memory */ - table = dict_load_table(name); + btr_pcur_store_position(&pcur, &mtr); - ut_a(table); + mtr_commit(&mtr); + + /* Load the foreign constraint definition to the dictionary cache */ + err = dict_load_foreign(id); + + if (err != DB_SUCCESS) { + btr_pcur_close(&pcur); + mem_heap_free(heap); + + return(err); + } + + mtr_start(&mtr); + + btr_pcur_restore_position(BTR_SEARCH_LEAF, &pcur, &mtr); +next_rec: + btr_pcur_move_to_next_user_rec(&pcur, &mtr); + + goto loop; + +load_next_index: 
btr_pcur_close(&pcur); mtr_commit(&mtr); mem_heap_free(heap); + + sec_index = dict_table_get_next_index(sec_index); - return(table); + if (sec_index != NULL) { + + mtr_start(&mtr); + + goto start_load; + } + + return(DB_SUCCESS); } diff --git a/innobase/dict/dict0mem.c b/innobase/dict/dict0mem.c index 6947db11aea..57926ab9d2f 100644 --- a/innobase/dict/dict0mem.c +++ b/innobase/dict/dict0mem.c @@ -18,6 +18,7 @@ Created 1/8/1996 Heikki Tuuri #include "dict0dict.h" #include "que0que.h" #include "pars0pars.h" +#include "lock0lock.h" #define DICT_HEAP_SIZE 100 /* initial memory heap size when creating a table or index object */ @@ -63,7 +64,12 @@ dict_mem_table_create( table->cols = mem_heap_alloc(heap, (n_cols + DATA_N_SYS_COLS) * sizeof(dict_col_t)); UT_LIST_INIT(table->indexes); + + table->auto_inc_lock = mem_heap_alloc(heap, lock_get_size()); + UT_LIST_INIT(table->locks); + UT_LIST_INIT(table->foreign_list); + UT_LIST_INIT(table->referenced_list); table->does_not_fit_in_memory = FALSE; @@ -199,6 +205,8 @@ dict_mem_index_create( * sizeof(dict_field_t)); /* The '1 +' above prevents allocation of an empty mem block */ + index->stat_n_diff_key_vals = NULL; + index->cached = FALSE; index->magic_n = DICT_INDEX_MAGIC_N; @@ -206,6 +214,41 @@ dict_mem_index_create( } /************************************************************************** +Creates and initializes a foreign constraint memory object. 
*/ + +dict_foreign_t* +dict_mem_foreign_create(void) +/*=========================*/ + /* out, own: foreign constraint struct */ +{ + dict_foreign_t* foreign; + mem_heap_t* heap; + + heap = mem_heap_create(100); + + foreign = mem_heap_alloc(heap, sizeof(dict_foreign_t)); + + foreign->heap = heap; + + foreign->id = NULL; + + foreign->foreign_table_name = NULL; + foreign->foreign_table = NULL; + foreign->foreign_col_names = NULL; + + foreign->referenced_table_name = NULL; + foreign->referenced_table = NULL; + foreign->referenced_col_names = NULL; + + foreign->n_fields = 0; + + foreign->foreign_index = NULL; + foreign->referenced_index = NULL; + + return(foreign); +} + +/************************************************************************** Adds a field definition to an index. NOTE: does not take a copy of the column name if the field is a column. The memory occupied by the column name may be released only after publishing the index. */ diff --git a/innobase/fil/fil0fil.c b/innobase/fil/fil0fil.c index b386f224d11..62389c8394c 100644 --- a/innobase/fil/fil0fil.c +++ b/innobase/fil/fil0fil.c @@ -77,6 +77,9 @@ out of the LRU-list and keep a count of pending operations. When an operation completes, we decrement the count and return the file node to the LRU-list if the count drops to zero. 
*/ +ulint fil_n_pending_log_flushes = 0; +ulint fil_n_pending_tablespace_flushes = 0; + /* Null file address */ fil_addr_t fil_addr_null = {FIL_NULL, 0}; @@ -856,6 +859,15 @@ fil_node_prepare_for_io( last_node = UT_LIST_GET_LAST(system->LRU); + if (last_node == NULL) { + fprintf(stderr, + "InnoDB: Error: cannot close any file to open another for i/o\n" + "InnoDB: Pending i/o's on %lu files exist\n", + system->n_open_pending); + + ut_a(0); + } + fil_node_close(last_node, system); } @@ -973,7 +985,8 @@ fil_io( ibool ret; ulint is_log; ulint wake_later; - + ulint count; + is_log = type & OS_FILE_LOG; type = type & ~OS_FILE_LOG; @@ -996,7 +1009,7 @@ fil_io( #endif if (sync) { mode = OS_AIO_SYNC; - } else if ((type == OS_FILE_READ) && !is_log + } else if (type == OS_FILE_READ && !is_log && ibuf_page(space_id, block_offset)) { mode = OS_AIO_IBUF; } else if (is_log) { @@ -1006,9 +1019,44 @@ fil_io( } system = fil_system; + + count = 0; loop: + count++; + + /* NOTE that there is a possibility of a hang here: + if the read i/o-handler thread needs to complete + a read by reading from the insert buffer, it may need to + post another read. But if the maximum number of files + are already open, it cannot proceed from here! 
*/ + mutex_enter(&(system->mutex)); + if (count < 500 && !is_log && !ibuf_inside() + && system->n_open_pending >= (3 * system->max_n_open) / 4) { + + /* We are not doing an ibuf operation: leave a + safety margin of openable files for possible ibuf + merges needed in page read completion */ + + mutex_exit(&(system->mutex)); + + /* Wake the i/o-handler threads to make sure pending + i/o's are handled and eventually we can open the file */ + + os_aio_simulated_wake_handler_threads(); + + os_thread_sleep(100000); + + if (count > 50) { + fprintf(stderr, + "InnoDB: Warning: waiting for file closes to proceed\n" + "InnoDB: round %lu\n", count); + } + + goto loop; + } + if (system->n_open_pending == system->max_n_open) { /* It is not sure we can open the file if it is closed: wait */ @@ -1018,11 +1066,19 @@ loop: mutex_exit(&(system->mutex)); + /* Wake the i/o-handler threads to make sure pending + i/o's are handled and eventually we can open the file */ + + os_aio_simulated_wake_handler_threads(); + + fprintf(stderr, + "InnoDB: Warning: max allowed number of files is open\n"); + os_event_wait(event); goto loop; } - + HASH_SEARCH(hash, system->spaces, space_id, space, space->id == space_id); ut_a(space); @@ -1160,6 +1216,7 @@ fil_aio_wait( #elif defined(POSIX_ASYNC_IO) ret = os_aio_posix_handle(segment, &fil_node, &message); #else + ret = 0; /* Eliminate compiler warning */ ut_a(0); #endif } else { @@ -1220,6 +1277,12 @@ fil_flush( node->is_modified = FALSE; + if (space->purpose == FIL_TABLESPACE) { + fil_n_pending_tablespace_flushes++; + } else { + fil_n_pending_log_flushes++; + } + mutex_exit(&(system->mutex)); /* Note that it is not certain, when we have @@ -1233,6 +1296,12 @@ fil_flush( os_file_flush(file); mutex_enter(&(system->mutex)); + + if (space->purpose == FIL_TABLESPACE) { + fil_n_pending_tablespace_flushes--; + } else { + fil_n_pending_log_flushes--; + } } node = UT_LIST_GET_NEXT(chain, node); @@ -1377,7 +1446,7 @@ fil_page_set_type( ulint type) /* in: type 
*/ { ut_ad(page); - ut_ad((type == FIL_PAGE_INDEX) || (type == FIL_PAGE_INDEX)); + ut_ad((type == FIL_PAGE_INDEX) || (type == FIL_PAGE_UNDO_LOG)); mach_write_to_2(page + FIL_PAGE_TYPE, type); } diff --git a/innobase/ibuf/ibuf0ibuf.c b/innobase/ibuf/ibuf0ibuf.c index fa1c630dc08..d289b176efa 100644 --- a/innobase/ibuf/ibuf0ibuf.c +++ b/innobase/ibuf/ibuf0ibuf.c @@ -1013,7 +1013,7 @@ ibuf_rec_get_volume( ulint i; ut_ad(ibuf_inside()); - ut_ad(rec_get_n_fields(rec) > 2); + ut_ad(rec_get_n_fields(ibuf_rec) > 2); n_fields = rec_get_n_fields(ibuf_rec) - 2; @@ -1624,13 +1624,14 @@ ibuf_get_merge_page_nos( /************************************************************************* Contracts insert buffer trees by reading pages to the buffer pool. */ - +static ulint -ibuf_contract( -/*==========*/ +ibuf_contract_ext( +/*==============*/ /* out: a lower limit for the combined size in bytes of entries which will be merged from ibuf trees to the pages read, 0 if ibuf is empty */ + ulint* n_pages,/* out: number of pages to which merged */ ibool sync) /* in: TRUE if the caller wants to wait for the issued read with the highest tablespace address to complete */ @@ -1644,6 +1645,8 @@ ibuf_contract( ulint n_stored; ulint sum_sizes; mtr_t mtr; + + *n_pages = 0; loop: ut_ad(!ibuf_inside()); @@ -1730,10 +1733,65 @@ loop: buf_read_ibuf_merge_pages(sync, space, page_nos, n_stored); + *n_pages = n_stored; + return(sum_sizes + 1); } /************************************************************************* +Contracts insert buffer trees by reading pages to the buffer pool. 
*/ + +ulint +ibuf_contract( +/*==========*/ + /* out: a lower limit for the combined size in bytes + of entries which will be merged from ibuf trees to the + pages read, 0 if ibuf is empty */ + ibool sync) /* in: TRUE if the caller wants to wait for the + issued read with the highest tablespace address + to complete */ +{ + ulint n_pages; + + return(ibuf_contract_ext(&n_pages, sync)); +} + +/************************************************************************* +Contracts insert buffer trees by reading pages to the buffer pool. */ + +ulint +ibuf_contract_for_n_pages( +/*======================*/ + /* out: a lower limit for the combined size in bytes + of entries which will be merged from ibuf trees to the + pages read, 0 if ibuf is empty */ + ibool sync, /* in: TRUE if the caller wants to wait for the + issued read with the highest tablespace address + to complete */ + ulint n_pages)/* in: try to read at least this many pages to + the buffer pool and merge the ibuf contents to + them */ +{ + ulint sum_bytes = 0; + ulint sum_pages = 0; + ulint n_bytes; + ulint n_pag2; + + while (sum_pages < n_pages) { + n_bytes = ibuf_contract_ext(&n_pag2, sync); + + if (n_bytes == 0) { + return(sum_bytes); + } + + sum_bytes += n_bytes; + sum_pages += n_pag2; + } + + return(sum_bytes); +} + +/************************************************************************* Contract insert buffer trees after insert if they are too big. 
*/ UNIV_INLINE void @@ -2252,8 +2310,6 @@ ibuf_insert_to_index_page( if (low_match == dtuple_get_n_fields(entry)) { rec = page_cur_get_rec(&page_cur); - - ut_ad(rec_get_deleted_flag(rec)); btr_cur_del_unmark_for_ibuf(rec, mtr); } else { @@ -2306,6 +2362,8 @@ ibuf_delete_rec( should belong */ btr_pcur_t* pcur, /* in: pcur positioned on the record to delete, having latch mode BTR_MODIFY_LEAF */ + dtuple_t* search_tuple, + /* in: search tuple for entries of page_no */ mtr_t* mtr) /* in: mtr */ { ibool success; @@ -2336,12 +2394,33 @@ ibuf_delete_rec( mtr_start(mtr); - ut_a(btr_pcur_restore_position(BTR_MODIFY_TREE, pcur, mtr)); + success = btr_pcur_restore_position(BTR_MODIFY_TREE, pcur, mtr); + + if (!success) { + fprintf(stderr, + "InnoDB: ERROR: Send the output to heikki.tuuri@innodb.com\n"); + fprintf(stderr, "InnoDB: ibuf cursor restoration fails!\n"); + fprintf(stderr, "InnoDB: ibuf record inserted to page %lu\n", + page_no); + rec_print(btr_pcur_get_rec(pcur)); + rec_print(pcur->old_rec); + dtuple_print(search_tuple); + + rec_print(page_rec_get_next(btr_pcur_get_rec(pcur))); + + mtr_commit(mtr); + + fprintf(stderr, "InnoDB: Validating insert buffer tree:\n"); + ut_a(btr_validate_tree(ibuf_data->index->tree)); + fprintf(stderr, "InnoDB: Ibuf tree ok\n"); + } + + ut_a(success); root = ibuf_tree_root_get(ibuf_data, space, mtr); btr_cur_pessimistic_delete(&err, TRUE, btr_pcur_get_btr_cur(pcur), - FALSE, mtr); + FALSE, mtr); ut_a(err == DB_SUCCESS); #ifdef UNIV_IBUF_DEBUG @@ -2393,8 +2472,11 @@ ibuf_merge_or_delete_for_page( dulint max_trx_id; mtr_t mtr; - /* TODO: get MySQL type info to use in ibuf_insert_to_index_page */ + if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE) { + return; + } + #ifdef UNIV_LOG_DEBUG if (space % 2 != 0) { @@ -2451,16 +2533,13 @@ loop: if (page) { success = buf_page_get_known_nowait(RW_X_LATCH, page, BUF_KEEP_OLD, -#ifdef UNIV_SYNC_DEBUG IB__FILE__, __LINE__, -#endif &mtr); - ut_a(success); buf_page_dbg_add_level(page, SYNC_TREE_NODE); 
} - + /* Position pcur in the insert buffer at the first entry for this index page */ btr_pcur_open_on_user_rec(ibuf_data->index, search_tuple, PAGE_CUR_GE, @@ -2476,7 +2555,7 @@ loop: ut_ad(btr_pcur_is_on_user_rec(&pcur, &mtr)); ibuf_rec = btr_pcur_get_rec(&pcur); - + /* Check if the entry is for this index page */ if (ibuf_rec_get_page_no(ibuf_rec) != page_no) { @@ -2508,13 +2587,13 @@ loop: / IBUF_PAGE_SIZE_PER_FREE_SPACE); #endif ibuf_insert_to_index_page(entry, page, &mtr); - - n_inserts++; } + + n_inserts++; /* Delete the record from ibuf */ - closed = ibuf_delete_rec(space, page_no, &pcur, &mtr); - + closed = ibuf_delete_rec(space, page_no, &pcur, search_tuple, + &mtr); if (closed) { /* Deletion was pessimistic and mtr was committed: we start from the beginning again */ @@ -2524,6 +2603,7 @@ loop: if (btr_pcur_is_after_last_on_page(&pcur, &mtr)) { mtr_commit(&mtr); + btr_pcur_close(&pcur); goto loop; } @@ -2619,8 +2699,6 @@ ibuf_print(void) #endif mutex_enter(&ibuf_mutex); - printf("Ibuf size %lu max size %lu\n", ibuf->size, ibuf->max_size); - data = UT_LIST_GET_FIRST(ibuf->data_list); while (data) { diff --git a/innobase/include/btr0cur.h b/innobase/include/btr0cur.h index f56a5662253..bce1f0685cc 100644 --- a/innobase/include/btr0cur.h +++ b/innobase/include/btr0cur.h @@ -188,6 +188,22 @@ btr_cur_pessimistic_insert( que_thr_t* thr, /* in: query thread or NULL */ mtr_t* mtr); /* in: mtr */ /***************************************************************** +Updates a secondary index record when the update causes no size +changes in its fields. The only case when this function is currently +called is that in a char field characters change to others which +are identified in the collation order. 
*/ + +ulint +btr_cur_update_sec_rec_in_place( +/*============================*/ + /* out: DB_SUCCESS or error number */ + btr_cur_t* cursor, /* in: cursor on the record to update; + cursor stays valid and positioned on the + same record */ + upd_t* update, /* in: update vector */ + que_thr_t* thr, /* in: query thread */ + mtr_t* mtr); /* in: mtr */ +/***************************************************************** Updates a record when the update causes no size changes in its fields. */ ulint @@ -411,12 +427,13 @@ btr_estimate_n_rows_in_range( dtuple_t* tuple2, /* in: range end, may also be empty tuple */ ulint mode2); /* in: search mode for range end */ /*********************************************************************** -Estimates the number of different key values in a given index. */ +Estimates the number of different key values in a given index, for +each n-column prefix of the index where n <= dict_index_get_n_unique(index). +The estimates are stored in the array index->stat_n_diff_key_vals. */ -ulint +void btr_estimate_number_of_different_key_vals( /*======================================*/ - /* out: estimated number of key values */ dict_index_t* index); /* in: index */ /*********************************************************************** Marks not updated extern fields as not-owned by this record. 
The ownership diff --git a/innobase/include/btr0pcur.h b/innobase/include/btr0pcur.h index 6465093e3c1..05b55e4491d 100644 --- a/innobase/include/btr0pcur.h +++ b/innobase/include/btr0pcur.h @@ -19,9 +19,15 @@ Created 2/23/1996 Heikki Tuuri #include "btr0types.h" /* Relative positions for a stored cursor position */ -#define BTR_PCUR_ON 1 -#define BTR_PCUR_BEFORE 2 -#define BTR_PCUR_AFTER 3 +#define BTR_PCUR_ON 1 +#define BTR_PCUR_BEFORE 2 +#define BTR_PCUR_AFTER 3 +/* Note that if the tree is not empty, btr_pcur_store_position does not +use the following, but only uses the above three alternatives, where the +position is stored relative to a specific record: this makes implementation +of a scroll cursor easier */ +#define BTR_PCUR_BEFORE_FIRST_IN_TREE 4 /* in an empty tree */ +#define BTR_PCUR_AFTER_LAST_IN_TREE 5 /* in an empty tree */ /****************************************************************** Allocates memory for a persistent cursor object and initializes the cursor. */ @@ -170,34 +176,16 @@ btr_pcur_close( /****************************************************************** The position of the cursor is stored by taking an initial segment of the record the cursor is positioned on, before, or after, and copying it to the -cursor data structure. NOTE that the page where the cursor is positioned -must not be empty! */ +cursor data structure, or just setting a flag if the cursor id before the +first in an EMPTY tree, or after the last in an EMPTY tree. NOTE that the +page where the cursor is positioned must not be empty if the index tree is +not totally empty! */ void btr_pcur_store_position( /*====================*/ - btr_pcur_t* cursor, /* in: persistent cursor */ - mtr_t* mtr); /* in: mtr */ -/****************************************************************** -If the latch mode of the cursor is BTR_LEAF_SEARCH or BTR_LEAF_MODIFY, -releases the page latch and bufferfix reserved by the cursor. -NOTE! 
In the case of BTR_LEAF_MODIFY, there should not exist changes -made by the current mini-transaction to the data protected by the -cursor latch, as then the latch must not be released until mtr_commit. */ - -void -btr_pcur_release_leaf( -/*==================*/ btr_pcur_t* cursor, /* in: persistent cursor */ mtr_t* mtr); /* in: mtr */ -/************************************************************* -Gets the rel_pos field for a cursor whose position has been stored. */ -UNIV_INLINE -ulint -btr_pcur_get_rel_pos( -/*=================*/ - /* out: BTR_PCUR_ON, ... */ - btr_pcur_t* cursor);/* in: persistent cursor */ /****************************************************************** Restores the stored position of a persistent cursor bufferfixing the page and obtaining the specified latches. If the cursor position was saved when the @@ -207,7 +195,9 @@ to the last record LESS OR EQUAL to the stored record; the last record LESS than the user record which was the successor of the page infimum; (3) cursor was positioned on the page supremum: restores to the first record -GREATER than the user record which was the predecessor of the supremum. */ +GREATER than the user record which was the predecessor of the supremum. +(4) cursor was positioned before the first or after the last in an empty tree: +restores to before first or after the last in the tree. */ ibool btr_pcur_restore_position( @@ -220,6 +210,26 @@ btr_pcur_restore_position( ulint latch_mode, /* in: BTR_SEARCH_LEAF, ... */ btr_pcur_t* cursor, /* in: detached persistent cursor */ mtr_t* mtr); /* in: mtr */ +/****************************************************************** +If the latch mode of the cursor is BTR_LEAF_SEARCH or BTR_LEAF_MODIFY, +releases the page latch and bufferfix reserved by the cursor. +NOTE! In the case of BTR_LEAF_MODIFY, there should not exist changes +made by the current mini-transaction to the data protected by the +cursor latch, as then the latch must not be released until mtr_commit. 
*/ + +void +btr_pcur_release_leaf( +/*==================*/ + btr_pcur_t* cursor, /* in: persistent cursor */ + mtr_t* mtr); /* in: mtr */ +/************************************************************* +Gets the rel_pos field for a cursor whose position has been stored. */ +UNIV_INLINE +ulint +btr_pcur_get_rel_pos( +/*=================*/ + /* out: BTR_PCUR_ON, ... */ + btr_pcur_t* cursor);/* in: persistent cursor */ /************************************************************* Sets the mtr field for a pcur. */ UNIV_INLINE @@ -458,7 +468,7 @@ struct btr_pcur_struct{ ulint search_mode; /* PAGE_CUR_G, ... */ /*-----------------------------*/ /* NOTE that the following fields may possess dynamically allocated - memory, which should be freed if not needed anymore! */ + memory which should be freed if not needed anymore! */ mtr_t* mtr; /* NULL, or this field may contain a mini-transaction which holds the diff --git a/innobase/include/btr0pcur.ic b/innobase/include/btr0pcur.ic index 8e927689208..a60140e4aa9 100644 --- a/innobase/include/btr0pcur.ic +++ b/innobase/include/btr0pcur.ic @@ -19,8 +19,8 @@ btr_pcur_get_rel_pos( ut_ad(cursor); ut_ad(cursor->old_rec); ut_ad(cursor->old_stored == BTR_PCUR_OLD_STORED); - ut_ad((cursor->pos_state == BTR_PCUR_WAS_POSITIONED) - || (cursor->pos_state == BTR_PCUR_IS_POSITIONED)); + ut_ad(cursor->pos_state == BTR_PCUR_WAS_POSITIONED + || cursor->pos_state == BTR_PCUR_IS_POSITIONED); return(cursor->rel_pos); } diff --git a/innobase/include/btr0sea.h b/innobase/include/btr0sea.h index c319e16d740..fdf5cf375a3 100644 --- a/innobase/include/btr0sea.h +++ b/innobase/include/btr0sea.h @@ -262,6 +262,12 @@ index */ #define BTR_SEARCH_ON_HASH_LIMIT 3 +/* We do this many searches before trying to keep the search latch over calls +from MySQL. If we notice someone waiting for the latch, we again set this +much timeout. This is to reduce contention. 
*/ + +#define BTR_SEA_TIMEOUT 10000 + #ifndef UNIV_NONINL #include "btr0sea.ic" #endif diff --git a/innobase/include/buf0buf.h b/innobase/include/buf0buf.h index 66071030402..5ddbf39335a 100644 --- a/innobase/include/buf0buf.h +++ b/innobase/include/buf0buf.h @@ -116,53 +116,30 @@ buf_frame_copy( NOTE! The following macros should be used instead of buf_page_get_gen, to improve debugging. Only values RW_S_LATCH and RW_X_LATCH are allowed in LA! */ -#ifdef UNIV_SYNC_DEBUG #define buf_page_get(SP, OF, LA, MTR) buf_page_get_gen(\ SP, OF, LA, NULL,\ BUF_GET, IB__FILE__, __LINE__, MTR) -#else -#define buf_page_get(SP, OF, LA, MTR) buf_page_get_gen(\ - SP, OF, LA, NULL,\ - BUF_GET, MTR) -#endif /****************************************************************** Use these macros to bufferfix a page with no latching. Remember not to read the contents of the page unless you know it is safe. Do not modify the contents of the page! We have separated this case, because it is error-prone programming not to set a latch, and it should be used with care. */ -#ifdef UNIV_SYNC_DEBUG #define buf_page_get_with_no_latch(SP, OF, MTR) buf_page_get_gen(\ SP, OF, RW_NO_LATCH, NULL,\ BUF_GET_NO_LATCH, IB__FILE__, __LINE__, MTR) -#else -#define buf_page_get_with_no_latch(SP, OF, MTR) buf_page_get_gen(\ - SP, OF, RW_NO_LATCH, NULL,\ - BUF_GET_NO_LATCH, MTR) -#endif /****************************************************************** NOTE! The following macros should be used instead of buf_page_get_gen, to improve debugging. Only values RW_S_LATCH and RW_X_LATCH are allowed as LA! */ -#ifdef UNIV_SYNC_DEBUG #define buf_page_get_nowait(SP, OF, LA, MTR) buf_page_get_gen(\ SP, OF, LA, NULL,\ BUF_GET_NOWAIT, IB__FILE__, __LINE__, MTR) -#else -#define buf_page_get_nowait(SP, OF, LA, MTR) buf_page_get_gen(\ - SP, OF, LA, NULL,\ - BUF_GET_NOWAIT, MTR) -#endif /****************************************************************** NOTE! 
The following macros should be used instead of buf_page_optimistic_get_func, to improve debugging. Only values RW_S_LATCH and RW_X_LATCH are allowed as LA! */ -#ifdef UNIV_SYNC_DEBUG #define buf_page_optimistic_get(LA, G, MC, MTR) buf_page_optimistic_get_func(\ LA, G, MC, IB__FILE__, __LINE__, MTR) -#else -#define buf_page_optimistic_get(LA, G, MC, MTR) buf_page_optimistic_get_func(\ - LA, G, MC, MTR) -#endif /************************************************************************ This is the general function used to get optimistic access to a database page. */ @@ -175,10 +152,8 @@ buf_page_optimistic_get_func( buf_frame_t* guess, /* in: guessed frame */ dulint modify_clock,/* in: modify clock value if mode is ..._GUESS_ON_CLOCK */ -#ifdef UNIV_SYNC_DEBUG char* file, /* in: file name */ ulint line, /* in: line where called */ -#endif mtr_t* mtr); /* in: mini-transaction */ /************************************************************************ Tries to get the page, but if file io is required, releases all latches @@ -210,10 +185,8 @@ buf_page_get_known_nowait( ulint rw_latch,/* in: RW_S_LATCH, RW_X_LATCH */ buf_frame_t* guess, /* in: the known page frame */ ulint mode, /* in: BUF_MAKE_YOUNG or BUF_KEEP_OLD */ -#ifdef UNIV_SYNC_DEBUG char* file, /* in: file name */ ulint line, /* in: line where called */ -#endif mtr_t* mtr); /* in: mini-transaction */ /************************************************************************ This is the general function used to get access to a database page. */ @@ -228,10 +201,8 @@ buf_page_get_gen( buf_frame_t* guess, /* in: guessed frame or NULL */ ulint mode, /* in: BUF_GET, BUF_GET_IF_IN_POOL, BUF_GET_NO_LATCH */ -#ifdef UNIV_SYNC_DEBUG char* file, /* in: file name */ ulint line, /* in: line where called */ -#endif mtr_t* mtr); /* in: mini-transaction */ /************************************************************************ Initializes a page to the buffer buf_pool. 
The page is usually not read @@ -455,6 +426,13 @@ Validates the buffer pool data structure. */ ibool buf_validate(void); /*==============*/ +/************************************************************************ +Prints a page to stderr. */ + +void +buf_page_print( +/*===========*/ + byte* read_buf); /* in: a database page */ /************************************************************************* Prints info of the buffer pool data structure. */ @@ -462,6 +440,12 @@ void buf_print(void); /*===========*/ /************************************************************************* +Returns the number of pending buf pool ios. */ + +ulint +buf_get_n_pending_ios(void); +/*=======================*/ +/************************************************************************* Prints info of the buffer i/o. */ void @@ -760,6 +744,8 @@ struct buf_pool_struct{ byte* frame_zero; /* pointer to the first buffer frame: this may differ from frame_mem, because this is aligned by the frame size */ + byte* high_end; /* pointer to the end of the + buffer pool */ buf_block_t* blocks; /* array of buffer control blocks */ ulint max_size; /* number of control blocks == maximum pool size in pages */ @@ -767,6 +753,9 @@ struct buf_pool_struct{ hash_table_t* page_hash; /* hash table of the file pages */ ulint n_pend_reads; /* number of pending read operations */ + + time_t last_printout_time; /* when buf_print was last time + called */ ulint n_pages_read; /* number read operations */ ulint n_pages_written;/* number write operations */ ulint n_pages_created;/* number of pages created in the pool @@ -782,6 +771,9 @@ struct buf_pool_struct{ hit rate */ ulint n_pages_read_old;/* n_pages_read when buf_print was last time called */ + ulint n_pages_written_old;/* number write operations */ + ulint n_pages_created_old;/* number of pages created in + the pool with no read */ /* 2. 
Page flushing algorithm fields */ UT_LIST_BASE_NODE_T(buf_block_t) flush_list; diff --git a/innobase/include/buf0buf.ic b/innobase/include/buf0buf.ic index 1ecc6f34a78..3d88d087e63 100644 --- a/innobase/include/buf0buf.ic +++ b/innobase/include/buf0buf.ic @@ -486,11 +486,7 @@ buf_block_buf_fix_inc_debug( { ibool ret; - ret = rw_lock_s_lock_func_nowait(&(block->debug_latch) -#ifdef UNIV_SYNC_DEBUG - ,file, line -#endif - ); + ret = rw_lock_s_lock_func_nowait(&(block->debug_latch), file, line); ut_ad(ret == TRUE); @@ -557,9 +553,7 @@ buf_page_get_release_on_io( frame = buf_page_get_gen(space, offset, rw_latch, guess, BUF_GET_IF_IN_POOL, -#ifdef UNIV_SYNC_DEBUG IB__FILE__, __LINE__, -#endif mtr); if (frame != NULL) { diff --git a/innobase/include/data0data.h b/innobase/include/data0data.h index c314281d758..c19d7ea5552 100644 --- a/innobase/include/data0data.h +++ b/innobase/include/data0data.h @@ -116,8 +116,8 @@ dfield_copy( Tests if data length and content is equal for two dfields. */ UNIV_INLINE ibool -dfield_datas_are_equal( -/*===================*/ +dfield_datas_are_binary_equal( +/*==========================*/ /* out: TRUE if equal */ dfield_t* field1, /* in: field */ dfield_t* field2);/* in: field */ @@ -125,8 +125,8 @@ dfield_datas_are_equal( Tests if dfield data length and content is equal to the given. */ UNIV_INLINE ibool -dfield_data_is_equal( -/*=================*/ +dfield_data_is_binary_equal( +/*========================*/ /* out: TRUE if equal */ dfield_t* field, /* in: field */ ulint len, /* in: data length or UNIV_SQL_NULL */ @@ -230,14 +230,18 @@ dtuple_get_data_size( dtuple_t* tuple); /* in: typed data tuple */ /**************************************************************** Returns TRUE if lengths of two dtuples are equal and respective data fields -in them are equal. */ -UNIV_INLINE +in them are equal when compared with collation in char fields (not as binary +strings). 
*/ + ibool -dtuple_datas_are_equal( -/*===================*/ - /* out: TRUE if length and datas are equal */ +dtuple_datas_are_ordering_equal( +/*============================*/ + /* out: TRUE if length and fieds are equal + when compared with cmp_data_data: + NOTE: in character type fields some letters + are identified with others! (collation) */ dtuple_t* tuple1, /* in: tuple 1 */ - dtuple_t* tuple2); /* in: tuple 2 */ + dtuple_t* tuple2);/* in: tuple 2 */ /**************************************************************** Folds a prefix given as the number of fields of a tuple. */ UNIV_INLINE @@ -447,7 +451,7 @@ struct dfield_struct{ struct dtuple_struct { ulint info_bits; /* info bits of an index record: - default is 0; this field is used + the default is 0; this field is used if an index record is built from a data tuple */ ulint n_fields; /* number of fields in dtuple */ diff --git a/innobase/include/data0data.ic b/innobase/include/data0data.ic index b886ad6c69c..0750a3894d1 100644 --- a/innobase/include/data0data.ic +++ b/innobase/include/data0data.ic @@ -133,8 +133,8 @@ dfield_copy( Tests if data length and content is equal for two dfields. */ UNIV_INLINE ibool -dfield_datas_are_equal( -/*===================*/ +dfield_datas_are_binary_equal( +/*==========================*/ /* out: TRUE if equal */ dfield_t* field1, /* in: field */ dfield_t* field2) /* in: field */ @@ -157,8 +157,8 @@ dfield_datas_are_equal( Tests if dfield data length and content is equal to the given. 
*/ UNIV_INLINE ibool -dfield_data_is_equal( -/*=================*/ +dfield_data_is_binary_equal( +/*========================*/ /* out: TRUE if equal */ dfield_t* field, /* in: field */ ulint len, /* in: data length or UNIV_SQL_NULL */ @@ -169,8 +169,7 @@ dfield_data_is_equal( return(FALSE); } - if ((len != UNIV_SQL_NULL) - && (0 != ut_memcmp(field->data, data, len))) { + if (len != UNIV_SQL_NULL && 0 != ut_memcmp(field->data, data, len)) { return(FALSE); } @@ -342,65 +341,6 @@ dtuple_get_data_size( return(sum); } -/**************************************************************** -Returns TRUE if lengths of two dtuples are equal and respective data fields -in them are equal. */ -UNIV_INLINE -ibool -dtuple_datas_are_equal( -/*===================*/ - /* out: TRUE if length and datas are equal */ - dtuple_t* tuple1, /* in: tuple 1 */ - dtuple_t* tuple2) /* in: tuple 2 */ -{ - dfield_t* field1; - dfield_t* field2; - ulint n_fields; - byte* data1; - byte* data2; - ulint len1; - ulint len2; - ulint i; - - ut_ad(tuple1 && tuple2); - ut_ad(tuple1->magic_n = DATA_TUPLE_MAGIC_N); - ut_ad(tuple2->magic_n = DATA_TUPLE_MAGIC_N); - ut_ad(dtuple_check_typed(tuple1)); - ut_ad(dtuple_check_typed(tuple2)); - - n_fields = dtuple_get_n_fields(tuple1); - - if (n_fields != dtuple_get_n_fields(tuple2)) { - - return(FALSE); - } - - for (i = 0; i < n_fields; i++) { - - field1 = dtuple_get_nth_field(tuple1, i); - data1 = (byte*) dfield_get_data(field1); - len1 = dfield_get_len(field1); - - field2 = dtuple_get_nth_field(tuple2, i); - data2 = (byte*) dfield_get_data(field2); - len2 = dfield_get_len(field2); - - if (len1 != len2) { - - return(FALSE); - } - - if (len1 != UNIV_SQL_NULL) { - if (ut_memcmp(data1, data2, len1) != 0) { - - return(FALSE); - } - } - } - - return(TRUE); -} - /*********************************************************************** Sets types of fields binary in a tuple. 
*/ UNIV_INLINE diff --git a/innobase/include/data0type.h b/innobase/include/data0type.h index 4817f0ca839..b53a70a8909 100644 --- a/innobase/include/data0type.h +++ b/innobase/include/data0type.h @@ -124,17 +124,6 @@ dtype_get_pad_char( /* out: padding character code, or ULINT_UNDEFINED if no padding specified */ dtype_t* type); /* in: typeumn */ -/************************************************************************* -Transforms the character code so that it is ordered appropriately -for the language. */ -UNIV_INLINE -ulint -dtype_collate( -/*==========*/ - /* out: padding character */ - dtype_t* type, /* in: type */ - ulint code); /* in: character code stored in database - record */ /*************************************************************************** Returns the size of a fixed size data type, 0 if not a fixed size type. */ UNIV_INLINE diff --git a/innobase/include/data0type.ic b/innobase/include/data0type.ic index f6bdaf69662..4a62902eb1b 100644 --- a/innobase/include/data0type.ic +++ b/innobase/include/data0type.ic @@ -120,23 +120,6 @@ dtype_get_pad_char( return(ULINT_UNDEFINED); } -/************************************************************************* -Transforms the character code so that it is ordered appropriately for the -language. */ -UNIV_INLINE -ulint -dtype_collate( -/*==========*/ - /* out: collation order position */ - dtype_t* type, /* in: type */ - ulint code) /* in: character code stored in database - record */ -{ - ut_ad((type->mtype == DATA_CHAR) || (type->mtype == DATA_VARCHAR)); - - return(toupper(code)); -} - /************************************************************************** Stores to a type the information which determines its alphabetical ordering. 
*/ @@ -198,6 +181,10 @@ dtype_get_fixed_size( case DATA_SYS: if (type->prtype == DATA_ROW_ID) { return(DATA_ROW_ID_LEN); + } else if (type->prtype == DATA_TRX_ID) { + return(DATA_TRX_ID_LEN); + } else if (type->prtype == DATA_ROLL_PTR) { + return(DATA_ROLL_PTR_LEN); } else { return(0); } diff --git a/innobase/include/db0err.h b/innobase/include/db0err.h index 34513545faa..ddfbd5b7862 100644 --- a/innobase/include/db0err.h +++ b/innobase/include/db0err.h @@ -27,12 +27,21 @@ Created 5/24/1996 Heikki Tuuri #define DB_CLUSTER_NOT_FOUND 30 #define DB_TABLE_NOT_FOUND 31 #define DB_MUST_GET_MORE_FILE_SPACE 32 /* the database has to be stopped - and restrated with more file space */ + and restarted with more file space */ #define DB_TABLE_IS_BEING_USED 33 #define DB_TOO_BIG_RECORD 34 /* a record in an index would become bigger than 1/2 free space in a page frame */ - +#define DB_LOCK_WAIT_TIMEOUT 35 /* lock wait lasted too long */ +#define DB_NO_REFERENCED_ROW 36 /* referenced key value not found + for a foreign key in an insert or + update of a row */ +#define DB_ROW_IS_REFERENCED 37 /* cannot delete or update a row + because it contains a key value + which is referenced */ +#define DB_CANNOT_ADD_CONSTRAINT 38 /* adding a foreign key constraint + to a table failed */ + /* The following are partial failure codes */ #define DB_FAIL 1000 #define DB_OVERFLOW 1001 diff --git a/innobase/include/dict0crea.h b/innobase/include/dict0crea.h index 6bc31e1e722..ccdedff42c8 100644 --- a/innobase/include/dict0crea.h +++ b/innobase/include/dict0crea.h @@ -71,6 +71,24 @@ dict_drop_index_tree( rec_t* rec, /* in: record in the clustered index of SYS_INDEXES table */ mtr_t* mtr); /* in: mtr having the latch on the record page */ +/******************************************************************** +Creates the foreign key constraints system tables inside InnoDB +at database creation or database start if they are not found or are +not of the right form. 
*/ + +ulint +dict_create_or_check_foreign_constraint_tables(void); +/*================================================*/ + /* out: DB_SUCCESS or error code */ +/************************************************************************ +Adds foreign key definitions to data dictionary tables in the database. */ + +ulint +dict_create_add_foreigns_to_dictionary( +/*===================================*/ + /* out: error code or DB_SUCCESS */ + dict_table_t* table, /* in: table */ + trx_t* trx); /* in: transaction */ /* Table create node structure */ diff --git a/innobase/include/dict0dict.h b/innobase/include/dict0dict.h index cec1430c9e9..56b55b8a417 100644 --- a/innobase/include/dict0dict.h +++ b/innobase/include/dict0dict.h @@ -138,6 +138,38 @@ dict_table_rename_in_cache( dict_table_t* table, /* in: table */ char* new_name); /* in: new name */ /************************************************************************** +Adds a foreign key constraint object to the dictionary cache. May free +the object if there already is an object with the same identifier in. +At least one of foreign table or referenced table must already be in +the dictionary cache! */ + +ulint +dict_foreign_add_to_cache( +/*======================*/ + /* out: DB_SUCCESS or error code */ + dict_foreign_t* foreign); /* in, own: foreign key constraint */ +/************************************************************************* +Scans a table create SQL string and adds to the data dictionary +the foreign key constraints declared in the string. This function +should be called after the indexes for a table have been created. +Each foreign key constraint must be accompanied with indexes in +both participating tables. The indexes are allowed to contain more +fields than mentioned in the constraint. 
*/ + +ulint +dict_create_foreign_constraints( +/*============================*/ + /* out: error code or DB_SUCCESS */ + trx_t* trx, /* in: transaction */ + char* sql_string, /* in: table create statement where + foreign keys are declared like: + FOREIGN KEY (a, b) REFERENCES table2(c, d), + table2 can be written also with the database + name before it: test.table2; the default + database is the database of parameter name */ + char* name); /* in: table full name in the normalized form + database_name/table_name */ +/************************************************************************** Returns a table object and memoryfixes it. NOTE! This is a high-level function to be used mainly from outside the 'dict' directory. Inside this directory dict_table_get_low is usually the appropriate function. */ @@ -174,6 +206,14 @@ dict_table_release( /*===============*/ dict_table_t* table); /* in: table to be released */ /************************************************************************** +Checks if a table is in the dictionary cache. */ +UNIV_INLINE +dict_table_t* +dict_table_check_if_in_cache_low( +/*==============================*/ + /* out: table, NULL if not found */ + char* table_name); /* in: table name */ +/************************************************************************** Gets a table; loads it to the dictionary cache if necessary. A low-level function. */ UNIV_INLINE @@ -208,6 +248,13 @@ dict_table_print( /*=============*/ dict_table_t* table); /* in: table */ /************************************************************************** +Prints a table data. */ + +void +dict_table_print_low( +/*=================*/ + dict_table_t* table); /* in: table */ +/************************************************************************** Prints a table data when we know the table name. 
*/ void @@ -319,6 +366,16 @@ dict_table_copy_types( dtuple_t* tuple, /* in: data tuple */ dict_table_t* table); /* in: index */ /************************************************************************** +Looks for an index with the given id. NOTE that we do not reserve +the dictionary mutex: this function is for emergency purposes like +printing info of a corrupt database page! */ + +dict_index_t* +dict_index_find_on_id_low( +/*======================*/ + /* out: index or NULL if not found from cache */ + dulint id); /* in: index id */ +/************************************************************************** Adds an index to dictionary cache. */ ibool @@ -640,6 +697,23 @@ dict_tree_get_space_reserve( reserved for updates */ dict_tree_t* tree); /* in: a tree */ /************************************************************************* +Calculates the minimum record length in an index. */ + +ulint +dict_index_calc_min_rec_len( +/*========================*/ + dict_index_t* index); /* in: index */ +/************************************************************************* +Calculates new estimates for table and index statistics. The statistics +are used in query optimization. */ + +void +dict_update_statistics_low( +/*=======================*/ + dict_table_t* table, /* in: table */ + ibool has_dict_mutex);/* in: TRUE if the caller has the + dictionary mutex */ +/************************************************************************* Calculates new estimates for table and index statistics. The statistics are used in query optimization. 
*/ @@ -661,7 +735,8 @@ dict_mutex_exit_for_mysql(void); /*===========================*/ -extern dict_sys_t* dict_sys; /* the dictionary system */ +extern dict_sys_t* dict_sys; /* the dictionary system */ +extern rw_lock_t dict_foreign_key_check_lock; /* Dictionary system struct */ struct dict_sys_struct{ diff --git a/innobase/include/dict0dict.ic b/innobase/include/dict0dict.ic index 9089ebe8edd..821465f96a8 100644 --- a/innobase/include/dict0dict.ic +++ b/innobase/include/dict0dict.ic @@ -532,12 +532,11 @@ dict_tree_get_space_reserve( } /************************************************************************** -Gets a table; loads it to the dictionary cache if necessary. A low-level -function. */ +Checks if a table is in the dictionary cache. */ UNIV_INLINE dict_table_t* -dict_table_get_low( -/*===============*/ +dict_table_check_if_in_cache_low( +/*==============================*/ /* out: table, NULL if not found */ char* table_name) /* in: table name */ { @@ -552,6 +551,26 @@ dict_table_get_low( HASH_SEARCH(name_hash, dict_sys->table_hash, table_fold, table, ut_strcmp(table->name, table_name) == 0); + return(table); +} + +/************************************************************************** +Gets a table; loads it to the dictionary cache if necessary. A low-level +function. 
*/ +UNIV_INLINE +dict_table_t* +dict_table_get_low( +/*===============*/ + /* out: table, NULL if not found */ + char* table_name) /* in: table name */ +{ + dict_table_t* table; + + ut_ad(table_name); + ut_ad(mutex_own(&(dict_sys->mutex))); + + table = dict_table_check_if_in_cache_low(table_name); + if (table == NULL) { table = dict_load_table(table_name); } @@ -603,6 +622,7 @@ dict_table_get_on_id_low( dict_table_t* table; ulint fold; + ut_ad(mutex_own(&(dict_sys->mutex))); UT_NOT_USED(trx); /* Look for the table name in the hash table */ diff --git a/innobase/include/dict0load.h b/innobase/include/dict0load.h index d0298d8df37..b60996a8dab 100644 --- a/innobase/include/dict0load.h +++ b/innobase/include/dict0load.h @@ -15,8 +15,19 @@ Created 4/24/1996 Heikki Tuuri #include "ut0byte.h" /************************************************************************ +Finds the first table name in the given database. */ + +char* +dict_get_first_table_name_in_db( +/*============================*/ + /* out, own: table name, NULL if does not exist; + the caller must free the memory in the string! */ + char* name); /* in: database name which ends to '/' */ +/************************************************************************ Loads a table definition and also all its index definitions, and also -the cluster definition, if the table is a member in a cluster. */ +the cluster definition if the table is a member in a cluster. Also loads +all foreign key constraints where the foreign key is in the table or where +a foreign key references columns in this table. */ dict_table_t* dict_load_table( @@ -40,6 +51,25 @@ void dict_load_sys_table( /*================*/ dict_table_t* table); /* in: system table */ +/*************************************************************************** +Loads foreign key constraints where the table is either the foreign key +holder or where the table is referenced by a foreign key. Adds these +constraints to the data dictionary. 
Note that we know that the dictionary +cache already contains all constraints where the other relevant table is +already in the dictionary cache. */ + +ulint +dict_load_foreigns( +/*===============*/ + /* out: DB_SUCCESS or error code */ + char* table_name); /* in: table name */ +/************************************************************************ +Prints to the standard output information on all tables found in the data +dictionary system table. */ + +void +dict_print(void); +/*============*/ #ifndef UNIV_NONINL diff --git a/innobase/include/dict0mem.h b/innobase/include/dict0mem.h index 74ecbc8bba2..bd24d1539ca 100644 --- a/innobase/include/dict0mem.h +++ b/innobase/include/dict0mem.h @@ -123,6 +123,13 @@ dict_mem_index_free( /*================*/ dict_index_t* index); /* in: index */ /************************************************************************** +Creates and initializes a foreign constraint memory object. */ + +dict_foreign_t* +dict_mem_foreign_create(void); +/*=========================*/ + /* out, own: foreign constraint struct */ +/************************************************************************** Creates a procedure memory object. 
*/ dict_proc_t* @@ -221,15 +228,56 @@ struct dict_index_struct{ dictionary cache */ btr_search_t* search_info; /* info used in optimistic searches */ /*----------------------*/ - ulint stat_n_diff_key_vals; + ib_longlong* stat_n_diff_key_vals; /* approximate number of different key values - for this index; we periodically calculate - new estimates */ + for this index, for each n-column prefix + where n <= dict_get_n_unique(index); we + periodically calculate new estimates */ ulint stat_index_size; /* approximate index size in database pages */ + ulint stat_n_leaf_pages; + /* approximate number of leaf pages in the + index tree */ ulint magic_n;/* magic number */ }; +/* Data structure for a foreign key constraint; an example: +FOREIGN KEY (A, B) REFERENCES TABLE2 (C, D) */ + +struct dict_foreign_struct{ + mem_heap_t* heap; /* this object is allocated from + this memory heap */ + char* id; /* id of the constraint as a + null-terminated string */ + char* foreign_table_name;/* foreign table name */ + dict_table_t* foreign_table; /* table where the foreign key is */ + char** foreign_col_names;/* names of the columns in the + foreign key */ + char* referenced_table_name;/* referenced table name */ + dict_table_t* referenced_table;/* table where the referenced key + is */ + char** referenced_col_names;/* names of the referenced + columns in the referenced table */ + ulint n_fields; /* number of indexes' first fields + for which the foreign key + constraint is defined: we allow the + indexes to contain more fields than + mentioned in the constraint, as long + as the first fields are as mentioned */ + dict_index_t* foreign_index; /* foreign index; we require that + both tables contain explicitly defined + indexes for the constraint: InnoDB + does not generate new indexes + implicitly */ + dict_index_t* referenced_index;/* referenced index */ + UT_LIST_NODE_T(dict_foreign_t) + foreign_list; /* list node for foreign keys of the + table */ + UT_LIST_NODE_T(dict_foreign_t) 
+ referenced_list;/* list node for referenced keys of the + table */ +}; + #define DICT_INDEX_MAGIC_N 76789786 /* Data structure for a database table */ @@ -247,6 +295,13 @@ struct dict_table_struct{ dict_col_t* cols; /* array of column descriptions */ UT_LIST_BASE_NODE_T(dict_index_t) indexes; /* list of indexes of the table */ + UT_LIST_BASE_NODE_T(dict_foreign_t) + foreign_list;/* list of foreign key constraints + in the table; these refer to columns + in other tables */ + UT_LIST_BASE_NODE_T(dict_foreign_t) + referenced_list;/* list of foreign key constraints + which refer to this table */ UT_LIST_NODE_T(dict_table_t) table_LRU; /* node of the LRU list of tables */ ulint mem_fix;/* count of how many times the table @@ -254,6 +309,13 @@ struct dict_table_struct{ currently NOT used */ ibool cached; /* TRUE if the table object has been added to the dictionary cache */ + lock_t* auto_inc_lock;/* a buffer for an auto-inc lock + for this table: we allocate the memory here + so that individual transactions can get it + and release it without a need to allocate + space from the lock heap of the trx: + otherwise the lock heap would grow rapidly + if we do a large insert from a select */ UT_LIST_BASE_NODE_T(lock_t) locks; /* list of locks on the table */ /*----------------------*/ @@ -278,7 +340,7 @@ struct dict_table_struct{ forget about value TRUE if it has to reload the table definition from disk */ /*----------------------*/ - ulint stat_n_rows; + ib_longlong stat_n_rows; /* approximate number of rows in the table; we periodically calculate new estimates */ ulint stat_clustered_index_size; diff --git a/innobase/include/dict0types.h b/innobase/include/dict0types.h index fe1bad45063..498c6f46b7b 100644 --- a/innobase/include/dict0types.h +++ b/innobase/include/dict0types.h @@ -16,6 +16,7 @@ typedef struct dict_index_struct dict_index_t; typedef struct dict_tree_struct dict_tree_t; typedef struct dict_table_struct dict_table_t; typedef struct dict_proc_struct 
dict_proc_t; +typedef struct dict_foreign_struct dict_foreign_t; /* A cluster object is a table object with the type field set to DICT_CLUSTERED */ diff --git a/innobase/include/fil0fil.h b/innobase/include/fil0fil.h index bfc322270fc..ca74ea4cb2c 100644 --- a/innobase/include/fil0fil.h +++ b/innobase/include/fil0fil.h @@ -76,6 +76,9 @@ extern fil_addr_t fil_addr_null; #define FIL_TABLESPACE 501 #define FIL_LOG 502 +extern ulint fil_n_pending_log_flushes; +extern ulint fil_n_pending_tablespace_flushes; + /*********************************************************************** Reserves a right to open a single file. The right must be released with fil_release_right_to_open. */ diff --git a/innobase/include/ibuf0ibuf.h b/innobase/include/ibuf0ibuf.h index f0b333192de..99fb1595f49 100644 --- a/innobase/include/ibuf0ibuf.h +++ b/innobase/include/ibuf0ibuf.h @@ -226,6 +226,21 @@ ibuf_contract( issued read with the highest tablespace address to complete */ /************************************************************************* +Contracts insert buffer trees by reading pages to the buffer pool. */ + +ulint +ibuf_contract_for_n_pages( +/*======================*/ + /* out: a lower limit for the combined size in bytes + of entries which will be merged from ibuf trees to the + pages read, 0 if ibuf is empty */ + ibool sync, /* in: TRUE if the caller wants to wait for the + issued read with the highest tablespace address + to complete */ + ulint n_pages);/* in: try to read at least this many pages to + the buffer pool and merge the ibuf contents to + them */ +/************************************************************************* Parses a redo log record of an ibuf bitmap page init. 
*/ byte* diff --git a/innobase/include/lock0lock.h b/innobase/include/lock0lock.h index c492e7b8ef3..5a15b78b869 100644 --- a/innobase/include/lock0lock.h +++ b/innobase/include/lock0lock.h @@ -21,15 +21,13 @@ Created 5/7/1996 Heikki Tuuri extern ibool lock_print_waits; -/***************************************************************** -Cancels a waiting record lock request and releases the waiting transaction -that requested it. NOTE: does NOT check if waiting lock requests behind this -one can now be granted! */ +/************************************************************************* +Gets the size of a lock struct. */ -void -lock_rec_cancel( -/*============*/ - lock_t* lock); /* in: waiting record lock request */ +ulint +lock_get_size(void); +/*===============*/ + /* out: size in bytes */ /************************************************************************* Creates the lock system at database start. */ @@ -388,6 +386,14 @@ lock_is_on_table( /* out: TRUE if there are lock(s) */ dict_table_t* table); /* in: database table in dictionary cache */ /************************************************************************* +Releases an auto-inc lock a transaction possibly has on a table. +Releases possible other transactions waiting for this lock. */ + +void +lock_table_unlock_auto_inc( +/*=======================*/ + trx_t* trx); /* in: transaction */ +/************************************************************************* Releases transaction locks, and releases possible other transactions waiting because of these locks. */ @@ -396,6 +402,14 @@ lock_release_off_kernel( /*====================*/ trx_t* trx); /* in: transaction */ /************************************************************************* +Cancels a waiting lock request and releases possible other transactions +waiting behind it. 
*/ + +void +lock_cancel_waiting_and_release( +/*============================*/ + lock_t* lock); /* in: waiting lock request */ +/************************************************************************* Resets all locks, both table and record locks, on a table to be dropped. No lock is allowed to be a wait lock. */ @@ -495,6 +509,8 @@ extern lock_sys_t* lock_sys; #define LOCK_IX 3 /* intention exclusive */ #define LOCK_S 4 /* shared */ #define LOCK_X 5 /* exclusive */ +#define LOCK_AUTO_INC 6 /* locks the auto-inc counter of a table + in an exclusive mode */ #define LOCK_MODE_MASK 0xF /* mask used to extract mode from the type_mode field in a lock */ #define LOCK_TABLE 16 /* these type values should be so high that */ diff --git a/innobase/include/log0log.h b/innobase/include/log0log.h index 001f98cfc3c..adff9fae544 100644 --- a/innobase/include/log0log.h +++ b/innobase/include/log0log.h @@ -659,6 +659,11 @@ struct log_struct{ mutex! */ ulint n_log_ios; /* number of log i/os initiated thus far */ + ulint n_log_ios_old; /* number of log i/o's at the + previous printout */ + time_t last_printout_time;/* when log_print was last time + called */ + /* Fields involved in checkpoints */ ulint max_modified_age_async; /* when this recommended value for lsn diff --git a/innobase/include/mtr0mtr.h b/innobase/include/mtr0mtr.h index dec8eeb1e15..0ef25b3d1ee 100644 --- a/innobase/include/mtr0mtr.h +++ b/innobase/include/mtr0mtr.h @@ -203,20 +203,12 @@ mtr_read_dulint( mtr_t* mtr); /* in: mini-transaction handle */ /************************************************************************* This macro locks an rw-lock in s-mode. */ -#ifdef UNIV_SYNC_DEBUG #define mtr_s_lock(B, MTR) mtr_s_lock_func((B), IB__FILE__, __LINE__,\ (MTR)) -#else -#define mtr_s_lock(B, MTR) mtr_s_lock_func((B), (MTR)) -#endif /************************************************************************* This macro locks an rw-lock in x-mode. 
*/ -#ifdef UNIV_SYNC_DEBUG #define mtr_x_lock(B, MTR) mtr_x_lock_func((B), IB__FILE__, __LINE__,\ (MTR)) -#else -#define mtr_x_lock(B, MTR) mtr_x_lock_func((B), (MTR)) -#endif /************************************************************************* NOTE! Use the macro above! Locks a lock in s-mode. */ @@ -225,10 +217,8 @@ void mtr_s_lock_func( /*============*/ rw_lock_t* lock, /* in: rw-lock */ -#ifdef UNIV_SYNC_DEBUG char* file, /* in: file name */ ulint line, /* in: line number */ -#endif mtr_t* mtr); /* in: mtr */ /************************************************************************* NOTE! Use the macro above! @@ -238,10 +228,8 @@ void mtr_x_lock_func( /*============*/ rw_lock_t* lock, /* in: rw-lock */ -#ifdef UNIV_SYNC_DEBUG char* file, /* in: file name */ ulint line, /* in: line number */ -#endif mtr_t* mtr); /* in: mtr */ /******************************************************* diff --git a/innobase/include/mtr0mtr.ic b/innobase/include/mtr0mtr.ic index 5718d872bcb..51112fc0d14 100644 --- a/innobase/include/mtr0mtr.ic +++ b/innobase/include/mtr0mtr.ic @@ -217,20 +217,14 @@ void mtr_s_lock_func( /*============*/ rw_lock_t* lock, /* in: rw-lock */ -#ifdef UNIV_SYNC_DEBUG char* file, /* in: file name */ ulint line, /* in: line number */ -#endif mtr_t* mtr) /* in: mtr */ { ut_ad(mtr); ut_ad(lock); - rw_lock_s_lock_func(lock - #ifdef UNIV_SYNC_DEBUG - ,0, file, line - #endif - ); + rw_lock_s_lock_func(lock, 0, file, line); mtr_memo_push(mtr, lock, MTR_MEMO_S_LOCK); } @@ -242,20 +236,14 @@ void mtr_x_lock_func( /*============*/ rw_lock_t* lock, /* in: rw-lock */ -#ifdef UNIV_SYNC_DEBUG char* file, /* in: file name */ ulint line, /* in: line number */ -#endif mtr_t* mtr) /* in: mtr */ { ut_ad(mtr); ut_ad(lock); - rw_lock_x_lock_func(lock, 0 - #ifdef UNIV_SYNC_DEBUG - , file, line - #endif - ); + rw_lock_x_lock_func(lock, 0, file, line); mtr_memo_push(mtr, lock, MTR_MEMO_X_LOCK); } diff --git a/innobase/include/rem0cmp.h b/innobase/include/rem0cmp.h index 
10c428cb9ca..6f2a99fc8c2 100644 --- a/innobase/include/rem0cmp.h +++ b/innobase/include/rem0cmp.h @@ -16,6 +16,32 @@ Created 7/1/1994 Heikki Tuuri #include "rem0rec.h" /***************************************************************** +Returns TRUE if two types are equal for comparison purposes. */ + +ibool +cmp_types_are_equal( +/*================*/ + /* out: TRUE if the types are considered + equal in comparisons */ + dtype_t* type1, /* in: type 1 */ + dtype_t* type2); /* in: type 2 */ +/***************************************************************** +This function is used to compare two data fields for which we know the +data type. */ +UNIV_INLINE +int +cmp_data_data( +/*==========*/ + /* out: 1, 0, -1, if data1 is greater, equal, + less than data2, respectively */ + dtype_t* cur_type,/* in: data type of the fields */ + byte* data1, /* in: data field (== a pointer to a memory + buffer) */ + ulint len1, /* in: data field length or UNIV_SQL_NULL */ + byte* data2, /* in: data field (== a pointer to a memory + buffer) */ + ulint len2); /* in: data field length or UNIV_SQL_NULL */ +/***************************************************************** This function is used to compare two dfields where at least the first has its data type field set. */ UNIV_INLINE diff --git a/innobase/include/row0ins.h b/innobase/include/row0ins.h index 612b9e8d73a..cc3b9fa7e9a 100644 --- a/innobase/include/row0ins.h +++ b/innobase/include/row0ins.h @@ -16,6 +16,28 @@ Created 4/20/1996 Heikki Tuuri #include "trx0types.h" #include "row0types.h" +/******************************************************************* +Checks if foreign key constraint fails for an index entry. Sets shared locks +which lock either the success or the failure of the constraint. NOTE that +the caller must have a shared latch on dict_foreign_key_check_lock. 
*/ + +ulint +row_ins_check_foreign_constraint( +/*=============================*/ + /* out: DB_SUCCESS, DB_LOCK_WAIT, + DB_NO_REFERENCED_ROW, + or DB_ROW_IS_REFERENCED */ + ibool check_ref,/* in: TRUE if we want to check that + the referenced table is ok, FALSE if we + want to check the foreign key table */ + dict_foreign_t* foreign,/* in: foreign constraint; NOTE that the + tables mentioned in it must be in the + dictionary cache if they exist at all */ + dict_table_t* table, /* in: if check_ref is TRUE, then the foreign + table, else the referenced table */ + dict_index_t* index, /* in: index in table */ + dtuple_t* entry, /* in: index entry for index */ + que_thr_t* thr); /* in: query thread */ /************************************************************************* Creates an insert node struct. */ diff --git a/innobase/include/row0mysql.h b/innobase/include/row0mysql.h index 31f9e15cddc..4e90c0ac590 100644 --- a/innobase/include/row0mysql.h +++ b/innobase/include/row0mysql.h @@ -133,6 +133,26 @@ row_update_prebuilt_trx( handle */ trx_t* trx); /* in: transaction handle */ /************************************************************************* +Unlocks an AUTO_INC type lock possibly reserved by trx. */ + +void +row_unlock_table_autoinc_for_mysql( +/*===============================*/ + trx_t* trx); /* in: transaction */ +/************************************************************************* +Sets an AUTO_INC type lock on the table mentioned in prebuilt. The +AUTO_INC lock gives exclusive access to the auto-inc counter of the +table. The lock is reserved only for the duration of an SQL statement. +It is not compatible with another AUTO_INC or exclusive lock on the +table. 
*/ + +int +row_lock_table_autoinc_for_mysql( +/*=============================*/ + /* out: error code or DB_SUCCESS */ + row_prebuilt_t* prebuilt); /* in: prebuilt struct in the MySQL + table handle */ +/************************************************************************* Does an insert for MySQL. */ int @@ -211,6 +231,26 @@ row_create_index_for_mysql( dict_index_t* index, /* in: index defintion */ trx_t* trx); /* in: transaction handle */ /************************************************************************* +Scans a table create SQL string and adds to the data dictionary +the foreign key constraints declared in the string. This function +should be called after the indexes for a table have been created. +Each foreign key constraint must be accompanied with indexes in +both participating tables. The indexes are allowed to contain more +fields than mentioned in the constraint. */ + +int +row_table_add_foreign_constraints( +/*==============================*/ + /* out: error code or DB_SUCCESS */ + trx_t* trx, /* in: transaction */ + char* sql_string, /* in: table create statement where + foreign keys are declared like: + FOREIGN KEY (a, b) REFERENCES table2(c, d), + table2 can be written also with the database + name before it: test.table2 */ + char* name); /* in: table full name in the normalized form + database_name/table_name */ +/************************************************************************* Drops a table for MySQL. If the name of the dropped table ends to characters INNODB_MONITOR, then this also stops printing of monitor output by the master thread. */ @@ -224,6 +264,15 @@ row_drop_table_for_mysql( ibool has_dict_mutex);/* in: TRUE if the caller already owns the dictionary system mutex */ /************************************************************************* +Drops a database for MySQL. 
*/ + +int +row_drop_database_for_mysql( +/*========================*/ + /* out: error code or DB_SUCCESS */ + char* name, /* in: database name which ends to '/' */ + trx_t* trx); /* in: transaction handle */ +/************************************************************************* Renames a table for MySQL. */ int diff --git a/innobase/include/row0upd.h b/innobase/include/row0upd.h index 9bb73726b29..106d3866b25 100644 --- a/innobase/include/row0upd.h +++ b/innobase/include/row0upd.h @@ -47,8 +47,7 @@ upd_get_nth_field( upd_t* update, /* in: update vector */ ulint n); /* in: field position in update vector */ /************************************************************************* -Sets the clustered index field number to be updated by an update vector -field. */ +Sets an index field number to be updated by an update vector field. */ UNIV_INLINE void upd_field_set_field_no( @@ -56,7 +55,7 @@ upd_field_set_field_no( upd_field_t* upd_field, /* in: update vector field */ ulint field_no, /* in: field number in a clustered index */ - dict_index_t* index); /* in: clustered index */ + dict_index_t* index); /* in: index */ /************************************************************************* Writes into the redo log the values of trx id and roll ptr and enough info to determine their positions within a clustered index record. */ @@ -136,13 +135,27 @@ row_upd_rec_in_place( rec_t* rec, /* in/out: record where replaced */ upd_t* update);/* in: update vector */ /******************************************************************* +Builds an update vector from those fields which in a secondary index entry +differ from a record that has the equal ordering fields. NOTE: we compare +the fields as binary strings! 
*/ + +upd_t* +row_upd_build_sec_rec_difference_binary( +/*====================================*/ + /* out, own: update vector of differing + fields */ + dict_index_t* index, /* in: index */ + dtuple_t* entry, /* in: entry to insert */ + rec_t* rec, /* in: secondary index record */ + mem_heap_t* heap); /* in: memory heap from which allocated */ +/******************************************************************* Builds an update vector from those fields, excluding the roll ptr and trx id fields, which in an index entry differ from a record that has -the equal ordering fields. */ +the equal ordering fields. NOTE: we compare the fields as binary strings! */ upd_t* -row_upd_build_difference( -/*=====================*/ +row_upd_build_difference_binary( +/*============================*/ /* out, own: update vector of differing fields, excluding roll ptr and trx id */ dict_index_t* index, /* in: clustered index */ @@ -175,13 +188,16 @@ row_upd_clust_index_replace_new_col_vals( /*************************************************************** Checks if an update vector changes an ordering field of an index record. This function is fast if the update vector is short or the number of ordering -fields in the index is small. Otherwise, this can be quadratic. */ - +fields in the index is small. Otherwise, this can be quadratic. +NOTE: we compare the fields as binary strings! 
*/ + ibool -row_upd_changes_ord_field( -/*======================*/ +row_upd_changes_ord_field_binary( +/*=============================*/ /* out: TRUE if update vector changes - an ordering field in the index record */ + an ordering field in the index record; + NOTE: the fields are compared as binary + strings */ dtuple_t* row, /* in: old value of row, or NULL if the row and the data values in update are not known when this function is called, e.g., at @@ -191,11 +207,12 @@ row_upd_changes_ord_field( /*************************************************************** Checks if an update vector changes an ordering field of an index record. This function is fast if the update vector is short or the number of ordering -fields in the index is small. Otherwise, this can be quadratic. */ +fields in the index is small. Otherwise, this can be quadratic. +NOTE: we compare the fields as binary strings! */ ibool -row_upd_changes_some_index_ord_field( -/*=================================*/ +row_upd_changes_some_index_ord_field_binary( +/*========================================*/ /* out: TRUE if update vector may change an ordering field in an index record */ dict_table_t* table, /* in: table */ diff --git a/innobase/include/row0upd.ic b/innobase/include/row0upd.ic index b785e52caa0..7015b2eda13 100644 --- a/innobase/include/row0upd.ic +++ b/innobase/include/row0upd.ic @@ -70,8 +70,7 @@ upd_get_nth_field( } /************************************************************************* -Sets the clustered index field number to be updated by an update vector -field. */ +Sets an index field number to be updated by an update vector field. 
*/ UNIV_INLINE void upd_field_set_field_no( @@ -79,12 +78,18 @@ upd_field_set_field_no( upd_field_t* upd_field, /* in: update vector field */ ulint field_no, /* in: field number in a clustered index */ - dict_index_t* index) /* in: clustered index */ -{ - ut_ad(index->type & DICT_CLUSTERED); - + dict_index_t* index) /* in: index */ +{ upd_field->field_no = field_no; + if (field_no >= dict_index_get_n_fields(index)) { + fprintf(stderr, + "InnoDB: Error: trying to access field %lu in table %s\n" + "InnoDB: index %s, but index has only %lu fields\n", + field_no, index->table_name, index->name, + dict_index_get_n_fields(index)); + } + dtype_copy(dfield_get_type(&(upd_field->new_val)), dict_index_get_nth_type(index, field_no)); } diff --git a/innobase/include/srv0srv.h b/innobase/include/srv0srv.h index b77d7d5436a..ca89de4b9a8 100644 --- a/innobase/include/srv0srv.h +++ b/innobase/include/srv0srv.h @@ -16,6 +16,11 @@ Created 10/10/1995 Heikki Tuuri #include "com0com.h" #include "que0types.h" + +/* When this event is set the lock timeout and InnoDB monitor +thread starts running */ +extern os_event_t srv_lock_timeout_thread_event; + /* Server parameters which are read from the initfile */ extern char* srv_data_home; @@ -27,6 +32,8 @@ extern char** srv_data_file_names; extern ulint* srv_data_file_sizes; extern ulint* srv_data_file_is_raw_partition; +extern ibool srv_created_new_raw; + #define SRV_NEW_RAW 1 #define SRV_OLD_RAW 2 @@ -39,6 +46,8 @@ extern ibool srv_log_archive_on; extern ulint srv_log_buffer_size; extern ibool srv_flush_log_at_trx_commit; +extern byte srv_latin1_ordering[256];/* The sort order table of the latin1 + character set */ extern ibool srv_use_native_aio; extern ulint srv_pool_size; @@ -54,6 +63,7 @@ extern ulint srv_lock_wait_timeout; extern char* srv_unix_file_flush_method_str; extern ulint srv_unix_file_flush_method; +extern ulint srv_force_recovery; extern ibool srv_use_doublewrite_buf; @@ -71,6 +81,7 @@ extern ibool srv_print_innodb_monitor; 
extern ibool srv_print_innodb_lock_monitor; extern ibool srv_print_innodb_tablespace_monitor; extern ibool srv_print_verbose_log; +extern ibool srv_print_innodb_table_monitor; extern ulint srv_n_spin_wait_rounds; extern ulint srv_spin_wait_delay; @@ -133,6 +144,25 @@ what these mean */ #define SRV_UNIX_LITTLESYNC 3 #define SRV_UNIX_NOSYNC 4 +/* Alternatives for srv_force_recovery. Non-zero values are intended +to help the user get a damaged database up so that he can dump intact +tables and rows with SELECT INTO OUTFILE. The database must not otherwise +be used with these options! A bigger number below means that all precautions +of lower numbers are included. */ + +#define SRV_FORCE_IGNORE_CORRUPT 1 /* let the server run even if it + detects a corrupt page */ +#define SRV_FORCE_NO_BACKGROUND 2 /* prevent the main thread from + running: if a crash would occur + in purge, this prevents it */ +#define SRV_FORCE_NO_TRX_UNDO 3 /* do not run trx rollback after + recovery */ +#define SRV_FORCE_NO_IBUF_MERGE 4 /* prevent also ibuf operations: + if they would cause a crash, better + not do them */ +#define SRV_FORCE_NO_LOG_REDO 5 /* do not do the log roll-forward + in connection with recovery */ + /************************************************************************* Boots Innobase server. */ @@ -225,15 +255,30 @@ srv_release_mysql_thread_if_suspended( que_thr_t* thr); /* in: query thread associated with the MySQL OS thread */ /************************************************************************* -A thread which wakes up threads whose lock wait may have lasted too long. */ +A thread which wakes up threads whose lock wait may have lasted too long. +This also prints the info output by various InnoDB monitors. 
*/ + +#ifndef __WIN__ +void* +#else +ulint +#endif +srv_lock_timeout_and_monitor_thread( +/*================================*/ + /* out: a dummy parameter */ + void* arg); /* in: a dummy parameter required by + os_thread_create */ +/************************************************************************* +A thread which prints warnings about semaphore waits which have lasted +too long. These can be used to track bugs which cause hangs. */ #ifndef __WIN__ void* #else ulint #endif -srv_lock_timeout_monitor_thread( -/*============================*/ +srv_error_monitor_thread( +/*=====================*/ /* out: a dummy parameter */ void* arg); /* in: a dummy parameter required by os_thread_create */ diff --git a/innobase/include/sync0arr.h b/innobase/include/sync0arr.h index 75d79f4c93f..f0134894997 100644 --- a/innobase/include/sync0arr.h +++ b/innobase/include/sync0arr.h @@ -51,13 +51,9 @@ sync_array_reserve_cell( sync_array_t* arr, /* in: wait array */ void* object, /* in: pointer to the object to wait for */ ulint type, /* in: lock request type */ - #ifdef UNIV_SYNC_DEBUG - char* file, /* in: in debug version file where - requested */ - ulint line, /* in: in the debug version line where - requested */ - #endif - ulint* index); /* out: index of the reserved cell */ + char* file, /* in: file where requested */ + ulint line, /* in: line where requested */ + ulint* index); /* out: index of the reserved cell */ /********************************************************************** This function should be called when a thread starts to wait on a wait array cell. In the debug version this function checks @@ -90,6 +86,20 @@ sync_array_signal_object( /*=====================*/ sync_array_t* arr, /* in: wait array */ void* object);/* in: wait object */ +/************************************************************************** +If the wakeup algorithm does not work perfectly at semaphore releases, +this function will do the waking (see the comment in mutex_exit). 
This +function should be called about every 1 second in the server. */ + +void +sync_arr_wake_threads_if_sema_free(void); +/*====================================*/ +/************************************************************************** +Prints warnings of long semaphore waits to stderr. Currently > 120 sec. */ + +void +sync_array_print_long_waits(void); +/*=============================*/ /************************************************************************ Validates the integrity of the wait array. Checks that the number of reserved cells equals the count variable. */ diff --git a/innobase/include/sync0ipm.ic b/innobase/include/sync0ipm.ic index 8487830e1dd..b8aa87ba6d6 100644 --- a/innobase/include/sync0ipm.ic +++ b/innobase/include/sync0ipm.ic @@ -92,7 +92,7 @@ loop: loop_count++; ut_ad(loop_count < 15); - if (mutex_enter_nowait(mutex) == 0) { + if (mutex_enter_nowait(mutex, IB__FILE__, __LINE__) == 0) { /* Succeeded! */ return(0); @@ -105,7 +105,7 @@ loop: /* Order is important here: FIRST reset event, then set waiters */ ip_mutex_set_waiters(ip_mutex, 1); - if (mutex_enter_nowait(mutex) == 0) { + if (mutex_enter_nowait(mutex, IB__FILE__, __LINE__) == 0) { /* Succeeded! */ return(0); diff --git a/innobase/include/sync0rw.h b/innobase/include/sync0rw.h index 4e055da3810..7ad38f5bc7f 100644 --- a/innobase/include/sync0rw.h +++ b/innobase/include/sync0rw.h @@ -46,9 +46,10 @@ extern ibool rw_lock_debug_waiters; /* This is set to TRUE, if extern ulint rw_s_system_call_count; extern ulint rw_s_spin_wait_count; extern ulint rw_s_exit_count; - +extern ulint rw_s_os_wait_count; extern ulint rw_x_system_call_count; extern ulint rw_x_spin_wait_count; +extern ulint rw_x_os_wait_count; extern ulint rw_x_exit_count; /********************************************************************** @@ -92,32 +93,20 @@ rw_lock_validate( NOTE! The following macros should be used in rw s-locking, not the corresponding function. 
*/ -#ifdef UNIV_SYNC_DEBUG #define rw_lock_s_lock(M) rw_lock_s_lock_func(\ (M), 0, IB__FILE__, __LINE__) -#else -#define rw_lock_s_lock(M) rw_lock_s_lock_func(M) -#endif /****************************************************************** NOTE! The following macros should be used in rw s-locking, not the corresponding function. */ -#ifdef UNIV_SYNC_DEBUG #define rw_lock_s_lock_gen(M, P) rw_lock_s_lock_func(\ (M), (P), IB__FILE__, __LINE__) -#else -#define rw_lock_s_lock_gen(M, P) rw_lock_s_lock_func(M) -#endif /****************************************************************** NOTE! The following macros should be used in rw s-locking, not the corresponding function. */ -#ifdef UNIV_SYNC_DEBUG #define rw_lock_s_lock_nowait(M) rw_lock_s_lock_func_nowait(\ (M), IB__FILE__, __LINE__) -#else -#define rw_lock_s_lock_nowait(M) rw_lock_s_lock_func_nowait(M) -#endif /********************************************************************** NOTE! Use the corresponding macro, not directly this function, except if you supply the file name and line number. Lock an rw-lock in shared mode @@ -129,14 +118,11 @@ UNIV_INLINE void rw_lock_s_lock_func( /*================*/ - rw_lock_t* lock /* in: pointer to rw-lock */ - #ifdef UNIV_SYNC_DEBUG - ,ulint pass, /* in: pass value; != 0, if the lock will + rw_lock_t* lock, /* in: pointer to rw-lock */ + ulint pass, /* in: pass value; != 0, if the lock will be passed to another thread to unlock */ - char* file_name, /* in: file name where lock requested */ - ulint line /* in: line where requested */ - #endif -); + char* file_name,/* in: file name where lock requested */ + ulint line); /* in: line where requested */ /********************************************************************** NOTE! Use the corresponding macro, not directly this function, except if you supply the file name and line number. 
Lock an rw-lock in shared mode @@ -146,12 +132,9 @@ ibool rw_lock_s_lock_func_nowait( /*=======================*/ /* out: TRUE if success */ - rw_lock_t* lock /* in: pointer to rw-lock */ - #ifdef UNIV_SYNC_DEBUG - ,char* file_name, /* in: file name where lock requested */ - ulint line /* in: line where requested */ - #endif -); + rw_lock_t* lock, /* in: pointer to rw-lock */ + char* file_name,/* in: file name where lock requested */ + ulint line); /* in: line where requested */ /********************************************************************** NOTE! Use the corresponding macro, not directly this function! Lock an rw-lock in exclusive mode for the current thread if the lock can be @@ -161,12 +144,9 @@ ibool rw_lock_x_lock_func_nowait( /*=======================*/ /* out: TRUE if success */ - rw_lock_t* lock /* in: pointer to rw-lock */ - #ifdef UNIV_SYNC_DEBUG - ,char* file_name, /* in: file name where lock requested */ - ulint line /* in: line where requested */ - #endif -); + rw_lock_t* lock, /* in: pointer to rw-lock */ + char* file_name,/* in: file name where lock requested */ + ulint line); /* in: line where requested */ /********************************************************************** Releases a shared mode lock. */ UNIV_INLINE @@ -199,32 +179,20 @@ Releases a shared mode lock. */ NOTE! The following macro should be used in rw x-locking, not the corresponding function. */ -#ifdef UNIV_SYNC_DEBUG #define rw_lock_x_lock(M) rw_lock_x_lock_func(\ (M), 0, IB__FILE__, __LINE__) -#else -#define rw_lock_x_lock(M) rw_lock_x_lock_func(M, 0) -#endif /****************************************************************** NOTE! The following macro should be used in rw x-locking, not the corresponding function. 
*/ -#ifdef UNIV_SYNC_DEBUG #define rw_lock_x_lock_gen(M, P) rw_lock_x_lock_func(\ (M), (P), IB__FILE__, __LINE__) -#else -#define rw_lock_x_lock_gen(M, P) rw_lock_x_lock_func(M, P) -#endif /****************************************************************** NOTE! The following macros should be used in rw x-locking, not the corresponding function. */ -#ifdef UNIV_SYNC_DEBUG #define rw_lock_x_lock_nowait(M) rw_lock_x_lock_func_nowait(\ (M), IB__FILE__, __LINE__) -#else -#define rw_lock_x_lock_nowait(M) rw_lock_x_lock_func_nowait(M) -#endif /********************************************************************** NOTE! Use the corresponding macro, not directly this function! Lock an rw-lock in exclusive mode for the current thread. If the rw-lock is locked @@ -239,13 +207,10 @@ void rw_lock_x_lock_func( /*================*/ rw_lock_t* lock, /* in: pointer to rw-lock */ - ulint pass /* in: pass value; != 0, if the lock will + ulint pass, /* in: pass value; != 0, if the lock will be passed to another thread to unlock */ - #ifdef UNIV_SYNC_DEBUG - ,char* file_name, /* in: file name where lock requested */ - ulint line /* in: line where requested */ - #endif -); + char* file_name,/* in: file name where lock requested */ + ulint line); /* in: line where requested */ /********************************************************************** Releases an exclusive mode lock. 
*/ UNIV_INLINE @@ -283,10 +248,8 @@ void rw_lock_s_lock_direct( /*==================*/ rw_lock_t* lock /* in: pointer to rw-lock */ - #ifdef UNIV_SYNC_DEBUG ,char* file_name, /* in: file name where lock requested */ ulint line /* in: line where requested */ - #endif ); /********************************************************************** Low-level function which locks an rw-lock in x-mode when we know that it @@ -297,10 +260,8 @@ void rw_lock_x_lock_direct( /*==================*/ rw_lock_t* lock /* in: pointer to rw-lock */ - #ifdef UNIV_SYNC_DEBUG ,char* file_name, /* in: file name where lock requested */ ulint line /* in: line where requested */ - #endif ); /********************************************************************** This function is used in the insert buffer to move the ownership of an @@ -349,6 +310,23 @@ rw_lock_get_x_lock_count( /*=====================*/ /* out: value of writer_count */ rw_lock_t* lock); /* in: rw-lock */ +/************************************************************************ +Accessor functions for rw lock. */ +UNIV_INLINE +ulint +rw_lock_get_waiters( +/*================*/ + rw_lock_t* lock); +UNIV_INLINE +ulint +rw_lock_get_writer( +/*===============*/ + rw_lock_t* lock); +UNIV_INLINE +ulint +rw_lock_get_reader_count( +/*=====================*/ + rw_lock_t* lock); /********************************************************************** Checks if the thread has locked the rw-lock in the specified mode, with the pass value == 0. */ @@ -414,9 +392,6 @@ rw_lock_debug_print( /*================*/ rw_lock_debug_t* info); /* in: debug struct */ - -#define RW_CNAME_LEN 8 - /* NOTE! The structure appears here only for the compiler to know its size. Do not use its fields directly! The structure used in the spin lock implementation of a read-write lock. 
Several threads may have a shared lock @@ -447,7 +422,7 @@ struct rw_lock_struct { ulint waiters; /* This ulint is set to 1 if there are waiters (readers or writers) in the global wait array, waiting for this rw_lock. - Otherwise, = 0. */ + Otherwise, == 0. */ ibool writer_is_wait_ex; /* This is TRUE if the writer field is RW_LOCK_WAIT_EX; this field is located far @@ -463,9 +438,12 @@ struct rw_lock_struct { info list of the lock */ ulint level; /* Debug version: level in the global latching order; default SYNC_LEVEL_NONE */ - char cfile_name[RW_CNAME_LEN]; - /* File name where lock created */ + char* cfile_name; /* File name where lock created */ ulint cline; /* Line where created */ + char* last_s_file_name;/* File name where last time s-locked */ + char* last_x_file_name;/* File name where last time x-locked */ + ulint last_s_line; /* Line number where last time s-locked */ + ulint last_x_line; /* Line number where last time x-locked */ ulint magic_n; }; diff --git a/innobase/include/sync0rw.ic b/innobase/include/sync0rw.ic index 11add13d2d0..2a02cfb6a53 100644 --- a/innobase/include/sync0rw.ic +++ b/innobase/include/sync0rw.ic @@ -15,14 +15,11 @@ waiting for the lock before suspending the thread. */ void rw_lock_s_lock_spin( /*================*/ - rw_lock_t* lock /* in: pointer to rw-lock */ - #ifdef UNIV_SYNC_DEBUG - ,ulint pass, /* in: pass value; != 0, if the lock will + rw_lock_t* lock, /* in: pointer to rw-lock */ + ulint pass, /* in: pass value; != 0, if the lock will be passed to another thread to unlock */ - char* file_name, /* in: file name where lock requested */ - ulint line /* in: line where requested */ - #endif -); + char* file_name,/* in: file name where lock requested */ + ulint line); /* in: line where requested */ /********************************************************************** Inserts the debug information for an rw-lock. 
*/ @@ -128,14 +125,11 @@ ibool rw_lock_s_lock_low( /*===============*/ /* out: TRUE if success */ - rw_lock_t* lock /* in: pointer to rw-lock */ - #ifdef UNIV_SYNC_DEBUG - ,ulint pass, /* in: pass value; != 0, if the lock will be + rw_lock_t* lock, /* in: pointer to rw-lock */ + ulint pass, /* in: pass value; != 0, if the lock will be passed to another thread to unlock */ char* file_name, /* in: file name where lock requested */ - ulint line /* in: line where requested */ - #endif -) + ulint line) /* in: line where requested */ { ut_ad(mutex_own(rw_lock_get_mutex(lock))); @@ -150,6 +144,9 @@ rw_lock_s_lock_low( line); #endif + lock->last_s_file_name = file_name; + lock->last_s_line = line; + return(TRUE); /* locking succeeded */ } @@ -164,12 +161,9 @@ UNIV_INLINE void rw_lock_s_lock_direct( /*==================*/ - rw_lock_t* lock /* in: pointer to rw-lock */ - #ifdef UNIV_SYNC_DEBUG - ,char* file_name, /* in: file name where lock requested */ - ulint line /* in: line where requested */ - #endif -) + rw_lock_t* lock, /* in: pointer to rw-lock */ + char* file_name,/* in: file name where lock requested */ + ulint line) /* in: line where requested */ { ut_ad(lock->writer == RW_LOCK_NOT_LOCKED); ut_ad(rw_lock_get_reader_count(lock) == 0); @@ -177,6 +171,9 @@ rw_lock_s_lock_direct( /* Set the shared lock by incrementing the reader count */ lock->reader_count++; + lock->last_s_file_name = file_name; + lock->last_s_line = line; + #ifdef UNIV_SYNC_DEBUG rw_lock_add_debug_info(lock, 0, RW_LOCK_SHARED, file_name, line); #endif @@ -190,12 +187,9 @@ UNIV_INLINE void rw_lock_x_lock_direct( /*==================*/ - rw_lock_t* lock /* in: pointer to rw-lock */ - #ifdef UNIV_SYNC_DEBUG - ,char* file_name, /* in: file name where lock requested */ - ulint line /* in: line where requested */ - #endif -) + rw_lock_t* lock, /* in: pointer to rw-lock */ + char* file_name, /* in: file name where lock requested */ + ulint line) /* in: line where requested */ { 
ut_ad(rw_lock_validate(lock)); ut_ad(rw_lock_get_reader_count(lock) == 0); @@ -206,6 +200,9 @@ rw_lock_x_lock_direct( lock->writer_count++; lock->pass = 0; + lock->last_x_file_name = file_name; + lock->last_x_line = line; + #ifdef UNIV_SYNC_DEBUG rw_lock_add_debug_info(lock, 0, RW_LOCK_EX, file_name, line); #endif @@ -221,14 +218,11 @@ UNIV_INLINE void rw_lock_s_lock_func( /*================*/ - rw_lock_t* lock /* in: pointer to rw-lock */ - #ifdef UNIV_SYNC_DEBUG - ,ulint pass, /* in: pass value; != 0, if the lock will + rw_lock_t* lock, /* in: pointer to rw-lock */ + ulint pass, /* in: pass value; != 0, if the lock will be passed to another thread to unlock */ char* file_name, /* in: file name where lock requested */ - ulint line /* in: line where requested */ - #endif -) + ulint line) /* in: line where requested */ { /* NOTE: As we do not know the thread ids for threads which have s-locked a latch, and s-lockers will be served only after waiting @@ -245,11 +239,7 @@ rw_lock_s_lock_func( mutex_enter(rw_lock_get_mutex(lock)); - if (TRUE == rw_lock_s_lock_low(lock - #ifdef UNIV_SYNC_DEBUG - ,pass, file_name, line - #endif - )) { + if (TRUE == rw_lock_s_lock_low(lock, pass, file_name, line)) { mutex_exit(rw_lock_get_mutex(lock)); return; /* Success */ @@ -257,11 +247,8 @@ rw_lock_s_lock_func( /* Did not succeed, try spin wait */ mutex_exit(rw_lock_get_mutex(lock)); - rw_lock_s_lock_spin(lock - #ifdef UNIV_SYNC_DEBUG - ,pass, file_name, line - #endif - ); + rw_lock_s_lock_spin(lock, pass, file_name, line); + return; } } @@ -275,12 +262,9 @@ ibool rw_lock_s_lock_func_nowait( /*=======================*/ /* out: TRUE if success */ - rw_lock_t* lock /* in: pointer to rw-lock */ - #ifdef UNIV_SYNC_DEBUG - ,char* file_name, /* in: file name where lock requested */ - ulint line /* in: line where requested */ - #endif -) + rw_lock_t* lock, /* in: pointer to rw-lock */ + char* file_name,/* in: file name where lock requested */ + ulint line) /* in: line where requested */ { 
ibool success = FALSE; @@ -294,6 +278,9 @@ rw_lock_s_lock_func_nowait( rw_lock_add_debug_info(lock, 0, RW_LOCK_SHARED, file_name, line); #endif + + lock->last_s_file_name = file_name; + lock->last_s_line = line; success = TRUE; } @@ -312,12 +299,9 @@ ibool rw_lock_x_lock_func_nowait( /*=======================*/ /* out: TRUE if success */ - rw_lock_t* lock /* in: pointer to rw-lock */ - #ifdef UNIV_SYNC_DEBUG - ,char* file_name, /* in: file name where lock requested */ - ulint line /* in: line where requested */ - #endif -) + rw_lock_t* lock, /* in: pointer to rw-lock */ + char* file_name, /* in: file name where lock requested */ + ulint line) /* in: line where requested */ { ibool success = FALSE; @@ -338,6 +322,9 @@ rw_lock_x_lock_func_nowait( rw_lock_add_debug_info(lock, 0, RW_LOCK_EX, file_name, line); #endif + lock->last_x_file_name = file_name; + lock->last_x_line = line; + success = TRUE; } diff --git a/innobase/include/sync0sync.h b/innobase/include/sync0sync.h index cb86b2b815c..6c3bff66e27 100644 --- a/innobase/include/sync0sync.h +++ b/innobase/include/sync0sync.h @@ -64,24 +64,15 @@ mutex_free( NOTE! The following macro should be used in mutex locking, not the corresponding function. */ -#ifdef UNIV_SYNC_DEBUG #define mutex_enter(M) mutex_enter_func((M), IB__FILE__, __LINE__) -#else -#define mutex_enter(M) mutex_enter_func(M) -#endif /****************************************************************** NOTE! The following macro should be used in mutex locking, not the corresponding function. */ /* NOTE! currently same as mutex_enter! */ -#ifdef UNIV_SYNC_DEBUG -#define mutex_enter_fast(M) mutex_enter_func((M), IB__FILE__, __LINE__) -#else -#define mutex_enter_fast(M) mutex_enter_func(M) -#endif - -#define mutex_enter_fast_func mutex_enter_func; +#define mutex_enter_fast(M) mutex_enter_func((M), IB__FILE__, __LINE__) +#define mutex_enter_fast_func mutex_enter_func; /********************************************************************** NOTE! 
Use the corresponding macro in the header file, not this function directly. Locks a mutex for the current thread. If the mutex is reserved @@ -91,12 +82,9 @@ UNIV_INLINE void mutex_enter_func( /*=============*/ - mutex_t* mutex /* in: pointer to mutex */ - #ifdef UNIV_SYNC_DEBUG - ,char* file_name, /* in: file name where locked */ - ulint line /* in: line where locked */ - #endif - ); + mutex_t* mutex, /* in: pointer to mutex */ + char* file_name, /* in: file name where locked */ + ulint line); /* in: line where locked */ /************************************************************************ Tries to lock the mutex for the current thread. If the lock is not acquired immediately, returns with return value 1. */ @@ -104,8 +92,11 @@ immediately, returns with return value 1. */ ulint mutex_enter_nowait( /*===============*/ - /* out: 0 if succeed, 1 if not */ - mutex_t* mutex); /* in: pointer to mutex */ + /* out: 0 if succeed, 1 if not */ + mutex_t* mutex, /* in: pointer to mutex */ + char* file_name, /* in: file name where mutex + requested */ + ulint line); /* in: line where requested */ /********************************************************************** Unlocks a mutex owned by the current thread. */ UNIV_INLINE @@ -373,6 +364,7 @@ Memory pool mutex */ #define SYNC_LEVEL_NONE 2000 /* default: level not defined */ #define SYNC_DICT 1000 #define SYNC_DICT_AUTOINC_MUTEX 999 +#define SYNC_FOREIGN_KEY_CHECK 998 #define SYNC_PURGE_IS_RUNNING 997 #define SYNC_DICT_HEADER 995 #define SYNC_IBUF_HEADER 914 @@ -418,6 +410,7 @@ Memory pool mutex */ #define SYNC_BUF_BLOCK 149 #define SYNC_DOUBLEWRITE 140 #define SYNC_ANY_LATCH 135 +#define SYNC_THR_LOCAL 133 #define SYNC_MEM_HASH 131 #define SYNC_MEM_POOL 130 @@ -429,8 +422,6 @@ Memory pool mutex */ #define RW_LOCK_WAIT_EX 353 #define SYNC_MUTEX 354 -#define MUTEX_CNAME_LEN 8 - /* NOTE! The structure appears here only for the compiler to know its size. Do not use its fields directly! 
The structure used in the spin lock implementation of a mutual exclusion semaphore. */ @@ -457,8 +448,7 @@ struct mutex_struct { locked */ ulint level; /* Debug version: level in the global latching order; default SYNC_LEVEL_NONE */ - char cfile_name[MUTEX_CNAME_LEN]; - /* File name where mutex created */ + char* cfile_name; /* File name where mutex created */ ulint cline; /* Line where created */ ulint magic_n; }; diff --git a/innobase/include/sync0sync.ic b/innobase/include/sync0sync.ic index f7b341cb386..9531377ce0b 100644 --- a/innobase/include/sync0sync.ic +++ b/innobase/include/sync0sync.ic @@ -22,13 +22,9 @@ for the mutex before suspending the thread. */ void mutex_spin_wait( /*============*/ - mutex_t* mutex /* in: pointer to mutex */ - - #ifdef UNIV_SYNC_DEBUG - ,char* file_name, /* in: file name where mutex requested */ - ulint line /* in: line where requested */ - #endif -); + mutex_t* mutex, /* in: pointer to mutex */ + char* file_name,/* in: file name where mutex requested */ + ulint line); /* in: line where requested */ /********************************************************************** Sets the debug information for a reserved mutex. */ @@ -209,6 +205,18 @@ mutex_exit( #endif mutex_reset_lock_word(mutex); + /* A problem: we assume that mutex_reset_lock_word + is a memory barrier, that is when we read the waiters + field next, the read must be serialized in memory + after the reset. A speculative processor might + perform the read first, which could leave a waiting + thread hanging indefinitely. + + Our current solution is to call + sync_arr_wake_threads_if_sema_free() every 10 seconds + to wake up possibly hanging threads if + they are missed in mutex_signal_object. 
*/ + if (mutex_get_waiters(mutex) != 0) { mutex_signal_object(mutex); @@ -227,12 +235,9 @@ UNIV_INLINE void mutex_enter_func( /*=============*/ - mutex_t* mutex /* in: pointer to mutex */ - #ifdef UNIV_SYNC_DEBUG - ,char* file_name, /* in: file name where locked */ - ulint line /* in: line where locked */ - #endif - ) + mutex_t* mutex, /* in: pointer to mutex */ + char* file_name,/* in: file name where locked */ + ulint line) /* in: line where locked */ { ut_ad(mutex_validate(mutex)); @@ -245,13 +250,11 @@ mutex_enter_func( mutex_set_debug_info(mutex, file_name, line); #endif + mutex->file_name = file_name; + mutex->line = line; + return; /* Succeeded! */ } - mutex_spin_wait(mutex - #ifdef UNIV_SYNC_DEBUG - ,file_name, - line - #endif - ); + mutex_spin_wait(mutex, file_name, line); } diff --git a/innobase/include/trx0rseg.ic b/innobase/include/trx0rseg.ic index aeb4466ff0f..423447d5566 100644 --- a/innobase/include/trx0rseg.ic +++ b/innobase/include/trx0rseg.ic @@ -61,7 +61,11 @@ trx_rsegf_get_nth_undo( ulint n, /* in: index of slot */ mtr_t* mtr) /* in: mtr */ { - ut_ad(n < TRX_RSEG_N_SLOTS); + if (n >= TRX_RSEG_N_SLOTS) { + fprintf(stderr, + "InnoDB: Error: trying to get slot %lu of rseg\n", n); + ut_a(0); + } return(mtr_read_ulint(rsegf + TRX_RSEG_UNDO_SLOTS + n * TRX_RSEG_SLOT_SIZE, MLOG_4BYTES, mtr)); @@ -78,7 +82,11 @@ trx_rsegf_set_nth_undo( ulint page_no,/* in: page number of the undo log segment */ mtr_t* mtr) /* in: mtr */ { - ut_ad(n < TRX_RSEG_N_SLOTS); + if (n >= TRX_RSEG_N_SLOTS) { + fprintf(stderr, + "InnoDB: Error: trying to set slot %lu of rseg\n", n); + ut_a(0); + } mlog_write_ulint(rsegf + TRX_RSEG_UNDO_SLOTS + n * TRX_RSEG_SLOT_SIZE, page_no, MLOG_4BYTES, mtr); diff --git a/innobase/include/trx0trx.h b/innobase/include/trx0trx.h index f179e20ad62..8db0b39d3b4 100644 --- a/innobase/include/trx0trx.h +++ b/innobase/include/trx0trx.h @@ -317,6 +317,19 @@ struct trx_struct{ ibool has_search_latch; /* TRUE if this trx has latched the search system 
latch in S-mode */ + ulint search_latch_timeout; + /* If we notice that someone is + waiting for our S-lock on the search + latch to be released, we wait in + row0sel.c for BTR_SEA_TIMEOUT new + searches until we try to keep + the search latch again over + calls from MySQL; this is intended + to reduce contention on the search + latch */ + lock_t* auto_inc_lock; /* possible auto-inc lock reserved by + the transaction; note that it is also + in the lock list trx_locks */ ibool ignore_duplicates_in_insert; /* in an insert roll back only insert of the latest row in case @@ -401,11 +414,9 @@ struct trx_struct{ checking algorithm */ /*------------------------------*/ mem_heap_t* lock_heap; /* memory heap for the locks of the - transaction; protected by - lock_heap_mutex */ + transaction */ UT_LIST_BASE_NODE_T(lock_t) - trx_locks; /* locks reserved by the transaction; - protected by lock_heap_mutex */ + trx_locks; /* locks reserved by the transaction */ /*------------------------------*/ mem_heap_t* read_view_heap; /* memory heap for the read view */ read_view_t* read_view; /* consistent read view or NULL */ diff --git a/innobase/include/ut0mem.h b/innobase/include/ut0mem.h index 8e5a4fda0d3..2d245e5f72f 100644 --- a/innobase/include/ut0mem.h +++ b/innobase/include/ut0mem.h @@ -13,6 +13,9 @@ Created 5/30/1994 Heikki Tuuri #include <string.h> #include <stdlib.h> +/* The total amount of memory currently allocated from the OS with malloc */ +extern ulint ut_total_allocated_memory; + UNIV_INLINE void* ut_memcpy(void* dest, void* sour, ulint n); diff --git a/innobase/lock/lock0lock.c b/innobase/lock/lock0lock.c index df35e22005f..fa0641bad73 100644 --- a/innobase/lock/lock0lock.c +++ b/innobase/lock/lock0lock.c @@ -578,6 +578,17 @@ lock_sys_create( } /************************************************************************* +Gets the size of a lock struct. 
*/ + +ulint +lock_get_size(void) +/*===============*/ + /* out: size in bytes */ +{ + return((ulint)sizeof(lock_t)); +} + +/************************************************************************* Gets the mode of a lock. */ UNIV_INLINE ulint @@ -709,13 +720,17 @@ lock_mode_stronger_or_eq( ulint mode2) /* in: lock mode */ { ut_ad(mode1 == LOCK_X || mode1 == LOCK_S || mode1 == LOCK_IX - || mode1 == LOCK_IS); + || mode1 == LOCK_IS || mode1 == LOCK_AUTO_INC); ut_ad(mode2 == LOCK_X || mode2 == LOCK_S || mode2 == LOCK_IX - || mode2 == LOCK_IS); + || mode2 == LOCK_IS || mode2 == LOCK_AUTO_INC); if (mode1 == LOCK_X) { return(TRUE); + } else if (mode1 == LOCK_AUTO_INC && mode2 == LOCK_AUTO_INC) { + + return(TRUE); + } else if (mode1 == LOCK_S && (mode2 == LOCK_S || mode2 == LOCK_IS)) { return(TRUE); @@ -743,9 +758,9 @@ lock_mode_compatible( ulint mode2) /* in: lock mode */ { ut_ad(mode1 == LOCK_X || mode1 == LOCK_S || mode1 == LOCK_IX - || mode1 == LOCK_IS); + || mode1 == LOCK_IS || mode1 == LOCK_AUTO_INC); ut_ad(mode2 == LOCK_X || mode2 == LOCK_S || mode2 == LOCK_IX - || mode2 == LOCK_IS); + || mode2 == LOCK_IS || mode2 == LOCK_AUTO_INC); if (mode1 == LOCK_S && (mode2 == LOCK_IS || mode2 == LOCK_S)) { @@ -755,12 +770,18 @@ lock_mode_compatible( return(FALSE); + } else if (mode1 == LOCK_AUTO_INC && (mode2 == LOCK_IS + || mode2 == LOCK_IX)) { + return(TRUE); + } else if (mode1 == LOCK_IS && (mode2 == LOCK_IS || mode2 == LOCK_IX + || mode2 == LOCK_AUTO_INC || mode2 == LOCK_S)) { return(TRUE); } else if (mode1 == LOCK_IX && (mode2 == LOCK_IS + || mode2 == LOCK_AUTO_INC || mode2 == LOCK_IX)) { return(TRUE); } @@ -1836,7 +1857,7 @@ lock_grant( Cancels a waiting record lock request and releases the waiting transaction that requested it. NOTE: does NOT check if waiting lock requests behind this one can now be granted! 
*/ - +static void lock_rec_cancel( /*============*/ @@ -2812,7 +2833,18 @@ lock_table_create( ut_ad(table && trx); ut_ad(mutex_own(&kernel_mutex)); - lock = mem_heap_alloc(trx->lock_heap, sizeof(lock_t)); + if (type_mode == LOCK_AUTO_INC) { + /* Only one trx can have the lock on the table + at a time: we may use the memory preallocated + to the table object */ + + lock = table->auto_inc_lock; + + ut_a(trx->auto_inc_lock == NULL); + trx->auto_inc_lock = lock; + } else { + lock = mem_heap_alloc(trx->lock_heap, sizeof(lock_t)); + } if (lock == NULL) { @@ -2854,6 +2886,10 @@ lock_table_remove_low( table = lock->un_member.tab_lock.table; trx = lock->trx; + if (lock == trx->auto_inc_lock) { + trx->auto_inc_lock = NULL; + } + UT_LIST_REMOVE(trx_locks, trx->trx_locks, lock); UT_LIST_REMOVE(un_member.tab_lock.locks, table->locks, lock); } @@ -2988,7 +3024,7 @@ lock_table( if (lock_table_other_has_incompatible(trx, LOCK_WAIT, table, mode)) { - /* Another trx has request on the table in an incompatible + /* Another trx has a request on the table in an incompatible mode: this trx must wait */ err = lock_table_enqueue_waiting(mode, table, thr); @@ -3102,6 +3138,24 @@ lock_table_dequeue( /*=========================== LOCK RELEASE ==============================*/ /************************************************************************* +Releases an auto-inc lock a transaction possibly has on a table. +Releases possible other transactions waiting for this lock. */ + +void +lock_table_unlock_auto_inc( +/*=======================*/ + trx_t* trx) /* in: transaction */ +{ + if (trx->auto_inc_lock) { + mutex_enter(&kernel_mutex); + + lock_table_dequeue(trx->auto_inc_lock); + + mutex_exit(&kernel_mutex); + } +} + +/************************************************************************* Releases transaction locks, and releases possible other transactions waiting because of these locks. 
*/ @@ -3147,6 +3201,37 @@ lock_release_off_kernel( } mem_heap_empty(trx->lock_heap); + + ut_a(trx->auto_inc_lock == NULL); +} + +/************************************************************************* +Cancels a waiting lock request and releases possible other transactions +waiting behind it. */ + +void +lock_cancel_waiting_and_release( +/*============================*/ + lock_t* lock) /* in: waiting lock request */ +{ + ut_ad(mutex_own(&kernel_mutex)); + + if (lock_get_type(lock) == LOCK_REC) { + + lock_rec_dequeue_from_page(lock); + } else { + ut_ad(lock_get_type(lock) == LOCK_TABLE); + + lock_table_dequeue(lock); + } + + /* Reset the wait flag and the back pointer to lock in trx */ + + lock_reset_lock_and_trx_wait(lock); + + /* The following function releases the trx from lock wait */ + + trx_end_lock_wait(lock->trx); } /************************************************************************* @@ -3237,8 +3322,10 @@ lock_table_print( printf(" lock_mode IS"); } else if (lock_get_mode(lock) == LOCK_IX) { printf(" lock_mode IX"); + } else if (lock_get_mode(lock) == LOCK_AUTO_INC) { + printf(" lock_mode AUTO-INC"); } else { - ut_error; + printf(" unknown lock_mode %lu", lock_get_mode(lock)); } if (lock_get_wait(lock)) { @@ -3304,10 +3391,7 @@ lock_rec_print( page = buf_page_get_gen(space, page_no, RW_NO_LATCH, NULL, BUF_GET_IF_IN_POOL, -#ifdef UNIV_SYNC_DEBUG - IB__FILE__, __LINE__, -#endif - &mtr); + IB__FILE__, __LINE__, &mtr); if (page) { page = buf_page_get_nowait(space, page_no, RW_S_LATCH, &mtr); } @@ -3417,6 +3501,11 @@ loop: trx = UT_LIST_GET_FIRST(trx_sys->trx_list); i = 0; + + /* Since we temporarily release the kernel mutex when + reading a database page in below, variable trx may be + obsolete now and we must loop through the trx list to + get probably the same trx, or some other trx. 
*/ while (trx && (i < nth_trx)) { trx = UT_LIST_GET_NEXT(trx_list, trx); @@ -3466,6 +3555,9 @@ loop: i = 0; + /* Look at the note about the trx loop above why we loop here: + lock may be an obsolete pointer now. */ + lock = UT_LIST_GET_FIRST(trx->trx_locks); while (lock && (i < nth_lock)) { diff --git a/innobase/log/log0log.c b/innobase/log/log0log.c index ec42c8f2e08..06ac7a578a5 100644 --- a/innobase/log/log0log.c +++ b/innobase/log/log0log.c @@ -569,9 +569,12 @@ log_init(void) ut_a(LOG_BUFFER_SIZE >= 4 * UNIV_PAGE_SIZE); buf = ut_malloc(LOG_BUFFER_SIZE + OS_FILE_LOG_BLOCK_SIZE); - log_sys->buf = ut_align(buf, OS_FILE_LOG_BLOCK_SIZE); + log_sys->buf = ut_align(buf, OS_FILE_LOG_BLOCK_SIZE); log_sys->buf_size = LOG_BUFFER_SIZE; + + memset(log_sys->buf, '\0', LOG_BUFFER_SIZE); + log_sys->max_buf_free = log_sys->buf_size / LOG_BUF_FLUSH_RATIO - LOG_BUF_FLUSH_MARGIN; log_sys->check_flush_or_checkpoint = TRUE; @@ -579,6 +582,8 @@ log_init(void) log_sys->n_log_ios = 0; + log_sys->n_log_ios_old = log_sys->n_log_ios; + log_sys->last_printout_time = time(NULL); /*----------------------------*/ log_sys->buf_next_to_write = 0; @@ -609,6 +614,7 @@ log_init(void) log_sys->checkpoint_buf = ut_align( mem_alloc(2 * OS_FILE_LOG_BLOCK_SIZE), OS_FILE_LOG_BLOCK_SIZE); + memset(log_sys->checkpoint_buf, '\0', OS_FILE_LOG_BLOCK_SIZE); /*----------------------------*/ log_sys->archiving_state = LOG_ARCH_ON; @@ -626,6 +632,8 @@ log_init(void) OS_FILE_LOG_BLOCK_SIZE); log_sys->archive_buf_size = LOG_ARCHIVE_BUF_SIZE; + memset(log_sys->archive_buf, '\0', LOG_ARCHIVE_BUF_SIZE); + log_sys->archiving_on = os_event_create(NULL); /*----------------------------*/ @@ -1849,7 +1857,7 @@ log_group_archive( os_file_t file_handle; dulint start_lsn; dulint end_lsn; - char name[100]; + char name[1024]; byte* buf; ulint len; ibool ret; @@ -2796,8 +2804,35 @@ void log_print(void) /*===========*/ { - printf("Log sequence number %lu %lu\n", - ut_dulint_get_high(log_sys->lsn), - 
ut_dulint_get_low(log_sys->lsn)); -} + double time_elapsed; + time_t current_time; + + mutex_enter(&(log_sys->mutex)); + printf("Log sequence number %lu %lu\n" + "Log flushed up to %lu %lu\n" + "Last checkpoint at %lu %lu\n", + ut_dulint_get_high(log_sys->lsn), + ut_dulint_get_low(log_sys->lsn), + ut_dulint_get_high(log_sys->written_to_some_lsn), + ut_dulint_get_low(log_sys->written_to_some_lsn), + ut_dulint_get_high(log_sys->last_checkpoint_lsn), + ut_dulint_get_low(log_sys->last_checkpoint_lsn)); + + current_time = time(NULL); + + time_elapsed = difftime(current_time, log_sys->last_printout_time); + + printf( + "%lu pending log writes, %lu pending chkp writes\n" + "%lu log i/o's done, %.2f log i/o's/second\n", + log_sys->n_pending_writes, + log_sys->n_pending_checkpoint_writes, + log_sys->n_log_ios, + (log_sys->n_log_ios - log_sys->n_log_ios_old) / time_elapsed); + + log_sys->n_log_ios_old = log_sys->n_log_ios; + log_sys->last_printout_time = current_time; + + mutex_exit(&(log_sys->mutex)); +} diff --git a/innobase/log/log0recv.c b/innobase/log/log0recv.c index edab98fa39c..eb3eadcede9 100644 --- a/innobase/log/log0recv.c +++ b/innobase/log/log0recv.c @@ -560,6 +560,7 @@ recv_parse_or_apply_log_rec_body( } else if (type <= MLOG_WRITE_STRING) { new_ptr = mlog_parse_string(ptr, end_ptr, page); } else { + new_ptr = NULL; /* Eliminate compiler warning */ ut_error; } @@ -801,9 +802,7 @@ recv_recover_page( mtr_set_log_mode(&mtr, MTR_LOG_NONE); success = buf_page_get_known_nowait(RW_X_LATCH, page, BUF_KEEP_OLD, -#ifdef UNIV_SYNC_DEBUG IB__FILE__, __LINE__, -#endif &mtr); ut_a(success); @@ -1212,9 +1211,7 @@ recv_compare_spaces( frame = buf_page_get_gen(space1, page_no, RW_S_LATCH, NULL, BUF_GET_IF_IN_POOL, -#ifdef UNIV_SYNC_DEBUG IB__FILE__, __LINE__, -#endif &mtr); if (frame) { buf_page_dbg_add_level(frame, SYNC_NO_ORDER_CHECK); @@ -1227,9 +1224,7 @@ recv_compare_spaces( frame = buf_page_get_gen(space2, page_no, RW_S_LATCH, NULL, BUF_GET_IF_IN_POOL, -#ifdef 
UNIV_SYNC_DEBUG IB__FILE__, __LINE__, -#endif &mtr); if (frame) { buf_page_dbg_add_level(frame, SYNC_NO_ORDER_CHECK); @@ -2033,8 +2028,11 @@ recv_recovery_from_checkpoint_start( while (group) { old_scanned_lsn = recv_sys->scanned_lsn; - recv_group_scan_log_recs(group, &contiguous_lsn, + if (srv_force_recovery < SRV_FORCE_NO_LOG_REDO) { + recv_group_scan_log_recs(group, &contiguous_lsn, &group_scanned_lsn); + } + group->scanned_lsn = group_scanned_lsn; if (ut_dulint_cmp(old_scanned_lsn, group_scanned_lsn) < 0) { @@ -2120,10 +2118,12 @@ recv_recovery_from_checkpoint_finish(void) { /* Rollback the uncommitted transactions which have no user session */ - trx_rollback_all_without_sess(); + if (srv_force_recovery < SRV_FORCE_NO_TRX_UNDO) { + trx_rollback_all_without_sess(); + } /* Apply the hashed log records to the respective file pages */ - + recv_apply_hashed_log_recs(TRUE); if (log_debug_writes) { diff --git a/innobase/mem/mem0pool.c b/innobase/mem/mem0pool.c index 6c3a4adebae..48e7e686953 100644 --- a/innobase/mem/mem0pool.c +++ b/innobase/mem/mem0pool.c @@ -76,7 +76,7 @@ pool, and after that its locks will grow into the buffer pool. */ #define MEM_AREA_FREE 1 /* The smallest memory area total size */ -#define MEM_AREA_MIN_SIZE (2 * sizeof(struct mem_area_struct)) +#define MEM_AREA_MIN_SIZE (2 * MEM_AREA_EXTRA_SIZE) /* Data structure for a memory pool. The space is allocated using the buddy algorithm, where free list i contains areas of size 2 to power i. */ @@ -556,7 +556,7 @@ Returns the amount of reserved memory. 
*/ ulint mem_pool_get_reserved( /*==================*/ - /* out: reserved mmeory in bytes */ + /* out: reserved memory in bytes */ mem_pool_t* pool) /* in: memory pool */ { ulint reserved; diff --git a/innobase/mtr/mtr0log.c b/innobase/mtr/mtr0log.c index 11c0c476fcb..26f5a5d1cb7 100644 --- a/innobase/mtr/mtr0log.c +++ b/innobase/mtr/mtr0log.c @@ -54,6 +54,13 @@ mlog_write_initial_log_record( ut_ad(type <= MLOG_BIGGEST_TYPE); + if (ptr < buf_pool->frame_zero || ptr >= buf_pool->high_end) { + fprintf(stderr, + "InnoDB: Error: trying to write to a stray memory location %lx\n", + (ulint)ptr); + ut_a(0); + } + log_ptr = mlog_open(mtr, 20); /* If no logging is requested, we may return now */ @@ -184,6 +191,13 @@ mlog_write_ulint( { byte* log_ptr; + if (ptr < buf_pool->frame_zero || ptr >= buf_pool->high_end) { + fprintf(stderr, + "InnoDB: Error: trying to write to a stray memory location %lx\n", + (ulint)ptr); + ut_a(0); + } + if (type == MLOG_1BYTE) { mach_write_to_1(ptr, val); } else if (type == MLOG_2BYTES) { @@ -225,6 +239,13 @@ mlog_write_dulint( { byte* log_ptr; + if (ptr < buf_pool->frame_zero || ptr >= buf_pool->high_end) { + fprintf(stderr, + "InnoDB: Error: trying to write to a stray memory location %lx\n", + (ulint)ptr); + ut_a(0); + } + ut_ad(ptr && mtr); ut_ad(type == MLOG_8BYTES); @@ -262,6 +283,12 @@ mlog_write_string( { byte* log_ptr; + if (ptr < buf_pool->frame_zero || ptr >= buf_pool->high_end) { + fprintf(stderr, + "InnoDB: Error: trying to write to a stray memory location %lx\n", + (ulint)ptr); + ut_a(0); + } ut_ad(ptr && mtr); ut_ad(len < UNIV_PAGE_SIZE); diff --git a/innobase/os/os0file.c b/innobase/os/os0file.c index d4d30f6aabc..ced601d4de1 100644 --- a/innobase/os/os0file.c +++ b/innobase/os/os0file.c @@ -11,6 +11,7 @@ Created 10/21/1995 Heikki Tuuri #include "ut0mem.h" #include "srv0srv.h" #include "trx0sys.h" +#include "fil0fil.h" #undef HAVE_FDATASYNC @@ -109,6 +110,14 @@ os_aio_array_t* os_aio_sync_array = NULL; ulint os_aio_n_segments = 
ULINT_UNDEFINED; +ulint os_n_file_reads = 0; +ulint os_n_file_writes = 0; +ulint os_n_fsyncs = 0; +ulint os_n_file_reads_old = 0; +ulint os_n_file_writes_old = 0; +ulint os_n_fsyncs_old = 0; +time_t os_last_printout; + /*************************************************************************** Gets the operating system version. Currently works only on Windows. */ @@ -118,26 +127,26 @@ os_get_os_version(void) /* out: OS_WIN95, OS_WIN31, OS_WINNT (2000 == NT) */ { #ifdef __WIN__ - OSVERSIONINFO os_info; - - os_info.dwOSVersionInfoSize = sizeof(OSVERSIONINFO); - - ut_a(GetVersionEx(&os_info)); - - if (os_info.dwPlatformId == VER_PLATFORM_WIN32s) { - return(OS_WIN31); - } else if (os_info.dwPlatformId == VER_PLATFORM_WIN32_WINDOWS) { - return(OS_WIN95); - } else if (os_info.dwPlatformId == VER_PLATFORM_WIN32_NT) { - return(OS_WINNT); - } else { - ut_error; - return(0); - } + OSVERSIONINFO os_info; + + os_info.dwOSVersionInfoSize = sizeof(OSVERSIONINFO); + + ut_a(GetVersionEx(&os_info)); + + if (os_info.dwPlatformId == VER_PLATFORM_WIN32s) { + return(OS_WIN31); + } else if (os_info.dwPlatformId == VER_PLATFORM_WIN32_WINDOWS) { + return(OS_WIN95); + } else if (os_info.dwPlatformId == VER_PLATFORM_WIN32_NT) { + return(OS_WINNT); + } else { + ut_error; + return(0); + } #else - ut_error; + ut_error; - return(0); + return(0); #endif } @@ -160,7 +169,7 @@ os_file_get_last_error(void) if (err != ERROR_FILE_EXISTS) { fprintf(stderr, - "InnoDB: operating system error number %li in a file operation.\n", + "InnoDB: Warning: operating system error number %li in a file operation.\n", (long) err); } @@ -178,7 +187,7 @@ os_file_get_last_error(void) if (err != EEXIST) { fprintf(stderr, - "InnoDB: operating system error number %i in a file operation.\n", + "InnoDB: Warning: operating system error number %i in a file operation.\n", errno); } @@ -231,8 +240,10 @@ os_file_handle_error( exit(1); } else if (err == OS_FILE_AIO_RESOURCES_RESERVED) { - return(TRUE); + + } else if (err == 
OS_FILE_ALREADY_EXISTS) { + return(FALSE); } else { fprintf(stderr, "InnoDB: Cannot continue operation.\n"); @@ -317,14 +328,10 @@ try_again: if (file == INVALID_HANDLE_VALUE) { *success = FALSE; - if (create_mode != OS_FILE_OPEN - && os_file_get_last_error() == OS_FILE_DISK_FULL) { - - retry = os_file_handle_error(file, name); + retry = os_file_handle_error(file, name); - if (retry) { - goto try_again; - } + if (retry) { + goto try_again; } } else { *success = TRUE; @@ -369,14 +376,10 @@ try_again: if (file == -1) { *success = FALSE; - if (create_mode != OS_FILE_OPEN - && errno == ENOSPC) { + retry = os_file_handle_error(file, name); - retry = os_file_handle_error(file, name); - - if (retry) { - goto try_again; - } + if (retry) { + goto try_again; } } else { *success = TRUE; @@ -407,6 +410,7 @@ os_file_close( return(TRUE); } + os_file_handle_error(file, NULL); return(FALSE); #else int ret; @@ -414,6 +418,7 @@ os_file_close( ret = close(file); if (ret == -1) { + os_file_handle_error(file, NULL); return(FALSE); } @@ -551,6 +556,8 @@ os_file_flush( return(TRUE); } + os_file_handle_error(file, NULL); + return(FALSE); #else int ret; @@ -560,6 +567,8 @@ os_file_flush( #else ret = fsync(file); #endif + os_n_fsyncs++; + if (ret == 0) { return(TRUE); } @@ -589,9 +598,12 @@ os_file_pread( { off_t offs = (off_t)offset; + os_n_file_reads++; + #ifdef HAVE_PREAD return(pread(file, buf, n, offs)); #else + { ssize_t ret; ulint i; @@ -613,6 +625,7 @@ os_file_pread( os_mutex_exit(os_file_seek_mutexes[i]); return(ret); + } #endif } @@ -631,6 +644,8 @@ os_file_pwrite( ssize_t ret; off_t offs = (off_t)offset; + os_n_file_writes++; + #ifdef HAVE_PWRITE ret = pwrite(file, buf, n, offs); @@ -647,6 +662,7 @@ os_file_pwrite( return(ret); #else + { ulint i; /* Protect the seek / write operation with a mutex */ @@ -678,6 +694,7 @@ os_file_pwrite( os_mutex_exit(os_file_seek_mutexes[i]); return(ret); + } #endif } #endif @@ -702,12 +719,13 @@ os_file_read( BOOL ret; DWORD len; DWORD ret2; - 
DWORD err; DWORD low; DWORD high; ibool retry; ulint i; + os_n_file_reads++; + try_again: ut_ad(file); ut_ad(buf); @@ -724,7 +742,6 @@ try_again: ret2 = SetFilePointer(file, low, &high, FILE_BEGIN); if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) { - err = GetLastError(); os_mutex_exit(os_file_seek_mutexes[i]); @@ -738,8 +755,6 @@ try_again: if (ret && len == n) { return(TRUE); } - - err = GetLastError(); #else ibool retry; ssize_t ret; @@ -791,12 +806,12 @@ os_file_write( BOOL ret; DWORD len; DWORD ret2; - DWORD err; DWORD low; DWORD high; ibool retry; ulint i; + os_n_file_writes++; try_again: ut_ad(file); ut_ad(buf); @@ -813,7 +828,6 @@ try_again: ret2 = SetFilePointer(file, low, &high, FILE_BEGIN); if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) { - err = GetLastError(); os_mutex_exit(os_file_seek_mutexes[i]); @@ -987,6 +1001,8 @@ os_aio_init( os_aio_segment_wait_events[i] = os_event_create(NULL); } + os_last_printout = time(NULL); + #ifdef POSIX_ASYNC_IO /* Block aio signals from the current thread and its children: for this to work, the current thread must be the first created @@ -1461,6 +1477,7 @@ try_again: } else if (mode == OS_AIO_SYNC) { array = os_aio_sync_array; } else { + array = NULL; /* Eliminate compiler warning */ ut_error; } @@ -1469,6 +1486,7 @@ try_again: if (type == OS_FILE_READ) { if (os_aio_use_native_aio) { #ifdef WIN_ASYNC_IO + os_n_file_reads++; ret = ReadFile(file, buf, (DWORD)n, &len, &(slot->control)); #elif defined(POSIX_ASYNC_IO) @@ -1485,6 +1503,7 @@ try_again: } else if (type == OS_FILE_WRITE) { if (os_aio_use_native_aio) { #ifdef WIN_ASYNC_IO + os_n_file_writes++; ret = WriteFile(file, buf, (DWORD)n, &len, &(slot->control)); #elif defined(POSIX_ASYNC_IO) @@ -1583,7 +1602,6 @@ os_aio_windows_handle( ulint n; ulint i; ibool ret_val; - ulint err; BOOL ret; DWORD len; @@ -1635,7 +1653,8 @@ os_aio_windows_handle( ut_a(TRUE == os_file_flush(slot->file)); } } else { - err = GetLastError(); + os_file_get_last_error(); + 
ut_error; ret_val = FALSE; @@ -2032,6 +2051,8 @@ os_aio_print(void) os_aio_array_t* array; os_aio_slot_t* slot; ulint n_reserved; + time_t current_time; + double time_elapsed; ulint i; for (i = 0; i < srv_n_file_io_threads; i++) { @@ -2039,7 +2060,7 @@ os_aio_print(void) srv_io_thread_op_info[i]); } - printf("Pending normal aio reads: "); + printf("Pending normal aio reads:"); array = os_aio_read_array; loop: @@ -2066,12 +2087,12 @@ loop: ut_a(array->n_reserved == n_reserved); - printf("%lu\n", n_reserved); + printf(" %lu", n_reserved); os_mutex_exit(array->mutex); if (array == os_aio_read_array) { - printf("Pending aio writes: "); + printf(", aio writes:"); array = os_aio_write_array; @@ -2079,25 +2100,48 @@ loop: } if (array == os_aio_write_array) { - printf("Pending insert buffer aio reads: "); + printf(",\n ibuf aio reads:"); array = os_aio_ibuf_array; goto loop; } if (array == os_aio_ibuf_array) { - printf("Pending log writes or reads: "); + printf(", log i/o's:"); array = os_aio_log_array; goto loop; } if (array == os_aio_log_array) { - printf("Pending synchronous reads or writes: "); + printf(", sync i/o's:"); array = os_aio_sync_array; goto loop; } + + printf("\n"); + + current_time = time(NULL); + time_elapsed = difftime(current_time, os_last_printout); + + printf("Pending flushes (fsync) log: %lu; buffer pool: %lu\n", + fil_n_pending_log_flushes, fil_n_pending_tablespace_flushes); + printf("%lu OS file reads, %lu OS file writes, %lu OS fsyncs\n", + os_n_file_reads, os_n_file_writes, os_n_fsyncs); + printf("%.2f reads/s, %.2f writes/s, %.2f fsyncs/s\n", + (os_n_file_reads - os_n_file_reads_old) + / time_elapsed, + (os_n_file_writes - os_n_file_writes_old) + / time_elapsed, + (os_n_fsyncs - os_n_fsyncs_old) + / time_elapsed); + + os_n_file_reads_old = os_n_file_reads; + os_n_file_writes_old = os_n_file_writes; + os_n_fsyncs_old = os_n_fsyncs; + + os_last_printout = current_time; } /************************************************************************** 
diff --git a/innobase/page/page0page.c b/innobase/page/page0page.c index 511191ecd89..427064bc89c 100644 --- a/innobase/page/page0page.c +++ b/innobase/page/page0page.c @@ -1258,8 +1258,8 @@ page_validate( page_dir_get_nth_slot(page, n_slots - 1))) { fprintf(stderr, "Record heap and dir overlap on a page in index %s, %lu, %lu\n", - index->name, page_header_get_ptr(page, PAGE_HEAP_TOP), - page_dir_get_nth_slot(page, n_slots - 1)); + index->name, (ulint)page_header_get_ptr(page, PAGE_HEAP_TOP), + (ulint)page_dir_get_nth_slot(page, n_slots - 1)); goto func_exit; } diff --git a/innobase/pars/lexyy.c b/innobase/pars/lexyy.c index 64b8963028b..67bd12afa60 100644 --- a/innobase/pars/lexyy.c +++ b/innobase/pars/lexyy.c @@ -7362,7 +7362,7 @@ static void *yy_flex_alloc( size ) unsigned int size; #endif { - return (void *) malloc( size ); + return (void *) mem_alloc( size ); } #ifdef YY_USE_PROTOS @@ -7373,7 +7373,7 @@ void *ptr; unsigned int size; #endif { - return (void *) realloc( ptr, size ); + return (void *) mem_realloc( ptr, size ); } #ifdef YY_USE_PROTOS @@ -7383,6 +7383,6 @@ static void yy_flex_free( ptr ) void *ptr; #endif { - free( ptr ); + mem_free( ptr ); } diff --git a/innobase/pars/pars0grm.c b/innobase/pars/pars0grm.c index e06cba4e69d..1f631c69f41 100644 --- a/innobase/pars/pars0grm.c +++ b/innobase/pars/pars0grm.c @@ -97,11 +97,8 @@ que_node_t */ #define YYSTYPE que_node_t* #include "univ.i" -#undef alloca -#define alloca mem_alloc #include <math.h> #include "pars0pars.h" -#include "mem0mem.h" #include "que0types.h" #include "que0que.h" #include "row0sel.h" @@ -705,7 +702,7 @@ int yydebug; /* nonzero means print parse trace */ /* YYINITDEPTH indicates the initial size of the parser's stacks */ #ifndef YYINITDEPTH -#define YYINITDEPTH 200 +#define YYINITDEPTH 1000 #endif /* YYMAXDEPTH is the maximum size the stacks can grow to @@ -896,17 +893,22 @@ yynewstate: if (yystacksize >= YYMAXDEPTH) { yyerror("parser stack overflow"); + ut_a(0); return 2; } 
yystacksize *= 2; if (yystacksize > YYMAXDEPTH) yystacksize = YYMAXDEPTH; - yyss = (short *) alloca (yystacksize * sizeof (*yyssp)); + + ut_a(0); /* Prevent possible memory leaks through the following + mem_alloc's */ + + yyss = (short *) mem_alloc (yystacksize * sizeof (*yyssp)); __yy_memcpy ((char *)yyss, (char *)yyss1, size * sizeof (*yyssp)); - yyvs = (YYSTYPE *) alloca (yystacksize * sizeof (*yyvsp)); + yyvs = (YYSTYPE *) mem_alloc (yystacksize * sizeof (*yyvsp)); __yy_memcpy ((char *)yyvs, (char *)yyvs1, size * sizeof (*yyvsp)); #ifdef YYLSP_NEEDED - yyls = (YYLTYPE *) alloca (yystacksize * sizeof (*yylsp)); + yyls = (YYLTYPE *) mem_alloc (yystacksize * sizeof (*yylsp)); __yy_memcpy ((char *)yyls, (char *)yyls1, size * sizeof (*yylsp)); #endif #endif /* no yyoverflow */ @@ -1663,7 +1665,7 @@ yyerrlab: /* here on detecting error */ x < (sizeof(yytname) / sizeof(char *)); x++) if (yycheck[x + yyn] == x) size += strlen(yytname[x]) + 15, count++; - msg = (char *) malloc(size + 15); + msg = (char *) mem_alloc(size + 15); if (msg != 0) { strcpy(msg, "parse error"); @@ -1682,7 +1684,7 @@ yyerrlab: /* here on detecting error */ } } yyerror(msg); - free(msg); + mem_free(msg); } else yyerror ("parse error; also virtual memory exceeded"); diff --git a/innobase/pars/pars0opt.c b/innobase/pars/pars0opt.c index 5d187ad2faf..6f4957f96ee 100644 --- a/innobase/pars/pars0opt.c +++ b/innobase/pars/pars0opt.c @@ -543,6 +543,7 @@ opt_search_plan_for_table( /* Calculate goodness for each index of the table */ index = dict_table_get_first_index(table); + best_index = index; /* Eliminate compiler warning */ best_goodness = 0; while (index) { diff --git a/innobase/pars/pars0pars.c b/innobase/pars/pars0pars.c index 4a298426476..8ffbca579b8 100644 --- a/innobase/pars/pars0pars.c +++ b/innobase/pars/pars0pars.c @@ -922,7 +922,8 @@ pars_process_assign_list( changes_ord_field = UPD_NODE_NO_ORD_CHANGE; - if (row_upd_changes_some_index_ord_field(node->table, node->update)) { + if 
(row_upd_changes_some_index_ord_field_binary(node->table, + node->update)) { changes_ord_field = 0; } diff --git a/innobase/que/que0que.c b/innobase/que/que0que.c index ddf8c8ebc43..96e505f8b80 100644 --- a/innobase/que/que0que.c +++ b/innobase/que/que0que.c @@ -832,7 +832,7 @@ que_thr_dec_refer_count( sess_t* sess; ibool send_srv_msg = FALSE; ibool release_stored_proc = FALSE; - ulint msg_len; + ulint msg_len = 0; byte msg_buf[ODBC_DATAGRAM_SIZE]; ulint fork_type; ibool stopped; diff --git a/innobase/rem/rem0cmp.c b/innobase/rem/rem0cmp.c index cdf1f363946..c3687ebb0e0 100644 --- a/innobase/rem/rem0cmp.c +++ b/innobase/rem/rem0cmp.c @@ -12,6 +12,8 @@ Created 7/1/1994 Heikki Tuuri #include "rem0cmp.ic" #endif +#include "srv0srv.h" + /* ALPHABETICAL ORDER ================== @@ -68,6 +70,54 @@ innobase_mysql_cmp( unsigned int b_length); /* in: data field length, not UNIV_SQL_NULL */ +/************************************************************************* +Transforms the character code so that it is ordered appropriately for the +language. This is only used for the latin1 char set. MySQL does the +comparisons for other char sets. */ +UNIV_INLINE +ulint +cmp_collate( +/*========*/ + /* out: collation order position */ + dtype_t* type, /* in: type */ + ulint code) /* in: code of a character stored in database + record */ +{ + ut_ad((type->mtype == DATA_CHAR) || (type->mtype == DATA_VARCHAR)); + + return((ulint) srv_latin1_ordering[code]); +} + + +/***************************************************************** +Returns TRUE if two types are equal for comparison purposes. 
*/ + +ibool +cmp_types_are_equal( +/*================*/ + /* out: TRUE if the types are considered + equal in comparisons */ + dtype_t* type1, /* in: type 1 */ + dtype_t* type2) /* in: type 2 */ +{ + if (type1->mtype != type2->mtype) { + + return(FALSE); + } + + if (type1->mtype == DATA_MYSQL + || type1->mtype == DATA_VARMYSQL) { + + if ((type1->prtype & ~DATA_NOT_NULL) + != (type2->prtype & ~DATA_NOT_NULL)) { + + return(FALSE); + } + } + + return(TRUE); +} + /***************************************************************** Innobase uses this function is to compare two data fields for which the data type is such that we must compare whole fields. */ @@ -269,8 +319,8 @@ cmp_data_data_slow( } if (cur_type->mtype <= DATA_CHAR) { - data1_byte = dtype_collate(cur_type, data1_byte); - data2_byte = dtype_collate(cur_type, data2_byte); + data1_byte = cmp_collate(cur_type, data1_byte); + data2_byte = cmp_collate(cur_type, data2_byte); } if (data1_byte > data2_byte) { @@ -482,8 +532,8 @@ cmp_dtuple_rec_with_match( } if (cur_type->mtype <= DATA_CHAR) { - rec_byte = dtype_collate(cur_type, rec_byte); - dtuple_byte = dtype_collate(cur_type, + rec_byte = cmp_collate(cur_type, rec_byte); + dtuple_byte = cmp_collate(cur_type, dtuple_byte); } @@ -796,8 +846,8 @@ cmp_rec_rec_with_match( } if (cur_type->mtype <= DATA_CHAR) { - rec1_byte = dtype_collate(cur_type, rec1_byte); - rec2_byte = dtype_collate(cur_type, rec2_byte); + rec1_byte = cmp_collate(cur_type, rec1_byte); + rec2_byte = cmp_collate(cur_type, rec2_byte); } if (rec1_byte < rec2_byte) { diff --git a/innobase/row/row0ins.c b/innobase/row/row0ins.c index 8542dcae326..92cac5a55cf 100644 --- a/innobase/row/row0ins.c +++ b/innobase/row/row0ins.c @@ -207,16 +207,33 @@ row_ins_sec_index_entry_by_modify( /*==============================*/ /* out: DB_SUCCESS or error code */ btr_cur_t* cursor, /* in: B-tree cursor */ + dtuple_t* entry, /* in: index entry to insert */ que_thr_t* thr, /* in: query thread */ mtr_t* mtr) /* in: mtr */ 
{ - ulint err; - - ut_ad(((cursor->index)->type & DICT_CLUSTERED) == 0); - ut_ad(rec_get_deleted_flag(btr_cur_get_rec(cursor))); + mem_heap_t* heap; + upd_t* update; + rec_t* rec; + ulint err; + + rec = btr_cur_get_rec(cursor); + + ut_ad((cursor->index->type & DICT_CLUSTERED) == 0); + ut_ad(rec_get_deleted_flag(rec)); - /* We just remove the delete mark from the secondary index record */ - err = btr_cur_del_mark_set_sec_rec(0, cursor, FALSE, thr, mtr); + /* We know that in the ordering entry and rec are identified. + But in their binary form there may be differences if there + are char fields in them. Therefore we have to calculate the + difference and do an update-in-place if necessary. */ + + heap = mem_heap_create(1024); + + update = row_upd_build_sec_rec_difference_binary(cursor->index, + entry, rec, heap); + + err = btr_cur_update_sec_rec_in_place(cursor, update, thr, mtr); + + mem_heap_free(heap); return(err); } @@ -262,7 +279,7 @@ row_ins_clust_index_entry_by_modify( /* Build an update vector containing all the fields to be modified; NOTE that this vector may contain also system columns! */ - update = row_upd_build_difference(cursor->index, entry, ext_vec, + update = row_upd_build_difference_binary(cursor->index, entry, ext_vec, n_ext_vec, rec, heap); if (mode == BTR_MODIFY_LEAF) { /* Try optimistic updating of the record, keeping changes @@ -348,6 +365,203 @@ row_ins_set_shared_rec_lock( } /******************************************************************* +Checks if foreign key constraint fails for an index entry. Sets shared locks +which lock either the success or the failure of the constraint. NOTE that +the caller must have a shared latch on dict_foreign_key_check_lock. 
*/ + +ulint +row_ins_check_foreign_constraint( +/*=============================*/ + /* out: DB_SUCCESS, DB_LOCK_WAIT, + DB_NO_REFERENCED_ROW, + or DB_ROW_IS_REFERENCED */ + ibool check_ref,/* in: TRUE If we want to check that + the referenced table is ok, FALSE if we + want to to check the foreign key table */ + dict_foreign_t* foreign,/* in: foreign constraint; NOTE that the + tables mentioned in it must be in the + dictionary cache if they exist at all */ + dict_table_t* table, /* in: if check_ref is TRUE, then the foreign + table, else the referenced table */ + dict_index_t* index, /* in: index in table */ + dtuple_t* entry, /* in: index entry for index */ + que_thr_t* thr) /* in: query thread */ +{ + dict_table_t* check_table; + dict_index_t* check_index; + ulint n_fields_cmp; + rec_t* rec; + btr_pcur_t pcur; + ibool moved; + int cmp; + ulint err; + mtr_t mtr; + + ut_ad(rw_lock_own(&dict_foreign_key_check_lock, RW_LOCK_SHARED)); + + if (check_ref) { + check_table = foreign->referenced_table; + check_index = foreign->referenced_index; + } else { + check_table = foreign->foreign_table; + check_index = foreign->foreign_index; + } + + if (check_table == NULL) { + if (check_ref) { + return(DB_NO_REFERENCED_ROW); + } + + return(DB_SUCCESS); + } + + ut_a(check_table && check_index); + + if (check_table != table) { + /* We already have a LOCK_IX on table, but not necessarily + on check_table */ + + err = lock_table(0, check_table, LOCK_IS, thr); + + if (err != DB_SUCCESS) { + + return(err); + } + } + + mtr_start(&mtr); + + /* Store old value on n_fields_cmp */ + + n_fields_cmp = dtuple_get_n_fields_cmp(entry); + + dtuple_set_n_fields_cmp(entry, foreign->n_fields); + + btr_pcur_open(check_index, entry, PAGE_CUR_GE, + BTR_SEARCH_LEAF, &pcur, &mtr); + + /* Scan index records and check if there is a matching record */ + + for (;;) { + rec = btr_pcur_get_rec(&pcur); + + if (rec == page_get_infimum_rec(buf_frame_align(rec))) { + + goto next_rec; + } + + /* Try to place a 
lock on the index record */ + + err = row_ins_set_shared_rec_lock(rec, check_index, thr); + + if (err != DB_SUCCESS) { + + break; + } + + if (rec == page_get_supremum_rec(buf_frame_align(rec))) { + + goto next_rec; + } + + cmp = cmp_dtuple_rec(entry, rec); + + if (cmp == 0) { + if (!rec_get_deleted_flag(rec)) { + /* Found a matching record */ + + if (check_ref) { + err = DB_SUCCESS; + } else { + err = DB_ROW_IS_REFERENCED; + } + + break; + } + } + + if (cmp < 0) { + if (check_ref) { + err = DB_NO_REFERENCED_ROW; + } else { + err = DB_SUCCESS; + } + + break; + } + + ut_a(cmp == 0); +next_rec: + moved = btr_pcur_move_to_next(&pcur, &mtr); + + if (!moved) { + if (check_ref) { + err = DB_NO_REFERENCED_ROW; + } else { + err = DB_SUCCESS; + } + + break; + } + } + + mtr_commit(&mtr); + + /* Restore old value */ + dtuple_set_n_fields_cmp(entry, n_fields_cmp); + + return(err); +} + +/******************************************************************* +Checks if foreign key constraints fail for an index entry. If index +is not mentioned in any constraint, this function does nothing, +Otherwise does searches to the indexes of referenced tables and +sets shared locks which lock either the success or the failure of +a constraint. 
*/ +static +ulint +row_ins_check_foreign_constraints( +/*==============================*/ + /* out: DB_SUCCESS, DB_LOCK_WAIT, or error + code */ + dict_table_t* table, /* in: table */ + dict_index_t* index, /* in: index */ + dtuple_t* entry, /* in: index entry for index */ + que_thr_t* thr) /* in: query thread */ +{ + dict_foreign_t* foreign; + ulint err; + + foreign = UT_LIST_GET_FIRST(table->foreign_list); + + while (foreign) { + if (foreign->foreign_index == index) { + + if (foreign->referenced_table == NULL) { + dict_table_get(foreign->referenced_table_name, + thr_get_trx(thr)); + } + + rw_lock_s_lock(&dict_foreign_key_check_lock); + + err = row_ins_check_foreign_constraint(TRUE, foreign, + table, index, entry, thr); + + rw_lock_s_unlock(&dict_foreign_key_check_lock); + + if (err != DB_SUCCESS) { + return(err); + } + } + + foreign = UT_LIST_GET_NEXT(foreign_list, foreign); + } + + return(DB_SUCCESS); +} + +/******************************************************************* Scans a unique non-clustered index at a given index entry to determine whether a uniqueness violation has occurred for the key value of the entry. Set shared locks on possible duplicate records. 
*/ @@ -365,7 +579,6 @@ row_ins_scan_sec_index_for_duplicate( ulint n_fields_cmp; rec_t* rec; btr_pcur_t pcur; - trx_t* trx = thr_get_trx(thr); ulint err = DB_SUCCESS; ibool moved; mtr_t mtr; @@ -414,7 +627,7 @@ row_ins_scan_sec_index_for_duplicate( err = DB_DUPLICATE_KEY; - trx->error_info = index; + thr_get_trx(thr)->error_info = index; break; } @@ -699,7 +912,7 @@ row_ins_index_entry_low( ext_vec, n_ext_vec, thr, &mtr); } else { - err = row_ins_sec_index_entry_by_modify(&cursor, + err = row_ins_sec_index_entry_by_modify(&cursor, entry, thr, &mtr); } @@ -765,6 +978,15 @@ row_ins_index_entry( { ulint err; + if (UT_LIST_GET_FIRST(index->table->foreign_list)) { + err = row_ins_check_foreign_constraints(index->table, index, + entry, thr); + if (err != DB_SUCCESS) { + + return(err); + } + } + /* Try first optimistic descent to the B-tree */ err = row_ins_index_entry_low(BTR_MODIFY_LEAF, index, entry, @@ -812,7 +1034,7 @@ row_ins_index_entry_set_vals( /*************************************************************** Inserts a single index entry to the table. 
*/ -UNIV_INLINE +static ulint row_ins_index_entry_step( /*=====================*/ diff --git a/innobase/row/row0mysql.c b/innobase/row/row0mysql.c index 373ee4ac4bd..13c0332dcef 100644 --- a/innobase/row/row0mysql.c +++ b/innobase/row/row0mysql.c @@ -21,6 +21,7 @@ Created 9/17/2000 Heikki Tuuri #include "pars0pars.h" #include "dict0dict.h" #include "dict0crea.h" +#include "dict0load.h" #include "trx0roll.h" #include "trx0purge.h" #include "lock0lock.h" @@ -151,7 +152,7 @@ row_mysql_handle_errors( during the function entry */ trx_t* trx, /* in: transaction */ que_thr_t* thr, /* in: query thread */ - trx_savept_t* savept) /* in: savepoint */ + trx_savept_t* savept) /* in: savepoint or NULL */ { ibool timeout_expired; ulint err; @@ -172,12 +173,16 @@ handle_new_error: } } else if (err == DB_TOO_BIG_RECORD) { /* MySQL will roll back the latest SQL statement */ + } else if (err == DB_ROW_IS_REFERENCED + || err == DB_NO_REFERENCED_ROW + || err == DB_CANNOT_ADD_CONSTRAINT) { + /* MySQL will roll back the latest SQL statement */ } else if (err == DB_LOCK_WAIT) { timeout_expired = srv_suspend_mysql_thread(thr); if (timeout_expired) { - trx->error_state = DB_DEADLOCK; + trx->error_state = DB_LOCK_WAIT_TIMEOUT; que_thr_stop_for_mysql(thr); @@ -188,9 +193,12 @@ handle_new_error: return(TRUE); - } else if (err == DB_DEADLOCK) { - /* MySQL will roll back the latest SQL statement */ + } else if (err == DB_DEADLOCK || err == DB_LOCK_WAIT_TIMEOUT) { + /* Roll back the whole transaction; this resolution was added + to version 3.23.43 */ + trx_general_rollback_for_mysql(trx, FALSE, NULL); + } else if (err == DB_OUT_OF_FILE_SPACE) { /* MySQL will roll back the latest SQL statement */ @@ -203,6 +211,7 @@ handle_new_error: exit(1); } else { + fprintf(stderr, "InnoDB: unknown error code %lu\n", err); ut_a(0); } @@ -440,7 +449,94 @@ row_update_statistics_if_needed( dict_update_statistics(prebuilt->table); } } + +/************************************************************************* 
+Unlocks an AUTO_INC type lock possibly reserved by trx. */ + +void +row_unlock_table_autoinc_for_mysql( +/*===============================*/ + trx_t* trx) /* in: transaction */ +{ + if (!trx->auto_inc_lock) { + + return; + } + + lock_table_unlock_auto_inc(trx); +} + +/************************************************************************* +Sets an AUTO_INC type lock on the table mentioned in prebuilt. The +AUTO_INC lock gives exclusive access to the auto-inc counter of the +table. The lock is reserved only for the duration of an SQL statement. +It is not compatible with another AUTO_INC or exclusive lock on the +table. */ + +int +row_lock_table_autoinc_for_mysql( +/*=============================*/ + /* out: error code or DB_SUCCESS */ + row_prebuilt_t* prebuilt) /* in: prebuilt struct in the MySQL + table handle */ +{ + trx_t* trx = prebuilt->trx; + ins_node_t* node = prebuilt->ins_node; + que_thr_t* thr; + ulint err; + ibool was_lock_wait; + + ut_ad(trx); + ut_ad(trx->mysql_thread_id == os_thread_get_curr_id()); + + trx->op_info = "setting auto-inc lock"; + + if (node == NULL) { + row_get_prebuilt_insert_row(prebuilt); + node = prebuilt->ins_node; + } + + /* We use the insert query graph as the dummy graph needed + in the lock module call */ + + thr = que_fork_get_first_thr(prebuilt->ins_graph); + + que_thr_move_to_run_state_for_mysql(thr, trx); + +run_again: + thr->run_node = node; + thr->prev_node = node; + + /* It may be that the current session has not yet started + its transaction, or it has been committed: */ + + trx_start_if_not_started(trx); + + err = lock_table(0, prebuilt->table, LOCK_AUTO_INC, thr); + + trx->error_state = err; + + if (err != DB_SUCCESS) { + que_thr_stop_for_mysql(thr); + + was_lock_wait = row_mysql_handle_errors(&err, trx, thr, NULL); + + if (was_lock_wait) { + goto run_again; + } + + trx->op_info = ""; + + return(err); + } + + que_thr_stop_for_mysql_no_error(thr, trx); + + trx->op_info = ""; + return((int) err); +} + 
/************************************************************************* Does an insert for MySQL. */ @@ -462,6 +558,17 @@ row_insert_for_mysql( ut_ad(trx); ut_ad(trx->mysql_thread_id == os_thread_get_curr_id()); + if (srv_created_new_raw || srv_force_recovery) { + fprintf(stderr, + "InnoDB: A new raw disk partition was initialized or\n" + "InnoDB: innodb_force_recovery is on: we do not allow\n" + "InnoDB: database modifications by the user. Shut down\n" + "InnoDB: mysqld and edit my.cnf so that newraw is replaced\n" + "InnoDB: with raw, and innodb_force_... is removed.\n"); + + return(DB_ERROR); + } + trx->op_info = "inserting"; if (node == NULL) { @@ -634,6 +741,17 @@ row_update_for_mysql( ut_ad(trx->mysql_thread_id == os_thread_get_curr_id()); UT_NOT_USED(mysql_rec); + if (srv_created_new_raw || srv_force_recovery) { + fprintf(stderr, + "InnoDB: A new raw disk partition was initialized or\n" + "InnoDB: innodb_force_recovery is on: we do not allow\n" + "InnoDB: database modifications by the user. Shut down\n" + "InnoDB: mysqld and edit my.cnf so that newraw is replaced\n" + "InnoDB: with raw, and innodb_force_... is removed.\n"); + + return(DB_ERROR); + } + trx->op_info = "updating or deleting"; node = prebuilt->upd_node; @@ -816,8 +934,69 @@ row_create_table_for_mysql( ut_ad(trx->mysql_thread_id == os_thread_get_curr_id()); + if (srv_created_new_raw || srv_force_recovery) { + fprintf(stderr, + "InnoDB: A new raw disk partition was initialized or\n" + "InnoDB: innodb_force_recovery is on: we do not allow\n" + "InnoDB: database modifications by the user. Shut down\n" + "InnoDB: mysqld and edit my.cnf so that newraw is replaced\n" + "InnoDB: with raw, and innodb_force_... 
is removed.\n"); + + return(DB_ERROR); + } + trx->op_info = "creating table"; + namelen = ut_strlen(table->name); + + keywordlen = ut_strlen("innodb_monitor"); + + if (namelen >= keywordlen + && 0 == ut_memcmp(table->name + namelen - keywordlen, + "innodb_monitor", keywordlen)) { + + /* Table name ends to characters innodb_monitor: + start monitor prints */ + + srv_print_innodb_monitor = TRUE; + + /* The lock timeout monitor thread also takes care + of InnoDB monitor prints */ + + os_event_set(srv_lock_timeout_thread_event); + } + + keywordlen = ut_strlen("innodb_lock_monitor"); + + if (namelen >= keywordlen + && 0 == ut_memcmp(table->name + namelen - keywordlen, + "innodb_lock_monitor", keywordlen)) { + + srv_print_innodb_monitor = TRUE; + srv_print_innodb_lock_monitor = TRUE; + os_event_set(srv_lock_timeout_thread_event); + } + + keywordlen = ut_strlen("innodb_tablespace_monitor"); + + if (namelen >= keywordlen + && 0 == ut_memcmp(table->name + namelen - keywordlen, + "innodb_tablespace_monitor", keywordlen)) { + + srv_print_innodb_tablespace_monitor = TRUE; + os_event_set(srv_lock_timeout_thread_event); + } + + keywordlen = ut_strlen("innodb_table_monitor"); + + if (namelen >= keywordlen + && 0 == ut_memcmp(table->name + namelen - keywordlen, + "innodb_table_monitor", keywordlen)) { + + srv_print_innodb_table_monitor = TRUE; + os_event_set(srv_lock_timeout_thread_event); + } + /* Serialize data dictionary operations with dictionary mutex: no deadlocks can occur then in these operations */ @@ -845,9 +1024,12 @@ row_create_table_for_mysql( trx_general_rollback_for_mysql(trx, FALSE, NULL); if (err == DB_OUT_OF_FILE_SPACE) { + fprintf(stderr, + "InnoDB: Warning: cannot create table %s because tablespace full\n", + table->name); row_drop_table_for_mysql(table->name, trx, TRUE); } else { - assert(err == DB_DUPLICATE_KEY); + ut_a(err == DB_DUPLICATE_KEY); fprintf(stderr, "InnoDB: Error: table %s already exists in InnoDB internal\n" "InnoDB: data dictionary. 
Have you deleted the .frm file\n" @@ -864,39 +1046,6 @@ row_create_table_for_mysql( } trx->error_state = DB_SUCCESS; - } else { - namelen = ut_strlen(table->name); - - keywordlen = ut_strlen("innodb_monitor"); - - if (namelen >= keywordlen - && 0 == ut_memcmp(table->name + namelen - keywordlen, - "innodb_monitor", keywordlen)) { - - /* Table name ends to characters innodb_monitor: - start monitor prints */ - - srv_print_innodb_monitor = TRUE; - } - - keywordlen = ut_strlen("innodb_lock_monitor"); - - if (namelen >= keywordlen - && 0 == ut_memcmp(table->name + namelen - keywordlen, - "innodb_lock_monitor", keywordlen)) { - - srv_print_innodb_monitor = TRUE; - srv_print_innodb_lock_monitor = TRUE; - } - - keywordlen = ut_strlen("innodb_tablespace_monitor"); - - if (namelen >= keywordlen - && 0 == ut_memcmp(table->name + namelen - keywordlen, - "innodb_tablespace_monitor", keywordlen)) { - - srv_print_innodb_tablespace_monitor = TRUE; - } } mutex_exit(&(dict_sys->mutex)); @@ -970,6 +1119,65 @@ row_create_index_for_mysql( } /************************************************************************* +Scans a table create SQL string and adds to the data dictionary +the foreign key constraints declared in the string. This function +should be called after the indexes for a table have been created. +Each foreign key constraint must be accompanied with indexes in +bot participating tables. The indexes are allowed to contain more +fields than mentioned in the constraint. Check also that foreign key +constraints which reference this table are ok. 
*/ + +int +row_table_add_foreign_constraints( +/*==============================*/ + /* out: error code or DB_SUCCESS */ + trx_t* trx, /* in: transaction */ + char* sql_string, /* in: table create statement where + foreign keys are declared like: + FOREIGN KEY (a, b) REFERENCES table2(c, d), + table2 can be written also with the database + name before it: test.table2 */ + char* name) /* in: table full name in the normalized form + database_name/table_name */ +{ + ulint err; + + ut_a(sql_string); + + trx->op_info = "adding foreign keys"; + + /* Serialize data dictionary operations with dictionary mutex: + no deadlocks can occur then in these operations */ + + mutex_enter(&(dict_sys->mutex)); + + trx->dict_operation = TRUE; + + err = dict_create_foreign_constraints(trx, sql_string, name); + + if (err == DB_SUCCESS) { + /* Check that also referencing constraints are ok */ + err = dict_load_foreigns(name); + } + + if (err != DB_SUCCESS) { + /* We have special error handling here */ + + trx->error_state = DB_SUCCESS; + + trx_general_rollback_for_mysql(trx, FALSE, NULL); + + row_drop_table_for_mysql(name, trx, TRUE); + + trx->error_state = DB_SUCCESS; + } + + mutex_exit(&(dict_sys->mutex)); + + return((int) err); +} + +/************************************************************************* Drops a table for MySQL. If the name of the dropped table ends to characters INNODB_MONITOR, then this also stops printing of monitor output by the master thread. */ @@ -997,6 +1205,17 @@ row_drop_table_for_mysql( ut_ad(trx->mysql_thread_id == os_thread_get_curr_id()); ut_a(name != NULL); + if (srv_created_new_raw || srv_force_recovery) { + fprintf(stderr, + "InnoDB: A new raw disk partition was initialized or\n" + "InnoDB: innodb_force_recovery is on: we do not allow\n" + "InnoDB: database modifications by the user. Shut down\n" + "InnoDB: mysqld and edit my.cnf so that newraw is replaced\n" + "InnoDB: with raw, and innodb_force_... 
is removed.\n"); + + return(DB_ERROR); + } + trx->op_info = "dropping table"; namelen = ut_strlen(name); @@ -1032,6 +1251,15 @@ row_drop_table_for_mysql( srv_print_innodb_tablespace_monitor = FALSE; } + keywordlen = ut_strlen("innodb_table_monitor"); + + if (namelen >= keywordlen + && 0 == ut_memcmp(name + namelen - keywordlen, + "innodb_table_monitor", keywordlen)) { + + srv_print_innodb_table_monitor = FALSE; + } + /* We use the private SQL parser of Innobase to generate the query graphs needed in deleting the dictionary data from system tables in Innobase. Deleting a row from SYS_INDEXES table also @@ -1039,21 +1267,49 @@ row_drop_table_for_mysql( str1 = "PROCEDURE DROP_TABLE_PROC () IS\n" + "table_name CHAR;\n" + "sys_foreign_id CHAR;\n" "table_id CHAR;\n" "index_id CHAR;\n" + "foreign_id CHAR;\n" "found INT;\n" "BEGIN\n" - "SELECT ID INTO table_id\n" - "FROM SYS_TABLES\n" - "WHERE NAME ='"; - + "table_name := '"; + str2 = "';\n" + "SELECT ID INTO table_id\n" + "FROM SYS_TABLES\n" + "WHERE NAME = table_name;\n" "IF (SQL % NOTFOUND) THEN\n" " COMMIT WORK;\n" " RETURN;\n" "END IF;\n" "found := 1;\n" + "SELECT ID INTO sys_foreign_id\n" + "FROM SYS_TABLES\n" + "WHERE NAME = 'SYS_FOREIGN';\n" + "IF (SQL % NOTFOUND) THEN\n" + " found := 0;\n" + "END IF;\n" + "IF (table_name = 'SYS_FOREIGN') THEN\n" + " found := 0;\n" + "END IF;\n" + "IF (table_name = 'SYS_FOREIGN_COLS') THEN\n" + " found := 0;\n" + "END IF;\n" + "WHILE found = 1 LOOP\n" + " SELECT ID INTO foreign_id\n" + " FROM SYS_FOREIGN\n" + " WHERE FOR_NAME = table_name;\n" + " IF (SQL % NOTFOUND) THEN\n" + " found := 0;\n" + " ELSE" + " DELETE FROM SYS_FOREIGN_COLS WHERE ID = foreign_id;\n" + " DELETE FROM SYS_FOREIGN WHERE ID = foreign_id;\n" + " END IF;\n" + "END LOOP;\n" + "found := 1;\n" "WHILE found = 1 LOOP\n" " SELECT ID INTO index_id\n" " FROM SYS_INDEXES\n" @@ -1095,6 +1351,9 @@ row_drop_table_for_mysql( graph->fork_type = QUE_FORK_MYSQL_INTERFACE; + /* Prevent foreign key checks while we are dropping 
the table */ + rw_lock_x_lock(&(dict_foreign_key_check_lock)); + /* Prevent purge from running while we are dropping the table */ rw_lock_s_lock(&(purge_sys->purge_is_running)); @@ -1103,6 +1362,12 @@ row_drop_table_for_mysql( if (!table) { err = DB_TABLE_NOT_FOUND; + fprintf(stderr, + "InnoDB: Error: table %s does not exist in the InnoDB internal\n" + "InnoDB: data dictionary though MySQL is trying to drop it.\n" + "InnoDB: Have you copied the .frm file of the table to the\n" + "InnoDB: MySQL database directory from another database?\n", + name); goto funct_exit; } @@ -1138,6 +1403,8 @@ row_drop_table_for_mysql( funct_exit: rw_lock_s_unlock(&(purge_sys->purge_is_running)); + rw_lock_x_unlock(&(dict_foreign_key_check_lock)); + if (!has_dict_mutex) { mutex_exit(&(dict_sys->mutex)); } @@ -1150,6 +1417,49 @@ funct_exit: } /************************************************************************* +Drops a database for MySQL. */ + +int +row_drop_database_for_mysql( +/*========================*/ + /* out: error code or DB_SUCCESS */ + char* name, /* in: database name which ends to '/' */ + trx_t* trx) /* in: transaction handle */ +{ + char* table_name; + int err = DB_SUCCESS; + + ut_ad(trx->mysql_thread_id == os_thread_get_curr_id()); + ut_a(name != NULL); + ut_a(name[strlen(name) - 1] == '/'); + + trx->op_info = "dropping database"; + + mutex_enter(&(dict_sys->mutex)); + + while (table_name = dict_get_first_table_name_in_db(name)) { + ut_a(memcmp(table_name, name, strlen(name)) == 0); + + err = row_drop_table_for_mysql(table_name, trx, TRUE); + + mem_free(table_name); + + if (err != DB_SUCCESS) { + fprintf(stderr, + "InnoDB: DROP DATABASE %s failed with error %lu for table %s\n", + name, (ulint)err, table_name); + break; + } + } + + mutex_exit(&(dict_sys->mutex)); + + trx->op_info = ""; + + return(err); +} + +/************************************************************************* Renames a table for MySQL. 
*/ int @@ -1174,18 +1484,37 @@ row_rename_table_for_mysql( ut_a(old_name != NULL); ut_a(new_name != NULL); + if (srv_created_new_raw || srv_force_recovery) { + fprintf(stderr, + "InnoDB: A new raw disk partition was initialized or\n" + "InnoDB: innodb_force_recovery is on: we do not allow\n" + "InnoDB: database modifications by the user. Shut down\n" + "InnoDB: mysqld and edit my.cnf so that newraw is replaced\n" + "InnoDB: with raw, and innodb_force_... is removed.\n"); + + return(DB_ERROR); + } + trx->op_info = "renaming table"; str1 = "PROCEDURE RENAME_TABLE_PROC () IS\n" + "new_table_name CHAR;\n" + "old_table_name CHAR;\n" "BEGIN\n" - "UPDATE SYS_TABLES SET NAME ='"; + "new_table_name :='"; str2 = - "' WHERE NAME = '"; + "';\nold_table_name := '"; str3 = "';\n" + "UPDATE SYS_TABLES SET NAME = new_table_name\n" + "WHERE NAME = old_table_name;\n" + "UPDATE SYS_FOREIGN SET FOR_NAME = new_table_name\n" + "WHERE FOR_NAME = old_table_name;\n" + "UPDATE SYS_FOREIGN SET REF_NAME = new_table_name\n" + "WHERE REF_NAME = old_table_name;\n" "COMMIT WORK;\n" "END;\n"; @@ -1356,7 +1685,7 @@ row_check_table_for_mysql( dict_table_t* table = prebuilt->table; dict_index_t* index; ulint n_rows; - ulint n_rows_in_table; + ulint n_rows_in_table = ULINT_UNDEFINED; ulint ret = DB_SUCCESS; prebuilt->trx->op_info = "checking table"; diff --git a/innobase/row/row0purge.c b/innobase/row/row0purge.c index 43bc166347a..0dffa273938 100644 --- a/innobase/row/row0purge.c +++ b/innobase/row/row0purge.c @@ -220,7 +220,7 @@ row_purge_remove_sec_if_poss_low( if (!found) { /* Not found */ - /* FIXME: printf("PURGE:........sec entry not found\n"); */ + /* printf("PURGE:........sec entry not found\n"); */ /* dtuple_print(entry); */ btr_pcur_close(&pcur); @@ -382,7 +382,7 @@ row_purge_upd_exist_or_extern( while (node->index != NULL) { index = node->index; - if (row_upd_changes_ord_field(NULL, node->index, + if (row_upd_changes_ord_field_binary(NULL, node->index, node->update)) { /* Build the older 
version of the index entry */ entry = row_build_index_entry(node->row, index, heap); diff --git a/innobase/row/row0sel.c b/innobase/row/row0sel.c index d041e34a558..e42486f1e17 100644 --- a/innobase/row/row0sel.c +++ b/innobase/row/row0sel.c @@ -50,15 +50,21 @@ to que_run_threads: this is to allow canceling runaway queries */ /************************************************************************ Returns TRUE if the user-defined column values in a secondary index record -are the same as the corresponding columns in the clustered index record. */ +are the same as the corresponding columns in the clustered index record. +NOTE: the comparison is NOT done as a binary comparison, but character +fields are compared with collation! */ static ibool row_sel_sec_rec_is_for_clust_rec( /*=============================*/ - rec_t* sec_rec, - dict_index_t* sec_index, - rec_t* clust_rec, - dict_index_t* clust_index) + /* out: TRUE if the secondary + record is equal to the corresponding + fields in the clustered record, + when compared with collation */ + rec_t* sec_rec, /* in: secondary index record */ + dict_index_t* sec_index, /* in: secondary index */ + rec_t* clust_rec, /* in: clustered index record */ + dict_index_t* clust_index) /* in: clustered index */ { dict_col_t* col; byte* sec_field; @@ -84,9 +90,9 @@ row_sel_sec_rec_is_for_clust_rec( return(FALSE); } - if (sec_len != UNIV_SQL_NULL - && ut_memcmp(sec_field, clust_field, sec_len) != 0) { - + if (0 != cmp_data_data(dict_col_get_type(col), + clust_field, clust_len, + sec_field, sec_len)) { return(FALSE); } } @@ -763,7 +769,7 @@ row_sel_open_pcur( /************************************************************************* Restores a stored pcur position to a table index. 
*/ -UNIV_INLINE +static ibool row_sel_restore_pcur_pos( /*=====================*/ @@ -813,7 +819,8 @@ row_sel_restore_pcur_pos( return(TRUE); } - ut_ad(relative_position == BTR_PCUR_AFTER); + ut_ad(relative_position == BTR_PCUR_AFTER + || relative_position == BTR_PCUR_AFTER_LAST_IN_TREE); return(FALSE); } @@ -835,7 +842,8 @@ row_sel_restore_pcur_pos( plan->stored_cursor_rec_processed is TRUE, we must move to the previous record, else there is no need to move the cursor. */ - if (relative_position == BTR_PCUR_BEFORE) { + if (relative_position == BTR_PCUR_BEFORE + || relative_position == BTR_PCUR_BEFORE_FIRST_IN_TREE) { return(FALSE); } @@ -850,7 +858,8 @@ row_sel_restore_pcur_pos( return(FALSE); } - ut_ad(relative_position == BTR_PCUR_AFTER); + ut_ad(relative_position == BTR_PCUR_AFTER + || relative_position == BTR_PCUR_AFTER_LAST_IN_TREE); return(TRUE); } @@ -1762,7 +1771,7 @@ fetch_step( if (sel_node->state == SEL_NODE_CLOSED) { /* SQL error detected */ - printf("SQL error %lu\n", DB_ERROR); + printf("SQL error %lu\n", (ulint)DB_ERROR); que_thr_handle_error(thr, DB_ERROR, NULL, 0); @@ -2251,7 +2260,7 @@ row_sel_get_clust_rec_for_mysql( /************************************************************************ Restores cursor position after it has been stored. We have to take into -account that the record cursor was positioned on can have been deleted. +account that the record cursor was positioned on may have been deleted. Then we may have to move the cursor one step up or down. 
*/ static ibool @@ -2284,14 +2293,14 @@ sel_restore_position_for_mysql( if (moves_up) { btr_pcur_move_to_next(pcur, mtr); - - return(TRUE); } return(TRUE); } - if (relative_position == BTR_PCUR_AFTER) { + if (relative_position == BTR_PCUR_AFTER + || relative_position == BTR_PCUR_AFTER_LAST_IN_TREE) { + if (moves_up) { return(TRUE); } @@ -2303,7 +2312,8 @@ sel_restore_position_for_mysql( return(TRUE); } - ut_ad(relative_position == BTR_PCUR_BEFORE); + ut_ad(relative_position == BTR_PCUR_BEFORE + || relative_position == BTR_PCUR_BEFORE_FIRST_IN_TREE); if (moves_up && btr_pcur_is_on_user_rec(pcur, mtr)) { btr_pcur_move_to_next(pcur, mtr); @@ -2586,21 +2596,30 @@ row_search_for_mysql( let us try a search shortcut through the hash index */ + if (btr_search_latch.writer != RW_LOCK_NOT_LOCKED) { + /* There is an x-latch request: release + a possible s-latch to reduce starvation + and wait for BTR_SEA_TIMEOUT rounds before + trying to keep it again over calls from + MySQL */ + + if (trx->has_search_latch) { + rw_lock_s_unlock(&btr_search_latch); + trx->has_search_latch = FALSE; + } + + trx->search_latch_timeout = BTR_SEA_TIMEOUT; + + goto no_shortcut; + } + if (!trx->has_search_latch) { rw_lock_s_lock(&btr_search_latch); trx->has_search_latch = TRUE; - - } else if (btr_search_latch.writer_is_wait_ex) { - /* There is an x-latch request waiting: - release the s-latch for a moment to reduce - starvation */ - - rw_lock_s_unlock(&btr_search_latch); - rw_lock_s_lock(&btr_search_latch); } shortcut = row_sel_try_search_shortcut_for_mysql(&rec, - prebuilt, &mtr); + prebuilt, &mtr); if (shortcut == SEL_FOUND) { row_sel_store_mysql_rec(buf, prebuilt, rec); @@ -2609,7 +2628,16 @@ row_search_for_mysql( /* printf("%s shortcut\n", index->name); */ srv_n_rows_read++; + + if (trx->search_latch_timeout > 0 + && trx->has_search_latch) { + trx->search_latch_timeout--; + + rw_lock_s_unlock(&btr_search_latch); + trx->has_search_latch = FALSE; + } + trx->op_info = ""; return(DB_SUCCESS); @@ 
-2619,6 +2647,16 @@ row_search_for_mysql( /* printf("%s record not found 2\n", index->name); */ + + if (trx->search_latch_timeout > 0 + && trx->has_search_latch) { + + trx->search_latch_timeout--; + + rw_lock_s_unlock(&btr_search_latch); + trx->has_search_latch = FALSE; + } + trx->op_info = ""; return(DB_RECORD_NOT_FOUND); } @@ -2627,7 +2665,7 @@ row_search_for_mysql( mtr_start(&mtr); } } - +no_shortcut: if (trx->has_search_latch) { rw_lock_s_unlock(&btr_search_latch); trx->has_search_latch = FALSE; diff --git a/innobase/row/row0umod.c b/innobase/row/row0umod.c index a7c8957d61a..37f5b1f0bc1 100644 --- a/innobase/row/row0umod.c +++ b/innobase/row/row0umod.c @@ -443,6 +443,8 @@ row_undo_mod_del_unmark_sec( "InnoDB: Make a detailed bug report and send it\n"); fprintf(stderr, "InnoDB: to mysql@lists.mysql.com\n"); + trx_print(thr_get_trx(thr)); + mem_free(err_buf); } else { btr_cur = btr_pcur_get_btr_cur(&pcur); @@ -552,7 +554,7 @@ row_undo_mod_upd_exist_sec( while (node->index != NULL) { index = node->index; - if (row_upd_changes_ord_field(node->row, node->index, + if (row_upd_changes_ord_field_binary(node->row, node->index, node->update)) { /* Build the newest version of the index entry */ diff --git a/innobase/row/row0upd.c b/innobase/row/row0upd.c index 3fa98db3a02..fa859729141 100644 --- a/innobase/row/row0upd.c +++ b/innobase/row/row0upd.c @@ -72,6 +72,134 @@ searched delete is obviously to keep the x-latch for several steps of query graph execution. */ /************************************************************************* +Checks if index currently is mentioned as a referenced index in a foreign +key constraint. This function also loads into the dictionary cache the +possible referencing table. 
*/ +static +ibool +row_upd_index_is_referenced( +/*========================*/ + /* out: TRUE if referenced; NOTE that since + we do not hold dict_foreign_key_check_lock + when leaving the function, it may be that + the referencing table has been dropped when + we leave this function: this function is only + for heuristic use! */ + dict_index_t* index) /* in: index */ +{ + dict_table_t* table = index->table; + dict_foreign_t* foreign; + ulint phase = 1; + +try_again: + if (!UT_LIST_GET_FIRST(table->referenced_list)) { + + return(FALSE); + } + + if (phase == 2) { + mutex_enter(&(dict_sys->mutex)); + } + + rw_lock_s_lock(&dict_foreign_key_check_lock); + + foreign = UT_LIST_GET_FIRST(table->referenced_list); + + while (foreign) { + if (foreign->referenced_index == index) { + if (foreign->foreign_table == NULL) { + if (phase == 2) { + dict_table_get_low(foreign-> + foreign_table_name); + } else { + phase = 2; + rw_lock_s_unlock( + &dict_foreign_key_check_lock); + goto try_again; + } + } + + rw_lock_s_unlock(&dict_foreign_key_check_lock); + + if (phase == 2) { + mutex_exit(&(dict_sys->mutex)); + } + + return(TRUE); + } + + foreign = UT_LIST_GET_NEXT(referenced_list, foreign); + } + + rw_lock_s_unlock(&dict_foreign_key_check_lock); + + if (phase == 2) { + mutex_exit(&(dict_sys->mutex)); + } + + return(FALSE); +} + +/************************************************************************* +Checks if possible foreign key constraints hold after a delete of the record +under pcur. NOTE that this function will temporarily commit mtr and lose +pcur position! */ +static +ulint +row_upd_check_references_constraints( +/*=================================*/ + /* out: DB_SUCCESS, DB_LOCK_WAIT, or an error + code */ + btr_pcur_t* pcur, /* in: cursor positioned on a record; NOTE: the + cursor position is lost in this function! 
*/ + dict_table_t* table, /* in: table in question */ + dict_index_t* index, /* in: index of the cursor */ + que_thr_t* thr, /* in: query thread */ + mtr_t* mtr) /* in: mtr */ +{ + dict_foreign_t* foreign; + mem_heap_t* heap; + dtuple_t* entry; + rec_t* rec; + ulint err; + + rec = btr_pcur_get_rec(pcur); + + heap = mem_heap_create(500); + + entry = row_rec_to_index_entry(ROW_COPY_DATA, index, rec, heap); + + mtr_commit(mtr); + + mtr_start(mtr); + + rw_lock_s_lock(&dict_foreign_key_check_lock); + + foreign = UT_LIST_GET_FIRST(table->referenced_list); + + while (foreign) { + if (foreign->referenced_index == index) { + + err = row_ins_check_foreign_constraint(FALSE, foreign, + table, index, entry, thr); + if (err != DB_SUCCESS) { + rw_lock_s_unlock(&dict_foreign_key_check_lock); + mem_heap_free(heap); + + return(err); + } + } + + foreign = UT_LIST_GET_NEXT(referenced_list, foreign); + } + + rw_lock_s_unlock(&dict_foreign_key_check_lock); + mem_heap_free(heap); + + return(DB_SUCCESS); +} + +/************************************************************************* Creates an update node for a query graph. */ upd_node_t* @@ -484,13 +612,73 @@ upd_ext_vec_contains( } /******************************************************************* +Builds an update vector from those fields which in a secondary index entry +differ from a record that has the equal ordering fields. NOTE: we compare +the fields as binary strings! 
*/ + +upd_t* +row_upd_build_sec_rec_difference_binary( +/*====================================*/ + /* out, own: update vector of differing + fields */ + dict_index_t* index, /* in: index */ + dtuple_t* entry, /* in: entry to insert */ + rec_t* rec, /* in: secondary index record */ + mem_heap_t* heap) /* in: memory heap from which allocated */ +{ + upd_field_t* upd_field; + dfield_t* dfield; + byte* data; + ulint len; + upd_t* update; + ulint n_diff; + ulint i; + + /* This function is used only for a secondary index */ + ut_ad(0 == (index->type & DICT_CLUSTERED)); + + update = upd_create(dtuple_get_n_fields(entry), heap); + + n_diff = 0; + + for (i = 0; i < dtuple_get_n_fields(entry); i++) { + + data = rec_get_nth_field(rec, i, &len); + + dfield = dtuple_get_nth_field(entry, i); + + ut_a(len == dfield_get_len(dfield)); + + /* NOTE: we compare the fields as binary strings! + (No collation) */ + + if (!dfield_data_is_binary_equal(dfield, len, data)) { + + upd_field = upd_get_nth_field(update, n_diff); + + dfield_copy(&(upd_field->new_val), dfield); + + upd_field_set_field_no(upd_field, i, index); + + upd_field->extern_storage = FALSE; + + n_diff++; + } + } + + update->n_fields = n_diff; + + return(update); +} + +/******************************************************************* Builds an update vector from those fields, excluding the roll ptr and trx id fields, which in an index entry differ from a record that has -the equal ordering fields. */ +the equal ordering fields. NOTE: we compare the fields as binary strings! */ upd_t* -row_upd_build_difference( -/*=====================*/ +row_upd_build_difference_binary( +/*============================*/ /* out, own: update vector of differing fields, excluding roll ptr and trx id */ dict_index_t* index, /* in: clustered index */ @@ -527,10 +715,13 @@ row_upd_build_difference( dfield = dtuple_get_nth_field(entry, i); + /* NOTE: we compare the fields as binary strings! 
+ (No collation) */ + if ((rec_get_nth_field_extern_bit(rec, i) != upd_ext_vec_contains(ext_vec, n_ext_vec, i)) || ((i != trx_id_pos) && (i != roll_ptr_pos) - && !dfield_data_is_equal(dfield, len, data))) { + && !dfield_data_is_binary_equal(dfield, len, data))) { upd_field = upd_get_nth_field(update, n_diff); @@ -630,13 +821,16 @@ row_upd_clust_index_replace_new_col_vals( /*************************************************************** Checks if an update vector changes an ordering field of an index record. This function is fast if the update vector is short or the number of ordering -fields in the index is small. Otherwise, this can be quadratic. */ +fields in the index is small. Otherwise, this can be quadratic. +NOTE: we compare the fields as binary strings! */ ibool -row_upd_changes_ord_field( -/*======================*/ +row_upd_changes_ord_field_binary( +/*=============================*/ /* out: TRUE if update vector changes - an ordering field in the index record */ + an ordering field in the index record; + NOTE: the fields are compared as binary + strings */ dtuple_t* row, /* in: old value of row, or NULL if the row and the data values in update are not known when this function is called, e.g., at @@ -671,7 +865,7 @@ row_upd_changes_ord_field( if (col_pos == upd_field->field_no && (row == NULL - || !dfield_datas_are_equal( + || !dfield_datas_are_binary_equal( dtuple_get_nth_field(row, col_no), &(upd_field->new_val)))) { return(TRUE); @@ -683,11 +877,12 @@ row_upd_changes_ord_field( } /*************************************************************** -Checks if an update vector changes an ordering field of an index record. */ +Checks if an update vector changes an ordering field of an index record. +NOTE: we compare the fields as binary strings! 
*/ ibool -row_upd_changes_some_index_ord_field( -/*=================================*/ +row_upd_changes_some_index_ord_field_binary( +/*========================================*/ /* out: TRUE if update vector may change an ordering field in an index record */ dict_table_t* table, /* in: table */ @@ -812,6 +1007,7 @@ row_upd_sec_index_entry( upd_node_t* node, /* in: row update node */ que_thr_t* thr) /* in: query thread */ { + ibool check_ref; ibool found; dict_index_t* index; dtuple_t* entry; @@ -825,6 +1021,8 @@ row_upd_sec_index_entry( index = node->index; + check_ref = row_upd_index_is_referenced(index); + heap = mem_heap_create(1024); /* Build old index entry */ @@ -855,6 +1053,8 @@ row_upd_sec_index_entry( "InnoDB: Make a detailed bug report and send it\n"); fprintf(stderr, "InnoDB: to mysql@lists.mysql.com\n"); + trx_print(thr_get_trx(thr)); + mem_free(err_buf); } else { /* Delete mark the old index record; it can already be @@ -864,9 +1064,21 @@ row_upd_sec_index_entry( if (!rec_get_deleted_flag(rec)) { err = btr_cur_del_mark_set_sec_rec(0, btr_cur, TRUE, thr, &mtr); + if (err == DB_SUCCESS && check_ref) { + /* NOTE that the following call loses + the position of pcur ! 
*/ + err = row_upd_check_references_constraints( + &pcur, index->table, + index, thr, &mtr); + if (err != DB_SUCCESS) { + + goto close_cur; + } + } + } } - +close_cur: btr_pcur_close(&pcur); mtr_commit(&mtr); @@ -907,8 +1119,8 @@ row_upd_sec_step( ut_ad(!(node->index->type & DICT_CLUSTERED)); if (node->state == UPD_NODE_UPDATE_ALL_SEC - || row_upd_changes_ord_field(node->row, node->index, - node->update)) { + || row_upd_changes_ord_field_binary(node->row, node->index, + node->update)) { err = row_upd_sec_index_entry(node, thr); return(err); @@ -931,6 +1143,8 @@ row_upd_clust_rec_by_insert( upd_node_t* node, /* in: row update node */ dict_index_t* index, /* in: clustered index of the record */ que_thr_t* thr, /* in: query thread */ + ibool check_ref,/* in: TRUE if index may be referenced in + a foreign key constraint */ mtr_t* mtr) /* in: mtr; gets committed here */ { mem_heap_t* heap; @@ -958,6 +1172,7 @@ row_upd_clust_rec_by_insert( return(err); } + /* Mark as not-owned the externally stored fields which the new row inherits from the delete marked record: purge should not free those externally stored fields even if the delete marked @@ -965,6 +1180,19 @@ row_upd_clust_rec_by_insert( btr_cur_mark_extern_inherited_fields(btr_cur_get_rec(btr_cur), node->update, mtr); + if (check_ref) { + /* NOTE that the following call loses + the position of pcur ! 
*/ + err = row_upd_check_references_constraints( + pcur, table, + index, thr, mtr); + if (err != DB_SUCCESS) { + mtr_commit(mtr); + + return(err); + } + } + } mtr_commit(mtr); @@ -1095,6 +1323,8 @@ row_upd_del_mark_clust_rec( upd_node_t* node, /* in: row update node */ dict_index_t* index, /* in: clustered index */ que_thr_t* thr, /* in: query thread */ + ibool check_ref,/* in: TRUE if index may be referenced in + a foreign key constraint */ mtr_t* mtr) /* in: mtr; gets committed here */ { btr_pcur_t* pcur; @@ -1120,6 +1350,18 @@ row_upd_del_mark_clust_rec( err = btr_cur_del_mark_set_clust_rec(BTR_NO_LOCKING_FLAG, btr_cur, TRUE, thr, mtr); + if (err == DB_SUCCESS && check_ref) { + /* NOTE that the following call loses + the position of pcur ! */ + err = row_upd_check_references_constraints(pcur, index->table, + index, thr, mtr); + if (err != DB_SUCCESS) { + mtr_commit(mtr); + + return(err); + } + } + mtr_commit(mtr); return(err); @@ -1140,12 +1382,15 @@ row_upd_clust_step( dict_index_t* index; btr_pcur_t* pcur; ibool success; + ibool check_ref; ulint err; - mtr_t mtr_buf; mtr_t* mtr; + mtr_t mtr_buf; index = dict_table_get_first_index(node->table); + check_ref = row_upd_index_is_referenced(index); + pcur = node->pcur; /* We have to restore the cursor to its position */ @@ -1210,8 +1455,8 @@ row_upd_clust_step( /* NOTE: the following function calls will also commit mtr */ if (node->is_delete) { - err = row_upd_del_mark_clust_rec(node, index, thr, mtr); - + err = row_upd_del_mark_clust_rec(node, index, thr, check_ref, + mtr); if (err != DB_SUCCESS) { return(err); @@ -1244,7 +1489,7 @@ row_upd_clust_step( row_upd_store_row(node); - if (row_upd_changes_ord_field(node->row, index, node->update)) { + if (row_upd_changes_ord_field_binary(node->row, index, node->update)) { /* Update causes an ordering field (ordering fields within the B-tree) of the clustered index record to change: perform @@ -1257,8 +1502,8 @@ row_upd_clust_step( choosing records to update. 
MySQL solves now the problem externally! */ - err = row_upd_clust_rec_by_insert(node, index, thr, mtr); - + err = row_upd_clust_rec_by_insert(node, index, thr, check_ref, + mtr); if (err != DB_SUCCESS) { return(err); @@ -1304,8 +1549,8 @@ row_upd( interpreter: we must calculate it on the fly: */ if (node->is_delete || - row_upd_changes_some_index_ord_field(node->table, - node->update)) { + row_upd_changes_some_index_ord_field_binary( + node->table, node->update)) { node->cmpl_info = 0; } else { node->cmpl_info = UPD_NODE_NO_ORD_CHANGE; diff --git a/innobase/row/row0vers.c b/innobase/row/row0vers.c index 4dc65669247..5b62cd2b7e3 100644 --- a/innobase/row/row0vers.c +++ b/innobase/row/row0vers.c @@ -269,7 +269,13 @@ row_vers_old_has_index_entry( row = row_build(ROW_COPY_POINTERS, clust_index, rec, heap); entry = row_build_index_entry(row, index, heap); - if (dtuple_datas_are_equal(ientry, entry)) { + /* NOTE that we cannot do the comparison as binary + fields because the row is maybe being modified so that + the clustered index record has already been updated + to a different binary value in a char field, but the + collation identifies the old and new value anyway! */ + + if (dtuple_datas_are_ordering_equal(ientry, entry)) { mem_heap_free(heap); @@ -307,7 +313,13 @@ row_vers_old_has_index_entry( prev_version, heap); entry = row_build_index_entry(row, index, heap); - if (dtuple_datas_are_equal(ientry, entry)) { + /* NOTE that we cannot do the comparison as binary + fields because maybe the secondary index record has + already been updated to a different binary value in + a char field, but the collation identifies the old + and new value anyway! 
*/ + + if (dtuple_datas_are_ordering_equal(ientry, entry)) { mem_heap_free(heap); diff --git a/innobase/srv/srv0srv.c b/innobase/srv/srv0srv.c index 1237a788622..45f7b1b6879 100644 --- a/innobase/srv/srv0srv.c +++ b/innobase/srv/srv0srv.c @@ -30,6 +30,7 @@ Created 10/8/1995 Heikki Tuuri #include "ut0mem.h" #include "os0proc.h" #include "mem0mem.h" +#include "mem0pool.h" #include "sync0sync.h" #include "sync0ipm.h" #include "thr0loc.h" @@ -46,11 +47,14 @@ Created 10/8/1995 Heikki Tuuri #include "ibuf0ibuf.h" #include "buf0flu.h" #include "btr0sea.h" +#include "dict0load.h" /* The following counter is incremented whenever there is some user activity in the server */ ulint srv_activity_count = 0; +char* srv_main_thread_op_info = ""; + /* Server parameters which are read from the initfile */ /* The following three are dir paths which are catenated before file @@ -66,6 +70,11 @@ ulint* srv_data_file_sizes = NULL; /* size in database pages */ ulint* srv_data_file_is_raw_partition = NULL; +/* If the following is TRUE we do not allow inserts etc. This protects +the user from forgetting the 'newraw' keyword to my.cnf */ + +ibool srv_created_new_raw = FALSE; + char** srv_log_group_home_dirs = NULL; ulint srv_n_log_groups = ULINT_MAX; @@ -75,6 +84,9 @@ ibool srv_log_archive_on = TRUE; ulint srv_log_buffer_size = ULINT_MAX; /* size in database pages */ ibool srv_flush_log_at_trx_commit = TRUE; +byte srv_latin1_ordering[256]; /* The sort order table of the latin1 + character set */ + ibool srv_use_native_aio = FALSE; ulint srv_pool_size = ULINT_MAX; /* size in database pages; @@ -93,6 +105,11 @@ ulint srv_lock_wait_timeout = 1024 * 1024 * 1024; char* srv_unix_file_flush_method_str = NULL; ulint srv_unix_file_flush_method = 0; +/* If the following is != 0 we do not allow inserts etc. 
This protects +the user from forgetting innodb_force_recovery keyword to my.cnf */ + +ulint srv_force_recovery = 0; + ibool srv_use_doublewrite_buf = TRUE; ibool srv_set_thread_priorities = TRUE; @@ -115,6 +132,10 @@ ulint srv_n_rows_inserted = 0; ulint srv_n_rows_updated = 0; ulint srv_n_rows_deleted = 0; ulint srv_n_rows_read = 0; +ulint srv_n_rows_inserted_old = 0; +ulint srv_n_rows_updated_old = 0; +ulint srv_n_rows_deleted_old = 0; +ulint srv_n_rows_read_old = 0; ibool srv_print_innodb_monitor = FALSE; ibool srv_print_innodb_lock_monitor = FALSE; @@ -125,6 +146,7 @@ ibool srv_print_innodb_tablespace_monitor = FALSE; stderr on startup/shutdown */ ibool srv_print_verbose_log = TRUE; +ibool srv_print_innodb_table_monitor = FALSE; /* The parameters below are obsolete: */ @@ -1745,31 +1767,153 @@ srv_release_mysql_thread_if_suspended( } /************************************************************************* -A thread which wakes up threads whose lock wait may have lasted too long. */ +A thread which wakes up threads whose lock wait may have lasted too long. +This also prints the info output by various InnoDB monitors. */ #ifndef __WIN__ void* #else ulint #endif -srv_lock_timeout_monitor_thread( -/*============================*/ +srv_lock_timeout_and_monitor_thread( +/*================================*/ /* out: a dummy parameter */ void* arg) /* in: a dummy parameter required by os_thread_create */ { + double time_elapsed; + time_t current_time; + time_t last_monitor_time; ibool some_waits; srv_slot_t* slot; double wait_time; ulint i; UT_NOT_USED(arg); + last_monitor_time = time(NULL); loop: /* When someone is waiting for a lock, we wake up every second and check if a timeout has passed for a lock wait */ - os_thread_sleep(1000000); - + os_thread_sleep(1000000); + + /* In case mutex_exit is not a memory barrier, it is + theoretically possible some threads are left waiting though + the semaphore is already released. 
Wake up those threads: */ + + sync_arr_wake_threads_if_sema_free(); + + current_time = time(NULL); + + time_elapsed = difftime(current_time, last_monitor_time); + + if (time_elapsed > 15) { + + last_monitor_time = time(NULL); + + if (srv_print_innodb_monitor) { + + printf("=====================================\n"); + ut_print_timestamp(stdout); + + printf(" INNODB MONITOR OUTPUT\n" + "=====================================\n"); + printf("----------\n" + "SEMAPHORES\n" + "----------\n"); + sync_print(); + printf("------------\n" + "TRANSACTIONS\n" + "------------\n"); + lock_print_info(); + printf("--------\n" + "FILE I/O\n" + "--------\n"); + os_aio_print(); + printf("-------------\n" + "INSERT BUFFER\n" + "-------------\n"); + ibuf_print(); + printf("---\n" + "LOG\n" + "---\n"); + log_print(); + printf("----------------------\n" + "BUFFER POOL AND MEMORY\n" + "----------------------\n"); + printf( + "Total memory allocated %lu; in additional pool allocated %lu\n", + ut_total_allocated_memory, + mem_pool_get_reserved(mem_comm_pool)); + buf_print_io(); + printf("--------------\n" + "ROW OPERATIONS\n" + "--------------\n"); + printf("InnoDB main thread state: %s\n", + srv_main_thread_op_info); + printf( + "Number of rows inserted %lu, updated %lu, deleted %lu, read %lu\n", + srv_n_rows_inserted, + srv_n_rows_updated, + srv_n_rows_deleted, + srv_n_rows_read); + printf( + "%.2f inserts/s, %.2f updates/s, %.2f deletes/s, %.2f reads/s\n", + (srv_n_rows_inserted - srv_n_rows_inserted_old) + / time_elapsed, + (srv_n_rows_updated - srv_n_rows_updated_old) + / time_elapsed, + (srv_n_rows_deleted - srv_n_rows_deleted_old) + / time_elapsed, + (srv_n_rows_read - srv_n_rows_read_old) + / time_elapsed); + + srv_n_rows_inserted_old = srv_n_rows_inserted; + srv_n_rows_updated_old = srv_n_rows_updated; + srv_n_rows_deleted_old = srv_n_rows_deleted; + srv_n_rows_read_old = srv_n_rows_read; + + printf("----------------------------\n" + "END OF INNODB MONITOR OUTPUT\n" + 
"============================\n"); + + + } + + if (srv_print_innodb_tablespace_monitor) { + + printf("================================================\n"); + + ut_print_timestamp(stdout); + + printf(" INNODB TABLESPACE MONITOR OUTPUT\n" + "================================================\n"); + + fsp_print(0); + fprintf(stderr, "Validating tablespace\n"); + fsp_validate(0); + fprintf(stderr, "Validation ok\n"); + printf("---------------------------------------\n" + "END OF INNODB TABLESPACE MONITOR OUTPUT\n" + "=======================================\n"); + } + + if (srv_print_innodb_table_monitor) { + + printf("===========================================\n"); + + ut_print_timestamp(stdout); + + printf(" INNODB TABLE MONITOR OUTPUT\n" + "===========================================\n"); + dict_print(); + + printf("-----------------------------------\n" + "END OF INNODB TABLE MONITOR OUTPUT\n" + "==================================\n"); + } + } + mutex_enter(&kernel_mutex); some_waits = FALSE; @@ -1792,11 +1936,10 @@ loop: /* Timeout exceeded or a wrap over in system time counter: cancel the lock request queued - by the transaction; NOTE that currently only - a record lock request can be waiting in - MySQL! 
*/ + by the transaction and release possible + other transactions waiting behind */ - lock_rec_cancel( + lock_cancel_waiting_and_release( thr_get_trx(slot->thr)->wait_lock); } } @@ -1806,11 +1949,15 @@ loop: mutex_exit(&kernel_mutex); - if (some_waits) { + if (some_waits || srv_print_innodb_monitor + || srv_print_innodb_lock_monitor + || srv_print_innodb_tablespace_monitor + || srv_print_innodb_table_monitor) { goto loop; } - /* No one was waiting for a lock: suspend this thread */ + /* No one was waiting for a lock and no monitor was active: + suspend this thread */ os_event_wait(srv_lock_timeout_thread_event); @@ -1823,6 +1970,36 @@ loop: #endif } +/************************************************************************* +A thread which prints warnings about semaphore waits which have lasted +too long. These can be used to track bugs which cause hangs. */ + +#ifndef __WIN__ +void* +#else +ulint +#endif +srv_error_monitor_thread( +/*=====================*/ + /* out: a dummy parameter */ + void* arg) /* in: a dummy parameter required by + os_thread_create */ +{ + UT_NOT_USED(arg); +loop: + os_thread_sleep(10000000); + + sync_array_print_long_waits(); + + goto loop; + +#ifndef __WIN__ + return(NULL); +#else + return(0); +#endif +} + /*********************************************************************** Tells the InnoDB server that there has been activity in the database and wakes up the master thread if it is suspended (not sleeping). 
Used @@ -1861,15 +2038,18 @@ srv_master_thread( os_thread_create */ { os_event_t event; + time_t last_flush_time; + time_t current_time; ulint old_activity_count; ulint n_pages_purged; ulint n_bytes_merged; ulint n_pages_flushed; ulint n_bytes_archived; + ulint n_ios; + ulint n_ios_old; + ulint n_ios_very_old; + ulint n_pend_ios; ulint i; - time_t last_flush_time; - time_t current_time; - time_t last_monitor_time; UT_NOT_USED(arg); @@ -1882,26 +2062,56 @@ srv_master_thread( mutex_exit(&kernel_mutex); os_event_set(srv_sys->operational); - - last_monitor_time = time(NULL); loop: + srv_main_thread_op_info = "reserving kernel mutex"; + + n_ios_very_old = log_sys->n_log_ios + buf_pool->n_pages_read + + buf_pool->n_pages_written; mutex_enter(&kernel_mutex); old_activity_count = srv_activity_count; mutex_exit(&kernel_mutex); - /* We run purge every 10 seconds, even if the server were active: */ + /* We run purge and a batch of ibuf_contract every 10 seconds, even + if the server were active: */ for (i = 0; i < 10; i++) { + n_ios_old = log_sys->n_log_ios + buf_pool->n_pages_read + + buf_pool->n_pages_written; + + srv_main_thread_op_info = "sleeping"; os_thread_sleep(1000000); + if (srv_force_recovery >= SRV_FORCE_NO_BACKGROUND) { + + goto loop; + } + /* We flush the log once in a second even if no commit is issued or the we have specified in my.cnf no flush at transaction commit */ + srv_main_thread_op_info = "flushing log"; log_flush_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP); + /* If there were less than 10 i/os during the + one second sleep, we assume that there is free + disk i/o capacity available, and it makes sense to + do an insert buffer merge. 
*/ + + n_pend_ios = buf_get_n_pending_ios() + + log_sys->n_pending_writes; + n_ios = log_sys->n_log_ios + buf_pool->n_pages_read + + buf_pool->n_pages_written; + if (n_pend_ios < 3 && (n_ios - n_ios_old < 10)) { + srv_main_thread_op_info = "doing insert buffer merge"; + ibuf_contract_for_n_pages(TRUE, 5); + + srv_main_thread_op_info = "flushing log"; + log_flush_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP); + } + if (srv_activity_count == old_activity_count) { if (srv_print_thread_releases) { @@ -1916,28 +2126,48 @@ loop: printf("Master thread wakes up!\n"); } + /* If there were less than 200 i/os during the 10 second period, + we assume that there is free disk i/o capacity available, and it + makes sense to do a buffer pool flush. */ + + n_pend_ios = buf_get_n_pending_ios() + log_sys->n_pending_writes; + n_ios = log_sys->n_log_ios + buf_pool->n_pages_read + + buf_pool->n_pages_written; + if (n_pend_ios < 3 && (n_ios - n_ios_very_old < 200)) { + + srv_main_thread_op_info = "flushing buffer pool pages"; + buf_flush_batch(BUF_FLUSH_LIST, 50, ut_dulint_max); + + srv_main_thread_op_info = "flushing log"; + log_flush_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP); + } + + /* We run a batch of insert buffer merge every 10 seconds, + even if the server were active */ + + srv_main_thread_op_info = "doing insert buffer merge"; + ibuf_contract_for_n_pages(TRUE, 5); + + srv_main_thread_op_info = "flushing log"; + log_flush_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP); + + /* We run a full purge every 10 seconds, even if the server + were active */ + n_pages_purged = 1; last_flush_time = time(NULL); while (n_pages_purged) { - /* TODO: replace this by a check if we are running - out of file space! 
*/ - if (srv_print_innodb_monitor) { - ut_print_timestamp(stdout); - printf(" InnoDB starts purge\n"); - } - - n_pages_purged = trx_purge(); - if (srv_print_innodb_monitor) { - ut_print_timestamp(stdout); - printf(" InnoDB purged %lu pages\n", n_pages_purged); - } + srv_main_thread_op_info = "purging"; + n_pages_purged = trx_purge(); current_time = time(NULL); if (difftime(current_time, last_flush_time) > 1) { + srv_main_thread_op_info = "flushing log"; + log_flush_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP); last_flush_time = current_time; } @@ -1947,67 +2177,7 @@ background_loop: /* In this loop we run background operations when the server is quiet */ - current_time = time(NULL); - - if (difftime(current_time, last_monitor_time) > 15) { - - last_monitor_time = time(NULL); - - if (srv_print_innodb_monitor) { - - printf("=====================================\n"); - ut_print_timestamp(stdout); - - printf(" INNODB MONITOR OUTPUT\n" - "=====================================\n"); - printf("------------\n" - "TRANSACTIONS\n" - "------------\n"); - lock_print_info(); - printf("-----------------------------------------------\n" - "CURRENT SEMAPHORES RESERVED AND SEMAPHORE WAITS\n" - "-----------------------------------------------\n"); - sync_print(); - printf("CURRENT PENDING FILE I/O'S\n" - "--------------------------\n"); - os_aio_print(); - printf("-----------\n" - "BUFFER POOL\n" - "-----------\n"); - buf_print_io(); - printf("--------------\n" - "ROW OPERATIONS\n" - "--------------\n"); - printf( - "Number of rows inserted %lu, updated %lu, deleted %lu, read %lu\n", - srv_n_rows_inserted, - srv_n_rows_updated, - srv_n_rows_deleted, - srv_n_rows_read); - printf("Server activity counter %lu\n", srv_activity_count); - printf("----------------------------\n" - "END OF INNODB MONITOR OUTPUT\n" - "============================\n"); - } - - if (srv_print_innodb_tablespace_monitor) { - - printf("================================================\n"); - - 
ut_print_timestamp(stdout); - - printf(" INNODB TABLESPACE MONITOR OUTPUT\n" - "================================================\n"); - - fsp_print(0); - fprintf(stderr, "Validating tablespace\n"); - fsp_validate(0); - fprintf(stderr, "Validation ok\n"); - printf("---------------------------------------\n" - "END OF INNODB TABLESPACE MONITOR OUTPUT\n" - "=======================================\n"); - } - } + srv_main_thread_op_info = "reserving kernel mutex"; mutex_enter(&kernel_mutex); if (srv_activity_count != old_activity_count) { @@ -2020,17 +2190,11 @@ background_loop: /* The server has been quiet for a while: start running background operations */ - if (srv_print_innodb_monitor) { - ut_print_timestamp(stdout); - printf(" InnoDB starts purge\n"); - } + srv_main_thread_op_info = "purging"; n_pages_purged = trx_purge(); - if (srv_print_innodb_monitor) { - ut_print_timestamp(stdout); - printf(" InnoDB purged %lu pages\n", n_pages_purged); - } + srv_main_thread_op_info = "reserving kernel mutex"; mutex_enter(&kernel_mutex); if (srv_activity_count != old_activity_count) { @@ -2039,17 +2203,10 @@ background_loop: } mutex_exit(&kernel_mutex); - if (srv_print_innodb_monitor) { - ut_print_timestamp(stdout); - printf(" InnoDB starts insert buffer merge\n"); - } + srv_main_thread_op_info = "doing insert buffer merge"; + n_bytes_merged = ibuf_contract_for_n_pages(TRUE, 20); - n_bytes_merged = ibuf_contract(TRUE); - - if (srv_print_innodb_monitor) { - ut_print_timestamp(stdout); - printf(" InnoDB merged %lu bytes\n", n_bytes_merged); - } + srv_main_thread_op_info = "reserving kernel mutex"; mutex_enter(&kernel_mutex); if (srv_activity_count != old_activity_count) { @@ -2058,17 +2215,10 @@ background_loop: } mutex_exit(&kernel_mutex); - if (srv_print_innodb_monitor) { - ut_print_timestamp(stdout); - printf(" InnoDB (main thread) starts buffer pool flush\n"); - } - + srv_main_thread_op_info = "flushing buffer pool pages"; n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 
100, ut_dulint_max); - if (srv_print_innodb_monitor) { - ut_print_timestamp(stdout); - printf(" InnoDB flushed %lu pages\n", n_pages_flushed); - } + srv_main_thread_op_info = "reserving kernel mutex"; mutex_enter(&kernel_mutex); if (srv_activity_count != old_activity_count) { @@ -2077,16 +2227,23 @@ background_loop: } mutex_exit(&kernel_mutex); + srv_main_thread_op_info = "waiting for buffer pool flush to end"; buf_flush_wait_batch_end(BUF_FLUSH_LIST); + srv_main_thread_op_info = "making checkpoint"; + log_checkpoint(TRUE, FALSE); + srv_main_thread_op_info = "reserving kernel mutex"; + mutex_enter(&kernel_mutex); if (srv_activity_count != old_activity_count) { mutex_exit(&kernel_mutex); goto loop; } mutex_exit(&kernel_mutex); + + srv_main_thread_op_info = "archiving log (if log archive is on)"; log_archive_do(FALSE, &n_bytes_archived); @@ -2104,12 +2261,16 @@ background_loop: /* There is no work for background operations either: suspend master thread to wait for more server activity */ + srv_main_thread_op_info = "suspending"; + mutex_enter(&kernel_mutex); event = srv_suspend_thread(); mutex_exit(&kernel_mutex); + srv_main_thread_op_info = "waiting for server activity"; + os_event_wait(event); goto loop; diff --git a/innobase/srv/srv0start.c b/innobase/srv/srv0start.c index 2e9bade8b35..bdc8225a14f 100644 --- a/innobase/srv/srv0start.c +++ b/innobase/srv/srv0start.c @@ -73,7 +73,10 @@ os_thread_id_t thread_ids[SRV_MAX_N_IO_THREADS + 5]; #define SRV_N_PENDING_IOS_PER_THREAD OS_AIO_N_PENDING_IOS_PER_THREAD #define SRV_MAX_N_PENDING_SYNC_IOS 100 -#define SRV_MAX_N_OPEN_FILES 25 +/* The following limit may be too big in some old operating systems: +we may get an assertion failure in os0file.c */ + +#define SRV_MAX_N_OPEN_FILES 500 #define SRV_LOG_SPACE_FIRST_ID 1000000000 @@ -315,7 +318,12 @@ open_or_create_data_files( ulint size_high; char name[10000]; - ut_a(srv_n_data_files < 1000); + if (srv_n_data_files >= 1000) { + fprintf(stderr, "InnoDB: can only have < 1000 
data files\n" + "InnoDB: you have defined %lu\n", + srv_n_data_files); + return(DB_ERROR); + } *sum_of_new_sizes = 0; @@ -336,6 +344,8 @@ open_or_create_data_files( /* The partition is opened, not created; then it is written over */ + srv_created_new_raw = TRUE; + files[i] = os_file_create( name, OS_FILE_OPEN, OS_FILE_NORMAL, OS_DATA_FILE, &ret); @@ -375,6 +385,7 @@ open_or_create_data_files( if (!ret) { fprintf(stderr, "InnoDB: Error in opening %s\n", name); + os_file_get_last_error(); return(DB_ERROR); } @@ -537,9 +548,6 @@ innobase_start_or_create_for_mysql(void) /*====================================*/ /* out: DB_SUCCESS or error code */ { - ulint i; - ulint k; - ulint err; ibool create_new_db; ibool log_file_created; ibool log_created = FALSE; @@ -550,6 +558,9 @@ innobase_start_or_create_for_mysql(void) ulint max_arch_log_no; ibool start_archive; ulint sum_of_new_sizes; + ulint err; + ulint i; + ulint k; mtr_t mtr; log_do_write = TRUE; @@ -866,17 +877,19 @@ innobase_start_or_create_for_mysql(void) SRV_MAX_N_IO_THREADS); */ } - /* Create the master thread which monitors the database - server, and does purge and other utility operations */ - - os_thread_create(&srv_master_thread, NULL, thread_ids + 1 + - SRV_MAX_N_IO_THREADS); /* fprintf(stderr, "Max allowed record size %lu\n", page_get_free_space_of_empty() / 2); */ - /* Create the thread which watches the timeouts for lock waits */ - os_thread_create(&srv_lock_timeout_monitor_thread, NULL, + /* Create the thread which watches the timeouts for lock waits + and prints InnoDB monitor info */ + + os_thread_create(&srv_lock_timeout_and_monitor_thread, NULL, thread_ids + 2 + SRV_MAX_N_IO_THREADS); + + /* Create the thread which warns of long semaphore waits */ + os_thread_create(&srv_error_monitor_thread, NULL, + thread_ids + 3 + SRV_MAX_N_IO_THREADS); + srv_was_started = TRUE; srv_is_being_started = FALSE; @@ -886,6 +899,17 @@ innobase_start_or_create_for_mysql(void) trx_sys_create_doublewrite_buf(); } + err = 
dict_create_or_check_foreign_constraint_tables(); + + if (err != DB_SUCCESS) { + return((int)DB_ERROR); + } + + /* Create the master thread which monitors the database + server, and does purge and other utility operations */ + + os_thread_create(&srv_master_thread, NULL, thread_ids + 1 + + SRV_MAX_N_IO_THREADS); /* buf_debug_prints = TRUE; */ if (srv_print_verbose_log) @@ -905,12 +929,16 @@ innobase_shutdown_for_mysql(void) /* out: DB_SUCCESS or error code */ { if (!srv_was_started) { - if (srv_is_being_started) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Warning: shutting down a not properly started database\n"); - } - return(DB_SUCCESS); + if (srv_is_being_started) { + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: Warning: shutting down a not properly started\n"); + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: or created database!\n"); + } + + return(DB_SUCCESS); } /* Flush buffer pool to disk, write the current lsn to @@ -919,6 +947,6 @@ innobase_shutdown_for_mysql(void) logs_empty_and_mark_files_at_shutdown(); ut_free_all_mem(); - + return((int) DB_SUCCESS); } diff --git a/innobase/sync/sync0arr.c b/innobase/sync/sync0arr.c index 4183f3f1c4c..e4c351b9d21 100644 --- a/innobase/sync/sync0arr.c +++ b/innobase/sync/sync0arr.c @@ -14,6 +14,7 @@ Created 9/5/1995 Heikki Tuuri #include "sync0sync.h" #include "sync0rw.h" #include "os0sync.h" +#include "srv0srv.h" /* WAIT ARRAY @@ -64,6 +65,8 @@ struct sync_cell_struct { ibool event_set; /* TRUE if the event is set */ os_event_t event; /* operating system event semaphore handle */ + time_t reservation_time;/* time when the thread reserved + the wait cell */ }; /* NOTE: It is allowed for a thread to wait @@ -321,16 +324,12 @@ sync_array_reserve_cell( sync_array_t* arr, /* in: wait array */ void* object, /* in: pointer to the object to wait for */ ulint type, /* in: lock request type */ - #ifdef UNIV_SYNC_DEBUG - char* file, /* in: in debug version file where - requested */ - 
ulint line, /* in: in the debug version line where - requested */ - #endif + char* file, /* in: file where requested */ + ulint line, /* in: line where requested */ ulint* index) /* out: index of the reserved cell */ { - ulint i; sync_cell_t* cell; + ulint i; ut_a(object); ut_a(index); @@ -350,18 +349,15 @@ sync_array_reserve_cell( sync_cell_event_reset(cell); } + cell->reservation_time = time(NULL); + cell->thread = os_thread_get_curr_id(); + cell->wait_object = object; cell->request_type = type; - cell->thread = os_thread_get_curr_id(); cell->waiting = FALSE; - #ifdef UNIV_SYNC_DEBUG cell->file = file; cell->line = line; - #else - cell->file = "NOT KNOWN"; - cell->line = 0; - #endif arr->n_reserved++; @@ -436,6 +432,7 @@ static void sync_array_cell_print( /*==================*/ + FILE* file, /* in: file where to print */ sync_cell_t* cell) /* in: sync cell */ { mutex_t* mutex; @@ -445,53 +442,63 @@ sync_array_cell_print( type = cell->request_type; + fprintf(file, +"--Thread %lu has waited at %s line %lu for %.2f seconds the semaphore:\n", + (ulint)cell->thread, cell->file, cell->line, + difftime(time(NULL), cell->reservation_time)); + if (type == SYNC_MUTEX) { - str = "MUTEX ENTER"; mutex = (mutex_t*)cell->wait_object; - printf("Mutex created in file %s line %lu", - mutex->cfile_name, mutex->cline); + fprintf(file, + "Mutex at %lx created file %s line %lu, lock var %lu\n", + (ulint)mutex, mutex->cfile_name, mutex->cline, + mutex->lock_word); + fprintf(file, + "Last time reserved in file %s line %lu, waiters flag %lu\n", + mutex->file_name, mutex->line, mutex->waiters); + } else if (type == RW_LOCK_EX || type == RW_LOCK_SHARED) { if (type == RW_LOCK_EX) { - str = "X-LOCK"; + fprintf(file, "X-lock on"); } else { - str = "S_LOCK"; + fprintf(file, "S-lock on"); } rwlock = (rw_lock_t*)cell->wait_object; - printf("Rw-latch created in file %s line %lu", - rwlock->cfile_name, rwlock->cline); + fprintf(file, " RW-latch at %lx created in file %s line %lu\n", + 
(ulint)rwlock, rwlock->cfile_name, rwlock->cline); if (rwlock->writer != RW_LOCK_NOT_LOCKED) { - printf(" writer reserved with %lu", rwlock->writer); + fprintf(file, + "a writer (thread id %lu) has reserved it in mode", + (ulint)rwlock->writer_thread); + if (rwlock->writer == RW_LOCK_EX) { + fprintf(file, " exclusive\n"); + } else { + fprintf(file, " wait exclusive\n"); + } } - if (rwlock->writer == RW_LOCK_EX) { - printf(" reserv. thread id %lu", - (ulint)rwlock->writer_thread); - } - - if (rwlock->reader_count > 0) { - printf(" readers %lu", rwlock->reader_count); - } + fprintf(file, "number of readers %lu, waiters flag %lu\n", + rwlock->reader_count, rwlock->waiters); + + fprintf(file, "Last time read locked in file %s line %lu\n", + rwlock->last_s_file_name, rwlock->last_s_line); + fprintf(file, "Last time write locked in file %s line %lu\n", + rwlock->last_x_file_name, rwlock->last_x_line); } else { ut_error; } - printf(" at addr %lx waited for by thread %lu op. %s file %s line %lu ", - (ulint)cell->wait_object, - (ulint)cell->thread, - str, cell->file, cell->line); if (!cell->waiting) { - printf("WAIT ENDED "); + fprintf(file, "wait has ended\n"); } if (cell->event_set) { - printf("EVENT SET"); + fprintf(file, "wait is ending\n"); } - - printf("\n"); } /********************************************************************** @@ -620,14 +627,15 @@ sync_array_detect_deadlock( released the mutex: in this case no deadlock can occur, as the wait array cannot contain a thread with ID_UNDEFINED value. 
*/ + ret = sync_array_deadlock_step(arr, start, thread, 0, - depth); + depth); if (ret) { printf( "Mutex %lx owned by thread %lu file %s line %lu\n", (ulint)mutex, mutex->thread_id, mutex->file_name, mutex->line); - sync_array_cell_print(cell); + sync_array_cell_print(stdout, cell); return(TRUE); } } @@ -636,11 +644,11 @@ sync_array_detect_deadlock( } else if (cell->request_type == RW_LOCK_EX) { - lock = cell->wait_object; + lock = cell->wait_object; - debug = UT_LIST_GET_FIRST(lock->debug_list); + debug = UT_LIST_GET_FIRST(lock->debug_list); - while (debug != NULL) { + while (debug != NULL) { thread = debug->thread_id; @@ -661,23 +669,23 @@ sync_array_detect_deadlock( if (ret) { printf("rw-lock %lx ", (ulint) lock); rw_lock_debug_print(debug); - sync_array_cell_print(cell); + sync_array_cell_print(stdout, cell); return(TRUE); } } debug = UT_LIST_GET_NEXT(list, debug); - } + } - return(FALSE); + return(FALSE); } else if (cell->request_type == RW_LOCK_SHARED) { - lock = cell->wait_object; - debug = UT_LIST_GET_FIRST(lock->debug_list); + lock = cell->wait_object; + debug = UT_LIST_GET_FIRST(lock->debug_list); - while (debug != NULL) { + while (debug != NULL) { thread = debug->thread_id; @@ -694,16 +702,16 @@ sync_array_detect_deadlock( if (ret) { printf("rw-lock %lx ", (ulint) lock); rw_lock_debug_print(debug); - sync_array_cell_print(cell); + sync_array_cell_print(stdout, cell); return(TRUE); } } debug = UT_LIST_GET_NEXT(list, debug); - } + } - return(FALSE); + return(FALSE); } else { ut_error; @@ -714,6 +722,55 @@ sync_array_detect_deadlock( } /********************************************************************** +Determines if we can wake up the thread waiting for a sempahore. 
*/ +static +ibool +sync_arr_cell_can_wake_up( +/*======================*/ + sync_cell_t* cell) /* in: cell to search */ +{ + mutex_t* mutex; + rw_lock_t* lock; + + if (cell->request_type == SYNC_MUTEX) { + + mutex = cell->wait_object; + + if (mutex_get_lock_word(mutex) == 0) { + + return(TRUE); + } + + } else if (cell->request_type == RW_LOCK_EX) { + + lock = cell->wait_object; + + if (rw_lock_get_reader_count(lock) == 0 + && rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED) { + + return(TRUE); + } + + if (rw_lock_get_reader_count(lock) == 0 + && rw_lock_get_writer(lock) == RW_LOCK_WAIT_EX + && lock->writer_thread == cell->thread) { + + return(TRUE); + } + + } else if (cell->request_type == RW_LOCK_SHARED) { + lock = cell->wait_object; + + if (rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED) { + + return(TRUE); + } + } + + return(FALSE); +} + +/********************************************************************** Frees the cell. NOTE! sync_array_wait_event frees the cell automatically! */ @@ -740,9 +797,8 @@ sync_array_free_cell( } /************************************************************************** -Looks for the cells in the wait array which refer -to the wait object specified, -and sets their corresponding events to the signaled state. In this +Looks for the cells in the wait array which refer to the wait object +specified, and sets their corresponding events to the signaled state. In this way releases the threads waiting for the object to contend for the object. It is possible that no such cell is found, in which case does nothing. */ @@ -783,6 +839,88 @@ sync_array_signal_object( } /************************************************************************** +If the wakeup algorithm does not work perfectly at semaphore relases, +this function will do the waking (see the comment in mutex_exit). This +function should be called about every 1 second in the server. 
*/ + +void +sync_arr_wake_threads_if_sema_free(void) +/*====================================*/ +{ + sync_array_t* arr = sync_primary_wait_array; + sync_cell_t* cell; + ulint count; + ulint i; + + sync_array_enter(arr); + + i = 0; + count = 0; + + while (count < arr->n_reserved) { + + cell = sync_array_get_nth_cell(arr, i); + + if (cell->wait_object != NULL) { + + count++; + + if (sync_arr_cell_can_wake_up(cell)) { + + sync_cell_event_set(cell); + } + } + + i++; + } + + sync_array_exit(arr); +} + +/************************************************************************** +Prints warnings of long semaphore waits to stderr. Currently > 120 sec. */ + +void +sync_array_print_long_waits(void) +/*=============================*/ +{ + sync_cell_t* cell; + ibool old_val; + ibool noticed = FALSE; + ulint i; + + for (i = 0; i < sync_primary_wait_array->n_cells; i++) { + + cell = sync_array_get_nth_cell(sync_primary_wait_array, i); + + if (cell->wait_object != NULL + && difftime(time(NULL), cell->reservation_time) > 120) { + + fprintf(stderr, + "InnoDB: Warning: a long semaphore wait:\n"); + sync_array_cell_print(stderr, cell); + + noticed = TRUE; + } + } + + if (noticed) { + fprintf(stderr, +"InnoDB: ###### Starts InnoDB Monitor for 30 secs to print diagnostic info:\n"); + old_val = srv_print_innodb_monitor; + + srv_print_innodb_monitor = TRUE; + os_event_set(srv_lock_timeout_thread_event); + + os_thread_sleep(30000000); + + srv_print_innodb_monitor = old_val; + fprintf(stderr, +"InnoDB: ###### Diagnostic info printed to the standard output\n"); + } +} + +/************************************************************************** Prints info of the wait array. 
*/ static void @@ -795,9 +933,8 @@ sync_array_output_info( ulint count; ulint i; - printf("-----------------------------------------------------\n"); - printf("SYNC ARRAY INFO: reservation count %ld, signal count %ld\n", - arr->res_count, arr->sg_count); + printf("OS WAIT ARRAY INFO: reservation count %ld, signal count %ld\n", + arr->res_count, arr->sg_count); i = 0; count = 0; @@ -807,7 +944,7 @@ sync_array_output_info( if (cell->wait_object != NULL) { count++; - sync_array_cell_print(cell); + sync_array_cell_print(stdout, cell); } i++; diff --git a/innobase/sync/sync0rw.c b/innobase/sync/sync0rw.c index dc49ce2197e..1ef2920618f 100644 --- a/innobase/sync/sync0rw.c +++ b/innobase/sync/sync0rw.c @@ -17,11 +17,13 @@ Created 9/11/1995 Heikki Tuuri ulint rw_s_system_call_count = 0; ulint rw_s_spin_wait_count = 0; +ulint rw_s_os_wait_count = 0; ulint rw_s_exit_count = 0; ulint rw_x_system_call_count = 0; ulint rw_x_spin_wait_count = 0; +ulint rw_x_os_wait_count = 0; ulint rw_x_exit_count = 0; @@ -95,8 +97,7 @@ rw_lock_create_func( mutex_create(rw_lock_get_mutex(lock)); mutex_set_level(rw_lock_get_mutex(lock), SYNC_NO_ORDER_CHECK); - ut_memcpy(&(lock->mutex.cfile_name), cfile_name, - ut_min(RW_CNAME_LEN - 1, ut_strlen(cfile_name))); + lock->mutex.cfile_name = cfile_name; lock->mutex.cline = cline; rw_lock_set_waiters(lock, 0); @@ -111,11 +112,14 @@ rw_lock_create_func( lock->magic_n = RW_LOCK_MAGIC_N; lock->level = SYNC_LEVEL_NONE; - ut_memcpy(&(lock->cfile_name), cfile_name, - ut_min(RW_CNAME_LEN - 1, ut_strlen(cfile_name))); - lock->cfile_name[RW_CNAME_LEN - 1] = '\0'; + lock->cfile_name = cfile_name; lock->cline = cline; + lock->last_s_file_name = "not yet reserved"; + lock->last_x_file_name = "not yet reserved"; + lock->last_s_line = 0; + lock->last_x_line = 0; + mutex_enter(&rw_lock_list_mutex); UT_LIST_ADD_FIRST(list, rw_lock_list, lock); @@ -186,14 +190,11 @@ for the lock, before suspending the thread. 
*/ void rw_lock_s_lock_spin( /*================*/ - rw_lock_t* lock /* in: pointer to rw-lock */ - #ifdef UNIV_SYNC_DEBUG - ,ulint pass, /* in: pass value; != 0, if the lock + rw_lock_t* lock, /* in: pointer to rw-lock */ + ulint pass, /* in: pass value; != 0, if the lock will be passed to another thread to unlock */ char* file_name, /* in: file name where lock requested */ - ulint line /* in: line where requested */ - #endif -) + ulint line) /* in: line where requested */ { ulint index; /* index of the reserved wait cell */ ulint i; /* spin round count */ @@ -203,7 +204,7 @@ rw_lock_s_lock_spin( lock_loop: rw_s_spin_wait_count++; - /* Spin waiting for the writer field to become free */ + /* Spin waiting for the writer field to become free */ i = 0; while (rw_lock_get_writer(lock) != RW_LOCK_NOT_LOCKED @@ -223,19 +224,14 @@ lock_loop: printf( "Thread %lu spin wait rw-s-lock at %lx cfile %s cline %lu rnds %lu\n", os_thread_get_curr_id(), (ulint)lock, - &(lock->cfile_name), lock->cline, i); + lock->cfile_name, lock->cline, i); } mutex_enter(rw_lock_get_mutex(lock)); /* We try once again to obtain the lock */ - if (TRUE == rw_lock_s_lock_low(lock - #ifdef UNIV_SYNC_DEBUG - , pass, file_name, - line - #endif - )) { + if (TRUE == rw_lock_s_lock_low(lock, pass, file_name, line)) { mutex_exit(rw_lock_get_mutex(lock)); return; /* Success */ @@ -247,9 +243,7 @@ lock_loop: sync_array_reserve_cell(sync_primary_wait_array, lock, RW_LOCK_SHARED, - #ifdef UNIV_SYNC_DEBUG file_name, line, - #endif &index); rw_lock_set_waiters(lock, 1); @@ -260,12 +254,13 @@ lock_loop: printf( "Thread %lu OS wait rw-s-lock at %lx cfile %s cline %lu\n", os_thread_get_curr_id(), (ulint)lock, - &(lock->cfile_name), lock->cline); + lock->cfile_name, lock->cline); } rw_s_system_call_count++; + rw_s_os_wait_count++; - sync_array_wait_event(sync_primary_wait_array, index); + sync_array_wait_event(sync_primary_wait_array, index); goto lock_loop; } @@ -307,13 +302,10 @@ rw_lock_x_lock_low( not succeed, 
RW_LOCK_EX if success, RW_LOCK_WAIT_EX, if got wait reservation */ rw_lock_t* lock, /* in: pointer to rw-lock */ - ulint pass /* in: pass value; != 0, if the lock will + ulint pass, /* in: pass value; != 0, if the lock will be passed to another thread to unlock */ - #ifdef UNIV_SYNC_DEBUG - ,char* file_name, /* in: file name where lock requested */ - ulint line /* in: line where requested */ - #endif -) + char* file_name,/* in: file name where lock requested */ + ulint line) /* in: line where requested */ { ut_ad(mutex_own(rw_lock_get_mutex(lock))); @@ -330,6 +322,8 @@ rw_lock_x_lock_low( rw_lock_add_debug_info(lock, pass, RW_LOCK_EX, file_name, line); #endif + lock->last_x_file_name = file_name; + lock->last_x_line = line; /* Locking succeeded, we may return */ return(RW_LOCK_EX); @@ -364,6 +358,9 @@ rw_lock_x_lock_low( file_name, line); #endif + lock->last_x_file_name = file_name; + lock->last_x_line = line; + /* Locking succeeded, we may return */ return(RW_LOCK_EX); } @@ -382,6 +379,9 @@ rw_lock_x_lock_low( line); #endif + lock->last_x_file_name = file_name; + lock->last_x_line = line; + /* Locking succeeded, we may return */ return(RW_LOCK_EX); } @@ -404,13 +404,10 @@ void rw_lock_x_lock_func( /*================*/ rw_lock_t* lock, /* in: pointer to rw-lock */ - ulint pass /* in: pass value; != 0, if the lock will + ulint pass, /* in: pass value; != 0, if the lock will be passed to another thread to unlock */ - #ifdef UNIV_SYNC_DEBUG - ,char* file_name, /* in: file name where lock requested */ - ulint line /* in: line where requested */ - #endif -) + char* file_name,/* in: file name where lock requested */ + ulint line) /* in: line where requested */ { ulint index; /* index of the reserved wait cell */ ulint state; /* lock state acquired */ @@ -422,11 +419,7 @@ lock_loop: /* Acquire the mutex protecting the rw-lock fields */ mutex_enter_fast(&(lock->mutex)); - state = rw_lock_x_lock_low(lock, pass - #ifdef UNIV_SYNC_DEBUG - ,file_name, line - #endif - ); + 
state = rw_lock_x_lock_low(lock, pass, file_name, line); mutex_exit(&(lock->mutex)); @@ -469,6 +462,7 @@ lock_loop: os_thread_yield(); } } else { + i = 0; /* Eliminate a compiler warning */ ut_error; } @@ -476,7 +470,7 @@ lock_loop: printf( "Thread %lu spin wait rw-x-lock at %lx cfile %s cline %lu rnds %lu\n", os_thread_get_curr_id(), (ulint)lock, - &(lock->cfile_name), lock->cline, i); + lock->cfile_name, lock->cline, i); } rw_x_spin_wait_count++; @@ -486,11 +480,7 @@ lock_loop: mutex_enter(rw_lock_get_mutex(lock)); - state = rw_lock_x_lock_low(lock, pass - #ifdef UNIV_SYNC_DEBUG - ,file_name, line - #endif - ); + state = rw_lock_x_lock_low(lock, pass, file_name, line); if (state == RW_LOCK_EX) { mutex_exit(rw_lock_get_mutex(lock)); @@ -502,9 +492,7 @@ lock_loop: sync_array_reserve_cell(sync_primary_wait_array, lock, RW_LOCK_EX, - #ifdef UNIV_SYNC_DEBUG file_name, line, - #endif &index); rw_lock_set_waiters(lock, 1); @@ -514,11 +502,12 @@ lock_loop: if (srv_print_latch_waits) { printf( "Thread %lu OS wait for rw-x-lock at %lx cfile %s cline %lu\n", - os_thread_get_curr_id(), (ulint)lock, &(lock->cfile_name), + os_thread_get_curr_id(), (ulint)lock, lock->cfile_name, lock->cline); } rw_x_system_call_count++; + rw_x_os_wait_count++; sync_array_wait_event(sync_primary_wait_array, index); @@ -537,8 +526,8 @@ rw_lock_debug_mutex_enter(void) /*==========================*/ { loop: - if (0 == mutex_enter_nowait(&rw_lock_debug_mutex)) { - + if (0 == mutex_enter_nowait(&rw_lock_debug_mutex, + IB__FILE__, __LINE__)) { return; } @@ -546,8 +535,8 @@ loop: rw_lock_debug_waiters = TRUE; - if (0 == mutex_enter_nowait(&rw_lock_debug_mutex)) { - + if (0 == mutex_enter_nowait(&rw_lock_debug_mutex, + IB__FILE__, __LINE__)) { return; } @@ -747,8 +736,6 @@ rw_lock_list_print_info(void) /*=========================*/ { #ifndef UNIV_SYNC_DEBUG - printf( - "Sorry, cannot give rw-lock list info in non-debug version!\n"); #else rw_lock_t* lock; ulint count = 0; @@ -756,8 +743,9 @@ 
rw_lock_list_print_info(void) mutex_enter(&rw_lock_list_mutex); - printf("----------------------------------------------\n"); - printf("RW-LOCK INFO\n"); + printf("-------------\n"); + printf("RW-LATCH INFO\n"); + printf("-------------\n"); lock = UT_LIST_GET_FIRST(rw_lock_list); @@ -810,9 +798,9 @@ rw_lock_print( ulint count = 0; rw_lock_debug_t* info; - printf("-------------------------------------------------\n"); - printf("RW-LOCK INFO\n"); - printf("RW-LOCK: %lx ", (ulint)lock); + printf("-------------\n"); + printf("RW-LATCH INFO\n"); + printf("RW-LATCH: %lx ", (ulint)lock); if ((rw_lock_get_writer(lock) != RW_LOCK_NOT_LOCKED) || (rw_lock_get_reader_count(lock) != 0) diff --git a/innobase/sync/sync0sync.c b/innobase/sync/sync0sync.c index f0dbe145098..8b2a39e15eb 100644 --- a/innobase/sync/sync0sync.c +++ b/innobase/sync/sync0sync.c @@ -119,6 +119,7 @@ ulint mutex_system_call_count = 0; ulint mutex_spin_round_count = 0; ulint mutex_spin_wait_count = 0; +ulint mutex_os_wait_count = 0; ulint mutex_exit_count = 0; /* The global array of wait cells for implementation of the database's own @@ -228,12 +229,10 @@ mutex_create_func( mutex_set_waiters(mutex, 0); mutex->magic_n = MUTEX_MAGIC_N; mutex->line = 0; - mutex->file_name = "FILE NOT KNOWN"; + mutex->file_name = "not yet reserved"; mutex->thread_id = ULINT_UNDEFINED; mutex->level = SYNC_LEVEL_NONE; - ut_memcpy(&(mutex->cfile_name), cfile_name, - ut_min(MUTEX_CNAME_LEN - 1, ut_strlen(cfile_name))); - mutex->cfile_name[MUTEX_CNAME_LEN - 1] = '\0'; + mutex->cfile_name = cfile_name; mutex->cline = cline; /* Check that lock_word is aligned; this is important on Intel */ @@ -291,17 +290,23 @@ immediately, returns with return value 1. 
*/ ulint mutex_enter_nowait( /*===============*/ - /* out: 0 if succeed, 1 if not */ - mutex_t* mutex) /* in: pointer to mutex */ + /* out: 0 if succeed, 1 if not */ + mutex_t* mutex, /* in: pointer to mutex */ + char* file_name, /* in: file name where mutex + requested */ + ulint line) /* in: line where requested */ { ut_ad(mutex_validate(mutex)); if (!mutex_test_and_set(mutex)) { #ifdef UNIV_SYNC_DEBUG - mutex_set_debug_info(mutex, IB__FILE__, __LINE__); + mutex_set_debug_info(mutex, file_name, line); #endif + mutex->file_name = file_name; + mutex->line = line; + return(0); /* Succeeded! */ } @@ -349,13 +354,9 @@ for the mutex before suspending the thread. */ void mutex_spin_wait( /*============*/ - mutex_t* mutex /* in: pointer to mutex */ - - #ifdef UNIV_SYNC_DEBUG - ,char* file_name, /* in: file name where mutex requested */ - ulint line /* in: line where requested */ - #endif -) + mutex_t* mutex, /* in: pointer to mutex */ + char* file_name, /* in: file name where mutex requested */ + ulint line) /* in: line where requested */ { ulint index; /* index of the reserved wait cell */ ulint i; /* spin round count */ @@ -391,7 +392,7 @@ spin_loop: if (srv_print_latch_waits) { printf( "Thread %lu spin wait mutex at %lx cfile %s cline %lu rnds %lu\n", - os_thread_get_curr_id(), (ulint)mutex, &(mutex->cfile_name), + os_thread_get_curr_id(), (ulint)mutex, mutex->cfile_name, mutex->cline, i); } @@ -404,6 +405,9 @@ spin_loop: mutex_set_debug_info(mutex, file_name, line); #endif + mutex->file_name = file_name; + mutex->line = line; + return; } @@ -423,9 +427,7 @@ spin_loop: sync_array_reserve_cell(sync_primary_wait_array, mutex, SYNC_MUTEX, - #ifdef UNIV_SYNC_DEBUG file_name, line, - #endif &index); mutex_system_call_count++; @@ -438,7 +440,9 @@ spin_loop: mutex_set_waiters(mutex, 1); - if (mutex_test_and_set(mutex) == 0) { + /* Try to reserve still a few times */ + for (i = 0; i < 4; i++) { + if (mutex_test_and_set(mutex) == 0) { /* Succeeded! 
Free the reserved wait cell */ @@ -448,6 +452,9 @@ spin_loop: mutex_set_debug_info(mutex, file_name, line); #endif + mutex->file_name = file_name; + mutex->line = line; + if (srv_print_latch_waits) { printf( "Thread %lu spin wait succeeds at 2: mutex at %lx\n", @@ -459,6 +466,7 @@ spin_loop: /* Note that in this case we leave the waiters field set to 1. We cannot reset it to zero, as we do not know if there are other waiters. */ + } } /* Now we know that there has been some thread holding the mutex @@ -468,11 +476,13 @@ spin_loop: if (srv_print_latch_waits) { printf( "Thread %lu OS wait mutex at %lx cfile %s cline %lu rnds %lu\n", - os_thread_get_curr_id(), (ulint)mutex, &(mutex->cfile_name), + os_thread_get_curr_id(), (ulint)mutex, mutex->cfile_name, mutex->cline, i); } mutex_system_call_count++; + mutex_os_wait_count++; + sync_array_wait_event(sync_primary_wait_array, index); goto mutex_loop; @@ -578,7 +588,6 @@ mutex_list_print_info(void) /*=======================*/ { #ifndef UNIV_SYNC_DEBUG - printf("Sorry, cannot give mutex list info in non-debug version!\n"); #else mutex_t* mutex; char* file_name; @@ -586,8 +595,9 @@ mutex_list_print_info(void) os_thread_id_t thread_id; ulint count = 0; - printf("-----------------------------------------------\n"); + printf("----------\n"); printf("MUTEX INFO\n"); + printf("----------\n"); mutex_enter(&mutex_list_mutex); @@ -597,10 +607,10 @@ mutex_list_print_info(void) count++; if (mutex_get_lock_word(mutex) != 0) { - - mutex_get_debug_info(mutex, &file_name, &line, &thread_id); - - printf("Locked mutex: addr %lx thread %ld file %s line %ld\n", + mutex_get_debug_info(mutex, &file_name, &line, + &thread_id); + printf( + "Locked mutex: addr %lx thread %ld file %s line %ld\n", (ulint)mutex, thread_id, file_name, line); } @@ -791,7 +801,7 @@ sync_thread_levels_g( limit, slot->level); if (mutex->magic_n == MUTEX_MAGIC_N) { - printf("Mutex created at %s %lu\n", &(mutex->cfile_name), + printf("Mutex created at %s %lu\n", 
mutex->cfile_name, mutex->cline); if (mutex_get_lock_word(mutex) != 0) { @@ -890,6 +900,7 @@ sync_thread_levels_empty_gen( if (slot->latch != NULL && (!dict_mutex_allowed || (slot->level != SYNC_DICT + && slot->level != SYNC_FOREIGN_KEY_CHECK && slot->level != SYNC_PURGE_IS_RUNNING))) { lock = slot->latch; @@ -993,6 +1004,8 @@ sync_thread_add_level( ut_a(sync_thread_levels_g(array, SYNC_RECV)); } else if (level == SYNC_LOG) { ut_a(sync_thread_levels_g(array, SYNC_LOG)); + } else if (level == SYNC_THR_LOCAL) { + ut_a(sync_thread_levels_g(array, SYNC_THR_LOCAL)); } else if (level == SYNC_ANY_LATCH) { ut_a(sync_thread_levels_g(array, SYNC_ANY_LATCH)); } else if (level == SYNC_TRX_SYS_HEADER) { @@ -1071,6 +1084,8 @@ sync_thread_add_level( SYNC_IBUF_PESS_INSERT_MUTEX)); } else if (level == SYNC_DICT_AUTOINC_MUTEX) { ut_a(sync_thread_levels_g(array, SYNC_DICT_AUTOINC_MUTEX)); + } else if (level == SYNC_FOREIGN_KEY_CHECK) { + ut_a(sync_thread_levels_g(array, SYNC_FOREIGN_KEY_CHECK)); } else if (level == SYNC_DICT_HEADER) { ut_a(sync_thread_levels_g(array, SYNC_DICT_HEADER)); } else if (level == SYNC_PURGE_IS_RUNNING) { @@ -1231,15 +1246,17 @@ void sync_print_wait_info(void) /*======================*/ { +#ifdef UNIV_SYNC_DEBUG + printf("Mutex exits %lu, rws exits %lu, rwx exits %lu\n", + mutex_exit_count, rw_s_exit_count, rw_x_exit_count); +#endif printf( - "Mut ex %lu sp %lu r %lu sys %lu; rws %lu %lu %lu; rwx %lu %lu %lu\n", - mutex_exit_count, +"Mutex spin waits %lu, rounds %lu, OS waits %lu\n" +"RW-shared spins %lu, OS waits %lu; RW-excl spins %lu, OS waits %lu\n", mutex_spin_wait_count, mutex_spin_round_count, - mutex_system_call_count, - rw_s_exit_count, - rw_s_spin_wait_count, rw_s_system_call_count, - rw_x_exit_count, - rw_x_spin_wait_count, rw_x_system_call_count); + mutex_os_wait_count, + rw_s_spin_wait_count, rw_s_os_wait_count, + rw_x_spin_wait_count, rw_x_os_wait_count); } /*********************************************************************** @@ -1249,10 
+1266,8 @@ void sync_print(void) /*============*/ { - printf("SYNC INFO:\n"); mutex_list_print_info(); rw_lock_list_print_info(); sync_array_print_info(sync_primary_wait_array); sync_print_wait_info(); - printf("-----------------------------------------------------\n"); } diff --git a/innobase/thr/thr0loc.c b/innobase/thr/thr0loc.c index 897e53557c3..d3d7a58d313 100644 --- a/innobase/thr/thr0loc.c +++ b/innobase/thr/thr0loc.c @@ -224,5 +224,5 @@ thr_local_init(void) thr_local_hash = hash_create(OS_THREAD_MAX_N + 100); mutex_create(&thr_local_mutex); - mutex_set_level(&thr_local_mutex, SYNC_ANY_LATCH); + mutex_set_level(&thr_local_mutex, SYNC_THR_LOCAL); } diff --git a/innobase/trx/trx0purge.c b/innobase/trx/trx0purge.c index afb83926fa3..c50ffb65e00 100644 --- a/innobase/trx/trx0purge.c +++ b/innobase/trx/trx0purge.c @@ -276,6 +276,12 @@ trx_purge_add_update_undo_to_history( if (undo->state != TRX_UNDO_CACHED) { /* The undo log segment will not be reused */ + if (undo->id >= TRX_RSEG_N_SLOTS) { + fprintf(stderr, + "InnoDB: Error: undo->id is %lu\n", undo->id); + ut_a(0); + } + trx_rsegf_set_nth_undo(rseg_header, undo->id, FIL_NULL, mtr); hist_size = mtr_read_ulint(rseg_header + TRX_RSEG_HISTORY_SIZE, diff --git a/innobase/trx/trx0rec.c b/innobase/trx/trx0rec.c index 64febb8f523..73153cfaa37 100644 --- a/innobase/trx/trx0rec.c +++ b/innobase/trx/trx0rec.c @@ -800,7 +800,7 @@ trx_undo_update_rec_get_update( TRX_UNDO_DEL_MARK_REC; in the last case, only trx id and roll ptr fields are added to the update vector */ - dulint trx_id, /* in: transaction id from this undorecord */ + dulint trx_id, /* in: transaction id from this undo record */ dulint roll_ptr,/* in: roll pointer from this undo record */ ulint info_bits,/* in: info bits from this undo record */ mem_heap_t* heap, /* in: memory heap from which the memory @@ -1078,9 +1078,7 @@ trx_undo_report_row_operation( undo_page = buf_page_get_gen(undo->space, page_no, RW_X_LATCH, undo->guess_page, BUF_GET, - #ifdef 
UNIV_SYNC_DEBUG IB__FILE__, __LINE__, - #endif &mtr); buf_page_dbg_add_level(undo_page, SYNC_TRX_UNDO_PAGE); @@ -1282,7 +1280,7 @@ trx_undo_prev_version_build( return(DB_SUCCESS); } - rec_trx_id = row_get_rec_trx_id(rec, index); + rec_trx_id = row_get_rec_trx_id(rec, index); err = trx_undo_get_undo_rec(roll_ptr, rec_trx_id, &undo_rec, heap); diff --git a/innobase/trx/trx0trx.c b/innobase/trx/trx0trx.c index 5d8c57edf34..13b37775dce 100644 --- a/innobase/trx/trx0trx.c +++ b/innobase/trx/trx0trx.c @@ -109,7 +109,10 @@ trx_create( UT_LIST_INIT(trx->trx_locks); trx->has_search_latch = FALSE; + trx->search_latch_timeout = BTR_SEA_TIMEOUT; + trx->auto_inc_lock = NULL; + trx->read_view_heap = mem_heap_create(256); trx->read_view = NULL; @@ -193,6 +196,7 @@ trx_free( ut_a(UT_LIST_GET_LEN(trx->wait_thrs) == 0); ut_a(!trx->has_search_latch); + ut_a(!trx->auto_inc_lock); if (trx->lock_heap) { mem_heap_free(trx->lock_heap); diff --git a/innobase/trx/trx0undo.c b/innobase/trx/trx0undo.c index 1f408428582..598090bdee2 100644 --- a/innobase/trx/trx0undo.c +++ b/innobase/trx/trx0undo.c @@ -361,6 +361,8 @@ trx_undo_page_init( mach_write_to_2(page_hdr + TRX_UNDO_PAGE_FREE, TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE); + fil_page_set_type(undo_page, FIL_PAGE_UNDO_LOG); + trx_undo_page_init_log(undo_page, type, mtr); } @@ -1106,6 +1108,12 @@ trx_undo_mem_create_at_db_start( page_t* last_page; trx_undo_rec_t* rec; + if (id >= TRX_RSEG_N_SLOTS) { + fprintf(stderr, + "InnoDB: Error: undo->id is %lu\n", id); + ut_a(0); + } + undo_page = trx_undo_page_get(rseg->space, page_no, mtr); page_header = undo_page + TRX_UNDO_PAGE_HDR; @@ -1251,7 +1259,13 @@ trx_undo_mem_create( trx_undo_t* undo; ut_ad(mutex_own(&(rseg->mutex))); - + + if (id >= TRX_RSEG_N_SLOTS) { + fprintf(stderr, + "InnoDB: Error: undo->id is %lu\n", id); + ut_a(0); + } + undo = mem_alloc(sizeof(trx_undo_t)); undo->id = id; @@ -1290,6 +1304,12 @@ trx_undo_mem_init_for_reuse( { ut_ad(mutex_own(&((undo->rseg)->mutex))); + if 
(undo->id >= TRX_RSEG_N_SLOTS) { + fprintf(stderr, + "InnoDB: Error: undo->id is %lu\n", undo->id); + ut_a(0); + } + undo->state = TRX_UNDO_ACTIVE; undo->del_marks = FALSE; undo->trx_id = trx_id; @@ -1308,6 +1328,12 @@ trx_undo_mem_free( /*==============*/ trx_undo_t* undo) /* in: the undo object to be freed */ { + if (undo->id >= TRX_RSEG_N_SLOTS) { + fprintf(stderr, + "InnoDB: Error: undo->id is %lu\n", undo->id); + ut_a(0); + } + mem_free(undo); } @@ -1493,6 +1519,9 @@ trx_undo_assign_undo( mutex_exit(&(rseg->mutex)); mtr_commit(&mtr); + fprintf(stderr, "InnoDB: no undo log slots free\n"); + ut_a(0); + return(NULL); } } @@ -1536,6 +1565,12 @@ trx_undo_set_state_at_finish( ut_ad(trx && undo && mtr); + if (undo->id >= TRX_RSEG_N_SLOTS) { + fprintf(stderr, + "InnoDB: Error: undo->id is %lu\n", undo->id); + ut_a(0); + } + undo_page = trx_undo_page_get(undo->space, undo->hdr_page_no, mtr); seg_hdr = undo_page + TRX_UNDO_SEG_HDR; diff --git a/innobase/ut/ut0mem.c b/innobase/ut/ut0mem.c index ebeefe0c297..630bd3a9b71 100644 --- a/innobase/ut/ut0mem.c +++ b/innobase/ut/ut0mem.c @@ -13,15 +13,22 @@ Created 5/11/1994 Heikki Tuuri #endif #include "mem0mem.h" - +#include "os0sync.h" /* This struct is placed first in every allocated memory block */ typedef struct ut_mem_block_struct ut_mem_block_t; +/* The total amount of memory currently allocated from the OS with malloc */ +ulint ut_total_allocated_memory = 0; + struct ut_mem_block_struct{ - UT_LIST_NODE_T(ut_mem_block_t) mem_block_list;/* mem block list node */ + UT_LIST_NODE_T(ut_mem_block_t) mem_block_list; + /* mem block list node */ + ulint size; /* size of allocated memory */ + ulint magic_n; }; +#define UT_MEM_MAGIC_N 1601650166 /* List of all memory blocks allocated from the operating system with malloc */ @@ -70,16 +77,17 @@ ut_malloc_low( if (ret == NULL) { fprintf(stderr, "InnoDB: Fatal error: cannot allocate %lu bytes of\n" - "InnoDB: memory with malloc!\n" - "InnoDB: Operating system errno: %lu\n" + "InnoDB: 
memory with malloc! Total allocated memory\n" + "InnoDB: by InnoDB %lu bytes. Operating system errno: %lu\n" "InnoDB: Cannot continue operation!\n" "InnoDB: Check if you should increase the swap file or\n" - "InnoDB: ulimits of your operating system.\n", n, errno); + "InnoDB: ulimits of your operating system.\n", + n, ut_total_allocated_memory, errno); os_fast_mutex_unlock(&ut_list_mutex); exit(1); - } + } if (set_to_zero) { #ifdef UNIV_SET_MEM_TO_ZERO @@ -87,6 +95,11 @@ ut_malloc_low( #endif } + ((ut_mem_block_t*)ret)->size = n + sizeof(ut_mem_block_t); + ((ut_mem_block_t*)ret)->magic_n = UT_MEM_MAGIC_N; + + ut_total_allocated_memory += n + sizeof(ut_mem_block_t); + UT_LIST_ADD_FIRST(mem_block_list, ut_mem_block_list, ((ut_mem_block_t*)ret)); os_fast_mutex_unlock(&ut_list_mutex); @@ -107,7 +120,7 @@ ut_malloc( return(ut_malloc_low(n, TRUE)); } /************************************************************************** -Frees a memory bloock allocated with ut_malloc. */ +Frees a memory block allocated with ut_malloc. 
*/ void ut_free( @@ -120,6 +133,11 @@ ut_free( os_fast_mutex_lock(&ut_list_mutex); + ut_a(block->magic_n == UT_MEM_MAGIC_N); + ut_a(ut_total_allocated_memory >= block->size); + + ut_total_allocated_memory -= block->size; + UT_LIST_REMOVE(mem_block_list, ut_mem_block_list, block); free(block); @@ -139,11 +157,18 @@ ut_free_all_mem(void) while (block = UT_LIST_GET_FIRST(ut_mem_block_list)) { + ut_a(block->magic_n == UT_MEM_MAGIC_N); + ut_a(ut_total_allocated_memory >= block->size); + + ut_total_allocated_memory -= block->size; + UT_LIST_REMOVE(mem_block_list, ut_mem_block_list, block); free(block); } os_fast_mutex_unlock(&ut_list_mutex); + + ut_a(ut_total_allocated_memory == 0); } /************************************************************************** diff --git a/innobase/ut/ut0ut.c b/innobase/ut/ut0ut.c index 1436f6a10a3..964d5bca567 100644 --- a/innobase/ut/ut0ut.c +++ b/innobase/ut/ut0ut.c @@ -187,6 +187,8 @@ ut_sprintf_buf( for (i = 0; i < len; i++) { if (isprint((char)(*data))) { n += sprintf(str + n, "%c", (char)*data); + } else { + n += sprintf(str + n, "."); } data++; diff --git a/libmysql/libmysql.c b/libmysql/libmysql.c index 2a7aeead1b4..c08c93e2439 100644 --- a/libmysql/libmysql.c +++ b/libmysql/libmysql.c @@ -148,7 +148,7 @@ static MYSQL* spawn_init(MYSQL* parent, const char* host, *****************************************************************************/ int my_connect(my_socket s, const struct sockaddr *name, uint namelen, - uint timeout) + uint timeout) { #if defined(__WIN__) || defined(OS2) return connect(s, (struct sockaddr*) name, namelen); diff --git a/libmysqld/Makefile.am b/libmysqld/Makefile.am index f0e0ad2728e..badbc6d3425 100644 --- a/libmysqld/Makefile.am +++ b/libmysqld/Makefile.am @@ -45,7 +45,8 @@ sqlsources = convert.cc derror.cc field.cc field_conv.cc filesort.cc \ item_func.cc item_strfunc.cc item_sum.cc item_timefunc.cc \ item_uniq.cc key.cc lock.cc log.cc log_event.cc \ mini_client.cc net_pkg.cc net_serv.cc opt_ft.cc 
opt_range.cc \ - opt_sum.cc procedure.cc records.cc slave.cc sql_acl.cc \ + opt_sum.cc procedure.cc records.cc sql_acl.cc \ + repl_failsafe.cc slave.cc \ sql_analyse.cc sql_base.cc sql_cache.cc sql_class.cc \ sql_crypt.cc sql_db.cc sql_delete.cc sql_insert.cc sql_lex.cc \ sql_list.cc sql_manager.cc sql_map.cc sql_parse.cc \ @@ -59,6 +60,18 @@ EXTRA_DIST = lib_vio.c libmysqld_int_a_SOURCES= $(libmysqld_sources) $(libmysqlsources) $(sqlsources) libmysqld_a_SOURCES= +# Don't depend on things from mit-pthreads + +OMIT_DEPENDENCIES = pthread.h stdio.h __stdio.h stdlib.h __stdlib.h math.h\ + __math.h time.h __time.h unistd.h __unistd.h types.h \ + xtypes.h ac-types.h posix.h string.h __string.h \ + errno.h socket.h inet.h dirent.h netdb.h \ + cleanup.h cond.h debug_out.h fd.h kernel.h mutex.h \ + prio_queue.h pthread_attr.h pthread_once.h queue.h\ + sleep.h specific.h version.h pwd.h timers.h uio.h \ + cdefs.h machdep.h signal.h __signal.h util.h lex.h \ + wait.h + # automake misses these sql_yacc.cc sql_yacc.h: $(top_srcdir)/sql/sql_yacc.yy diff --git a/libmysqld/examples/test-run b/libmysqld/examples/test-run index b97d2742b74..d525c7a14a3 100755 --- a/libmysqld/examples/test-run +++ b/libmysqld/examples/test-run @@ -37,7 +37,7 @@ usage: $0 [-g|-h|-r] [test-name ...] 
EOF } -init_args="" +init_args="--server-arg=--language=$top_builddir/sql/share/english" while test $# -gt 0 do arg= diff --git a/mysql-test/include/have_gemini.inc b/mysql-test/include/have_gemini.inc deleted file mode 100644 index d98c9750714..00000000000 --- a/mysql-test/include/have_gemini.inc +++ /dev/null @@ -1,4 +0,0 @@ --- require r/have_gemini.require -disable_query_log; -show variables like "have_gemini"; -enable_query_log; diff --git a/mysql-test/install_test_db.sh b/mysql-test/install_test_db.sh index f810d2d9ad4..acf8cebc723 100644 --- a/mysql-test/install_test_db.sh +++ b/mysql-test/install_test_db.sh @@ -195,7 +195,7 @@ then fi mysqld_boot=" $execdir/mysqld --no-defaults --bootstrap --skip-grant-tables \ - --basedir=$basedir --datadir=$ldata --skip-innodb --skip-bdb --skip-gemini $EXTRA_ARG" + --basedir=$basedir --datadir=$ldata --skip-innodb --skip-bdb $EXTRA_ARG" echo "running $mysqld_boot" if $mysqld_boot << END_OF_DATA diff --git a/mysql-test/mysql-test-run.sh b/mysql-test/mysql-test-run.sh index 5cb3da9df25..8f824d82a08 100644 --- a/mysql-test/mysql-test-run.sh +++ b/mysql-test/mysql-test-run.sh @@ -124,7 +124,7 @@ MASTER_RUNNING=0 MASTER_MYPORT=9306 SLAVE_RUNNING=0 SLAVE_MYPORT=9307 -MYSQL_MANAGER_PORT=9308 +MYSQL_MANAGER_PORT=9305 # needs to be out of the way of slaves MYSQL_MANAGER_PW_FILE=$MYSQL_TEST_DIR/var/tmp/manager.pwd MYSQL_MANAGER_LOG=$MYSQL_TEST_DIR/var/log/manager.log MYSQL_MANAGER_USER=root @@ -473,6 +473,13 @@ mysql_install_db () { error "Could not install slave test DBs" exit 1 fi + + for slave_num in 1 2 ; + do + mkdir -p var/slave$slave_num-data/mysql + mkdir -p var/slave$slave_num-data/test + cp var/slave-data/mysql/* var/slave$slave_num-data/mysql + done # Give mysqld some time to die. 
sleep $SLEEP_TIME return 0 @@ -531,7 +538,8 @@ start_manager() $MYSQL_MANAGER --log=$MYSQL_MANAGER_LOG --port=$MYSQL_MANAGER_PORT \ --password-file=$MYSQL_MANAGER_PW_FILE abort_if_failed "Could not start MySQL manager" - mysqltest_manager_args="--manager-user=$MYSQL_MANAGER_USER \ + mysqltest_manager_args="--manager-host=localhost \ + --manager-user=$MYSQL_MANAGER_USER \ --manager-password=$MYSQL_MANAGER_PW \ --manager-port=$MYSQL_MANAGER_PORT \ --manager-wait-timeout=$START_WAIT_TIMEOUT" @@ -592,8 +600,8 @@ start_master() if [ -z "$DO_BENCH" ] then master_args="--no-defaults --log-bin=$MYSQL_TEST_DIR/var/log/master-bin \ - --server-id=1 \ - --basedir=$MY_BASEDIR \ + --server-id=1 --rpl-recovery-rank=1 \ + --basedir=$MY_BASEDIR --init-rpl-role=master \ --port=$MASTER_MYPORT \ --exit-info=256 \ --datadir=$MASTER_MYDDIR \ @@ -609,8 +617,8 @@ start_master() $EXTRA_MASTER_OPT $EXTRA_MASTER_MYSQLD_OPT" else master_args="--no-defaults --log-bin=$MYSQL_TEST_DIR/var/log/master-bin \ - --server-id=1 \ - --basedir=$MY_BASEDIR \ + --server-id=1 --rpl-recovery-rank=1 \ + --basedir=$MY_BASEDIR --init-rpl-role=master \ --port=$MASTER_MYPORT \ --datadir=$MASTER_MYDDIR \ --pid-file=$MASTER_MYPID \ @@ -647,10 +655,33 @@ start_master() start_slave() { [ x$SKIP_SLAVE = x1 ] && return - [ x$SLAVE_RUNNING = 1 ] && return - + eval "this_slave_running=\$SLAVE$1_RUNNING" + [ x$this_slave_running = 1 ] && return + #when testing fail-safe replication, we will have more than one slave + #in this case, we start secondary slaves with an argument + slave_ident="slave$1" + if [ -n "$1" ] ; + then + slave_server_id=`$EXPR 2 + $1` + slave_rpl_rank=$slave_server_id + slave_port=`expr $SLAVE_MYPORT + $1` + slave_log="$SLAVE_MYLOG.$1" + slave_err="$SLAVE_MYERR.$1" + slave_datadir="var/$slave_ident-data/" + slave_pid="$MYRUN_DIR/mysqld-$slave_ident.pid" + slave_sock="$SLAVE_MYSOCK-$1" + else + slave_server_id=2 + slave_rpl_rank=2 + slave_port=$SLAVE_MYPORT + slave_log=$SLAVE_MYLOG + 
slave_err=$SLAVE_MYERR + slave_datadir=$SLAVE_MYDDIR + slave_pid=$SLAVE_MYPID + slave_sock="$SLAVE_MYSOCK" + fi # Remove stale binary logs - $RM -f $MYSQL_TEST_DIR/var/log/slave-bin.* + $RM -f $MYSQL_TEST_DIR/var/log/$slave_ident-bin.* #run slave initialization shell script if one exists if [ -f "$slave_init_script" ] ; @@ -664,51 +695,51 @@ start_slave() --master-host=127.0.0.1 \ --master-password= \ --master-port=$MASTER_MYPORT \ - --server-id=2" + --server-id=$slave_server_id --rpl-recovery-rank=$slave_rpl_rank" else master_info=$SLAVE_MASTER_INFO fi - $RM -f $SLAVE_MYDDIR/log.* + $RM -f $slave_datadir/log.* slave_args="--no-defaults $master_info \ --exit-info=256 \ - --log-bin=$MYSQL_TEST_DIR/var/log/slave-bin \ + --log-bin=$MYSQL_TEST_DIR/var/log/$slave_ident-bin \ --log-slave-updates \ - --log=$SLAVE_MYLOG \ + --log=$slave_log \ --basedir=$MY_BASEDIR \ - --datadir=$SLAVE_MYDDIR \ - --pid-file=$SLAVE_MYPID \ - --port=$SLAVE_MYPORT \ - --socket=$SLAVE_MYSOCK \ + --datadir=$slave_datadir \ + --pid-file=$slave_pid \ + --port=$slave_port \ + --socket=$slave_sock \ --character-sets-dir=$CHARSETSDIR \ --default-character-set=$CHARACTER_SET \ - --core \ + --core --init-rpl-role=slave \ --tmpdir=$MYSQL_TMP_DIR \ --language=$LANGUAGE \ --skip-innodb --skip-slave-start \ --slave-load-tmpdir=$SLAVE_LOAD_TMPDIR \ --report-host=127.0.0.1 --report-user=root \ - --report-port=$SLAVE_MYPORT \ + --report-port=$slave_port \ --master-retry-count=5 \ $SMALL_SERVER \ $EXTRA_SLAVE_OPT $EXTRA_SLAVE_MYSQLD_OPT" - CUR_MYERR=$SLAVE_MYERR - CUR_MYSOCK=$SLAVE_MYSOCK + CUR_MYERR=$slave_err + CUR_MYSOCK=$slave_sock if [ x$DO_DDD = x1 ] then $ECHO "set args $master_args" > $GDB_SLAVE_INIT - manager_launch slave ddd -display $DISPLAY --debugger \ + manager_launch $slave_ident ddd -display $DISPLAY --debugger \ "gdb -x $GDB_SLAVE_INIT" $SLAVE_MYSQLD elif [ x$DO_GDB = x1 ] then $ECHO "set args $slave_args" > $GDB_SLAVE_INIT - manager_launch slave $XTERM -display $DISPLAY -title "Slave" -e gdb 
-x \ + manager_launch $slave_ident $XTERM -display $DISPLAY -title "Slave" -e gdb -x \ $GDB_SLAVE_INIT $SLAVE_MYSQLD else - manager_launch slave $SLAVE_MYSQLD $slave_args + manager_launch $slave_ident $SLAVE_MYSQLD $slave_args fi - SLAVE_RUNNING=1 + eval "SLAVE$1_RUNNING=1" } mysql_start () { @@ -721,23 +752,31 @@ mysql_start () { stop_slave () { - if [ x$SLAVE_RUNNING = x1 ] + eval "this_slave_running=\$SLAVE$1_RUNNING" + slave_ident="slave$1" + if [ -n "$1" ] ; then - manager_term slave - if [ $? != 0 ] && [ -f $SLAVE_MYPID ] + slave_pid="$MYRUN_DIR/mysqld-$slave_ident.pid" + else + slave_pid=$SLAVE_MYPID + fi + if [ x$this_slave_running = x1 ] + then + manager_term $slave_ident + if [ $? != 0 ] && [ -f $slave_pid ] then # try harder! $ECHO "slave not cooperating with mysqladmin, will try manual kill" - kill `$CAT $SLAVE_MYPID` + kill `$CAT $slave_pid` sleep $SLEEP_TIME if [ -f $SLAVE_MYPID ] ; then $ECHO "slave refused to die. Sending SIGKILL" - kill -9 `$CAT $SLAVE_MYPID` - $RM -f $SLAVE_MYPID + kill -9 `$CAT $slave_pid` + $RM -f $slave_pid else $ECHO "slave responded to SIGTERM " fi fi - SLAVE_RUNNING=0 + eval "SLAVE$1_RUNNING=0" fi } @@ -771,6 +810,8 @@ mysql_stop () stop_master $ECHO "Master shutdown finished" stop_slave + stop_slave 1 + stop_slave 2 $ECHO "Slave shutdown finished" return 1 @@ -800,6 +841,7 @@ run_testcase () slave_init_script=$TESTDIR/$tname-slave.sh slave_master_info_file=$TESTDIR/$tname-slave-master-info.opt SKIP_SLAVE=`$EXPR \( $tname : rpl \) = 0` + many_slaves=`$EXPR \( $tname : rpl_failsafe \) != 0` if [ -n "$SKIP_TEST" ] ; then SKIP_THIS_TEST=`$EXPR \( $tname : "$SKIP_TEST" \) != 0` if [ x$SKIP_THIS_TEST = x1 ] ; @@ -874,6 +916,10 @@ run_testcase () stop_slave start_slave fi + if [ x$many_slaves = x1 ]; then + start_slave 1 + start_slave 2 + fi fi cd $MYSQL_TEST_DIR diff --git a/mysql-test/r/gemini.result b/mysql-test/r/gemini.result deleted file mode 100644 index 0b43b4f5192..00000000000 --- a/mysql-test/r/gemini.result +++ 
/dev/null @@ -1,370 +0,0 @@ -id code name -1 1 Tim -2 1 Monty -3 2 David -4 2 Erik -5 3 Sasha -6 3 Jeremy -7 4 Matt -id code name -2 1 Monty -3 2 David -4 2 Erik -5 3 Sasha -6 3 Jeremy -7 4 Matt -8 1 Sinisa -id code name -3 2 David -4 2 Erik -5 3 Sasha -6 3 Jeremy -7 4 Matt -8 1 Sinisa -12 1 Ralph -id parent_id level -8 102 2 -9 102 2 -15 102 2 -id parent_id level -1001 100 0 -1003 101 1 -1004 101 1 -1008 102 2 -1009 102 2 -1017 103 2 -1022 104 2 -1024 104 2 -1028 105 2 -1029 105 2 -1030 105 2 -1031 106 2 -1032 106 2 -1033 106 2 -1203 107 2 -1202 107 2 -1020 103 2 -1157 100 0 -1193 105 2 -1040 107 2 -1002 101 1 -1015 102 2 -1006 101 1 -1034 106 2 -1035 106 2 -1016 103 2 -1007 101 1 -1036 107 2 -1018 103 2 -1026 105 2 -1027 105 2 -1183 104 2 -1038 107 2 -1025 105 2 -1037 107 2 -1021 104 2 -1019 103 2 -1005 101 1 -1179 105 2 -id parent_id level -1001 100 0 -1003 101 1 -1004 101 1 -1008 102 2 -1010 102 2 -1017 103 2 -1023 104 2 -1024 104 2 -1028 105 2 -1029 105 2 -1030 105 2 -1031 106 2 -1032 106 2 -1033 106 2 -1204 107 2 -1203 107 2 -1020 103 2 -1158 100 0 -1194 105 2 -1041 107 2 -1002 101 1 -1015 102 2 -1006 101 1 -1034 106 2 -1035 106 2 -1016 103 2 -1007 101 1 -1036 107 2 -1018 103 2 -1026 105 2 -1027 105 2 -1184 104 2 -1039 107 2 -1025 105 2 -1038 107 2 -1022 104 2 -1019 103 2 -1005 101 1 -1180 105 2 -id parent_id level -1008 102 2 -1010 102 2 -1015 102 2 -table type possible_keys key key_len ref rows Extra -t1 ref level level 1 const 6 where used; Using index -table type possible_keys key key_len ref rows Extra -t1 ref level level 1 const 6 where used -table type possible_keys key key_len ref rows Extra -t1 ref level level 1 const 6 where used -level id -1 1003 -1 1004 -1 1002 -1 1006 -1 1007 -1 1005 -level id parent_id -1 1003 101 -1 1004 101 -1 1002 101 -1 1006 101 -1 1007 101 -1 1005 101 -gesuchnr benutzer_id -1 1 -2 1 -a -2 -user_id name phone ref_email detail -10292 sanjeev 29153373 sansh777@hotmail.com xxx -10292 shirish 2333604 shirish@yahoo.com ddsds 
-10292 sonali 323232 sonali@bolly.com filmstar -user_id name phone ref_email detail -10292 sanjeev 29153373 sansh777@hotmail.com xxx -10292 shirish 2333604 shirish@yahoo.com ddsds -10292 sonali 323232 sonali@bolly.com filmstar -user_id name phone ref_email detail -10292 sanjeev 29153373 sansh777@hotmail.com xxx -10292 shirish 2333604 shirish@yahoo.com ddsds -10292 sonali 323232 sonali@bolly.com filmstar -10293 shirish 2333604 shirish@yahoo.com ddsds -user_id name phone ref_email detail -10293 shirish 2333604 shirish@yahoo.com ddsds -user_id name phone ref_email detail -10291 sanjeev 29153373 sansh777@hotmail.com xxx -a b -1 3 -2 3 -3 3 -a b -1 3 -2 3 -3 3 -a b -a b -1 3 -2 3 -3 3 -a b -1 3 -2 3 -3 3 -id ggid email passwd -1 test1 xxx -id ggid email passwd -1 test1 xxx -id ggid email passwd -2 test2 yyy -id parent_id level -8 102 2 -9 102 2 -15 102 2 -id parent_id level -1001 100 0 -1003 101 1 -1004 101 1 -1008 102 2 -1024 102 2 -1017 103 2 -1022 104 2 -1024 104 2 -1028 105 2 -1029 105 2 -1030 105 2 -1031 106 2 -1032 106 2 -1033 106 2 -1203 107 2 -1202 107 2 -1020 103 2 -1157 100 0 -1193 105 2 -1040 107 2 -1002 101 1 -1015 102 2 -1006 101 1 -1034 106 2 -1035 106 2 -1016 103 2 -1007 101 1 -1036 107 2 -1018 103 2 -1026 105 2 -1027 105 2 -1183 104 2 -1038 107 2 -1025 105 2 -1037 107 2 -1021 104 2 -1019 103 2 -1005 101 1 -1179 105 2 -id parent_id level -1002 100 0 -1004 101 1 -1005 101 1 -1009 102 2 -1025 102 2 -1018 103 2 -1023 104 2 -1025 104 2 -1029 105 2 -1030 105 2 -1031 105 2 -1032 106 2 -1033 106 2 -1034 106 2 -1204 107 2 -1203 107 2 -1021 103 2 -1158 100 0 -1194 105 2 -1041 107 2 -1003 101 1 -1016 102 2 -1007 101 1 -1035 106 2 -1036 106 2 -1017 103 2 -1008 101 1 -1037 107 2 -1019 103 2 -1027 105 2 -1028 105 2 -1184 104 2 -1039 107 2 -1026 105 2 -1038 107 2 -1022 104 2 -1020 103 2 -1006 101 1 -1180 105 2 -id parent_id level -1009 102 2 -1025 102 2 -1016 102 2 -table type possible_keys key key_len ref rows Extra -t1 ref level level 1 const 6 where used; Using 
index -level id -1 1004 -1 1005 -1 1003 -1 1007 -1 1008 -1 1006 -level id parent_id -1 1004 101 -1 1005 101 -1 1003 101 -1 1007 101 -1 1008 101 -1 1006 101 -level id -1 1003 -1 1004 -1 1005 -1 1006 -1 1007 -1 1008 -id parent_id level -1002 100 0 -1009 102 2 -1025 102 2 -1018 103 2 -1023 104 2 -1025 104 2 -1029 105 2 -1030 105 2 -1031 105 2 -1032 106 2 -1033 106 2 -1034 106 2 -1204 107 2 -1203 107 2 -1021 103 2 -1158 100 0 -1194 105 2 -1041 107 2 -1016 102 2 -1035 106 2 -1036 106 2 -1017 103 2 -1037 107 2 -1019 103 2 -1027 105 2 -1028 105 2 -1184 104 2 -1039 107 2 -1026 105 2 -1038 107 2 -1022 104 2 -1020 103 2 -1180 105 2 -count(*) -1 -a -1 -2 -3 -test for rollback -test for rollback -n after rollback -4 after rollback -n after commit -4 after commit -5 after commit -n after commit -4 after commit -5 after commit -6 after commit -n -4 -5 -6 -7 -afterbegin_id afterbegin_nom -1 first -2 hamdouni -afterrollback_id afterrollback_nom -1 first -afterautocommit0_id afterautocommit0_nom -1 first -3 mysql -afterrollback_id afterrollback_nom -1 first -id val -id val -pippo 12 -id val -ID NAME -1 Jochen -_userid -marc@anyware.co.uk -_userid -marc@anyware.co.uk -f1 -65 -379 -468 -469 -508 diff --git a/mysql-test/r/have_gemini.require b/mysql-test/r/have_gemini.require deleted file mode 100644 index 0ffe0e40d3b..00000000000 --- a/mysql-test/r/have_gemini.require +++ /dev/null @@ -1,2 +0,0 @@ -Variable_name Value -have_gemini YES diff --git a/mysql-test/r/innodb.result b/mysql-test/r/innodb.result index 596edb84705..20874981840 100644 --- a/mysql-test/r/innodb.result +++ b/mysql-test/r/innodb.result @@ -169,8 +169,8 @@ test.t1 optimize error The handler for the table doesn't support check/repair show keys from t1; Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Comment t1 0 PRIMARY 1 id A 2 NULL NULL -t1 1 parent_id 1 parent_id A 4 NULL NULL -t1 1 level 1 level A 4 NULL NULL +t1 1 parent_id 1 parent_id A 2 NULL NULL +t1 1 level 1 level A 
2 NULL NULL drop table t1; CREATE TABLE t1 ( gesuchnr int(11) DEFAULT '0' NOT NULL, @@ -211,7 +211,7 @@ Table Op Msg_type Msg_text test.t1 analyze error The handler for the table doesn't support check/repair show keys from t1; Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Comment -t1 1 skr 1 a A 3 NULL NULL +t1 1 skr 1 a A 1 NULL NULL drop table t1; create table t1 (a int,b varchar(20),key(a)) type=innodb; insert into t1 values (1,""), (2,"testing"); diff --git a/mysql-test/r/isolation.result b/mysql-test/r/isolation.result deleted file mode 100644 index 60b71e217bb..00000000000 --- a/mysql-test/r/isolation.result +++ /dev/null @@ -1,61 +0,0 @@ -f1 -test1 -bar -f1 -test2 -bar -f1 -test3 -bar -f1 -f1 -test4 -bar -f1 -test5 -bar -f1 -test6 -bar -f1 -test7 -bar -f1 -test8 -bar -f1 -test9 -bar -f1 -test10 -bar -f1 -test11 -bar -f1 -test12 -bar -f1 -test13 -bar -f1 -test14 -bar -f1 -test15 -bar -f1 -test16 -bar -f1 -test17 -bar -f1 -test18 -bar -f1 -test19 -bar -f1 -test20 -bar diff --git a/mysql-test/r/rpl_failsafe.result b/mysql-test/r/rpl_failsafe.result new file mode 100644 index 00000000000..596d6ef7191 --- /dev/null +++ b/mysql-test/r/rpl_failsafe.result @@ -0,0 +1,29 @@ +slave stop; +reset master; +reset slave; +drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9; +slave start; +show variables like 'rpl_recovery_rank'; +Variable_name Value +rpl_recovery_rank 1 +show status like 'Rpl_status'; +Variable_name Value +Rpl_status AUTH_MASTER +show variables like 'rpl_recovery_rank'; +Variable_name Value +rpl_recovery_rank 2 +show status like 'Rpl_status'; +Variable_name Value +Rpl_status IDLE_SLAVE +show variables like 'rpl_recovery_rank'; +Variable_name Value +rpl_recovery_rank 3 +show status like 'Rpl_status'; +Variable_name Value +Rpl_status IDLE_SLAVE +show variables like 'rpl_recovery_rank'; +Variable_name Value +rpl_recovery_rank 4 +show status like 'Rpl_status'; +Variable_name Value +Rpl_status IDLE_SLAVE diff --git 
a/mysql-test/std_data/gemini.dat b/mysql-test/std_data/gemini.dat deleted file mode 100644 index c2e1045f5ac..00000000000 --- a/mysql-test/std_data/gemini.dat +++ /dev/null @@ -1,5 +0,0 @@ -65,-1,1 -379,-1,1 -468,-1,1 -469,-1,1 -508,-1,1 diff --git a/mysql-test/t/gemini.test b/mysql-test/t/gemini.test deleted file mode 100644 index 9d4451c3551..00000000000 --- a/mysql-test/t/gemini.test +++ /dev/null @@ -1,355 +0,0 @@ --- source include/have_gemini.inc - -# -# Small basic test with ignore -# - -drop table if exists t1; -create table t1 (id int unsigned not null auto_increment, code tinyint unsigned not null, name char(20) not null, primary key (id), key (code), unique (name)) type=gemini; - -insert into t1 (code, name) values (1, 'Tim'), (1, 'Monty'), (2, 'David'), (2, 'Erik'), (3, 'Sasha'), (3, 'Jeremy'), (4, 'Matt'); -select id, code, name from t1 order by id; - -update ignore t1 set id = 8, name = 'Sinisa' where id < 3; -select id, code, name from t1 order by id; -update ignore t1 set id = id + 10, name = 'Ralph' where id < 4; -select id, code, name from t1 order by id; - -drop table t1; - -# -# A bit bigger test -# - -CREATE TABLE t1 ( - id int(11) NOT NULL auto_increment, - parent_id int(11) DEFAULT '0' NOT NULL, - level tinyint(4) DEFAULT '0' NOT NULL, - PRIMARY KEY (id), - KEY parent_id (parent_id), - KEY level (level) -) type=gemini; -INSERT INTO t1 VALUES (1,0,0),(3,1,1),(4,1,1),(8,2,2),(9,2,2),(17,3,2),(22,4,2),(24,4,2),(28,5,2),(29,5,2),(30,5,2),(31,6,2),(32,6,2),(33,6,2),(203,7,2),(202,7,2),(20,3,2),(157,0,0),(193,5,2),(40,7,2),(2,1,1),(15,2,2),(6,1,1),(34,6,2),(35,6,2),(16,3,2),(7,1,1),(36,7,2),(18,3,2),(26,5,2),(27,5,2),(183,4,2),(38,7,2),(25,5,2),(37,7,2),(21,4,2),(19,3,2),(5,1,1),(179,5,2); -update t1 set parent_id=parent_id+100; -select * from t1 where parent_id=102; -update t1 set id=id+1000; -!$1062 update t1 set id=1024 where id=1009; -select * from t1; -update ignore t1 set id=id+1; # This will change all rows -select * from t1; -update ignore 
t1 set id=1023 where id=1010; -select * from t1 where parent_id=102; -explain select level from t1 where level=1; -explain select level,id from t1 where level=1; -explain select level,id,parent_id from t1 where level=1; -select level,id from t1 where level=1; -select level,id,parent_id from t1 where level=1; -drop table t1; - -# -# Test replace -# - -CREATE TABLE t1 ( - gesuchnr int(11) DEFAULT '0' NOT NULL, - benutzer_id int(11) DEFAULT '0' NOT NULL, - PRIMARY KEY (gesuchnr,benutzer_id) -) type=gemini; - -replace into t1 (gesuchnr,benutzer_id) values (2,1); -replace into t1 (gesuchnr,benutzer_id) values (1,1); -replace into t1 (gesuchnr,benutzer_id) values (1,1); -select * from t1; -drop table t1; - -# -# test delete using hidden_primary_key -# - -create table t1 (a int) type=gemini; -insert into t1 values (1), (2); -delete from t1 where a = 1; -select * from t1; -drop table t1; - -# -# Test auto_increment on sub key -# - -#create table t1 (a char(10) not null, b int not null auto_increment, primary key(a,b)) type=gemini; -#insert into t1 values ("a",1),("b",2),("a",2),("c",1); -#insert into t1 values ("a",NULL),("b",NULL),("c",NULL),("e",NULL); -#insert into t1 (a) values ("a"),("b"),("c"),("d"); -#insert into t1 (a) values ('k'),('d'); -#insert into t1 (a) values ("a"); -#insert into t1 values ("d",last_insert_id()); -#select * from t1; -#drop table t1; - -# -# Test when reading on part of unique key -# -CREATE TABLE t1 ( - user_id int(10) DEFAULT '0' NOT NULL, - name varchar(100), - phone varchar(100), - ref_email varchar(100) DEFAULT '' NOT NULL, - detail varchar(200), - PRIMARY KEY (user_id,ref_email) -)type=gemini; - -INSERT INTO t1 VALUES (10292,'sanjeev','29153373','sansh777@hotmail.com','xxx'),(10292,'shirish','2333604','shirish@yahoo.com','ddsds'),(10292,'sonali','323232','sonali@bolly.com','filmstar'); -select * from t1 where user_id=10292; -INSERT INTO t1 VALUES 
(10291,'sanjeev','29153373','sansh777@hotmail.com','xxx'),(10293,'shirish','2333604','shirish@yahoo.com','ddsds'); -select * from t1 where user_id=10292; -select * from t1 where user_id>=10292; -select * from t1 where user_id>10292; -select * from t1 where user_id<10292; -drop table t1; - -# -# Test that keys are created in right order -# - Needs ANALYZE TABLE to work - MikeF 2/12/01 -# -#CREATE TABLE t1 (a int not null, b int not null,c int not null, -#key(a),primary key(a,b), unique(c),key(a),unique(b)) type = gemini; -#show index from t1; -#drop table t1; - -# -# Test of ALTER TABLE and gemini tables -# - -#create table t1 (col1 int not null, col2 char(4) not null, primary key(col1)); -#alter table t1 type=gemini; -#insert into t1 values ('1','1'),('5','2'),('2','3'),('3','4'),('4','4'); -#select * from t1; -#update t1 set col2='7' where col1='4'; -#select * from t1; -#alter table t1 add co3 int not null; -#select * from t1; -#update t1 set col2='9' where col1='2'; -#select * from t1; -#drop table t1; - -# -# INSERT INTO gemini tables -# - -create table t1 (a int not null , b int, primary key (a)) type = gemini; -create table t2 (a int not null , b int, primary key (a)) type = myisam; -insert into t1 VALUES (1,3) , (2,3), (3,3); -select * from t1; -insert into t2 select * from t1; -select * from t2; -delete from t1 where b = 3; -select * from t1; -insert into t1 select * from t2; -select * from t1; -select * from t2; -drop table t1,t2; - -# -# Search on unique key -# - -CREATE TABLE t1 ( - id int(11) NOT NULL auto_increment, - ggid varchar(32) binary DEFAULT '' NOT NULL, - email varchar(64) DEFAULT '' NOT NULL, - passwd varchar(32) binary DEFAULT '' NOT NULL, - PRIMARY KEY (id), - UNIQUE ggid (ggid) -) TYPE=gemini; - -insert into t1 (ggid,passwd) values ('test1','xxx'); -insert into t1 (ggid,passwd) values ('test2','yyy'); - -select * from t1 where ggid='test1'; -select * from t1 where passwd='xxx'; -select * from t1 where id=2; -drop table t1; - -# -# ORDER BY 
on not primary key -# - -#CREATE TABLE t1 ( -# user_name varchar(12), - #password text, - #subscribed char(1), - #user_id int(11) DEFAULT '0' NOT NULL, - #quota bigint(20), - #weight double, - #access_date date, - #access_time time, - #approved datetime, - #dummy_primary_key int(11) NOT NULL auto_increment, - #PRIMARY KEY (dummy_primary_key) -#) TYPE=gemini; -#INSERT INTO t1 VALUES ('user_0','somepassword','N',0,0,0,'2000-09-07','23:06:59','2000-09-07 23:06:59',1); -#INSERT INTO t1 VALUES ('user_1','somepassword','Y',1,1,1,'2000-09-07','23:06:59','2000-09-07 23:06:59',2); -#INSERT INTO t1 VALUES ('user_2','somepassword','N',2,2,1.4142135623731,'2000-09-07','23:06:59','2000-09-07 23:06:59',3); -#INSERT INTO t1 VALUES ('user_3','somepassword','Y',3,3,1.7320508075689,'2000-09-07','23:06:59','2000-09-07 23:06:59',4); -#INSERT INTO t1 VALUES ('user_4','somepassword','N',4,4,2,'2000-09-07','23:06:59','2000-09-07 23:06:59',5); -#select user_name, password , subscribed, user_id, quota, weight, access_date, access_time, approved, dummy_primary_key from t1 order by user_name; -#drop table t1; - -# -# Testing of tables without primary keys -# - -CREATE TABLE t1 ( - id int(11) NOT NULL auto_increment, - parent_id int(11) DEFAULT '0' NOT NULL, - level tinyint(4) DEFAULT '0' NOT NULL, - KEY (id), - KEY parent_id (parent_id), - KEY level (level) -) type=gemini; -INSERT INTO t1 VALUES (1,0,0),(3,1,1),(4,1,1),(8,2,2),(9,2,2),(17,3,2),(22,4,2),(24,4,2),(28,5,2),(29,5,2),(30,5,2),(31,6,2),(32,6,2),(33,6,2),(203,7,2),(202,7,2),(20,3,2),(157,0,0),(193,5,2),(40,7,2),(2,1,1),(15,2,2),(6,1,1),(34,6,2),(35,6,2),(16,3,2),(7,1,1),(36,7,2),(18,3,2),(26,5,2),(27,5,2),(183,4,2),(38,7,2),(25,5,2),(37,7,2),(21,4,2),(19,3,2),(5,1,1); -INSERT INTO t1 values (179,5,2); -update t1 set parent_id=parent_id+100; -select * from t1 where parent_id=102; -update t1 set id=id+1000; -update t1 set id=1024 where id=1009; -select * from t1; -update ignore t1 set id=id+1; # This will change all rows -select * 
from t1; -update ignore t1 set id=1023 where id=1010; -select * from t1 where parent_id=102; -explain select level from t1 where level=1; -select level,id from t1 where level=1; -select level,id,parent_id from t1 where level=1; -select level,id from t1 where level=1 order by id; -delete from t1 where level=1; -select * from t1; -drop table t1; - -# -# Test of index only reads -# -CREATE TABLE t1 ( - sca_code char(6) NOT NULL, - cat_code char(6) NOT NULL, - sca_desc varchar(50), - lan_code char(2) NOT NULL, - sca_pic varchar(100), - sca_sdesc varchar(50), - sca_sch_desc varchar(16), - PRIMARY KEY (sca_code, cat_code, lan_code) -) type = gemini ; - -INSERT INTO t1 ( sca_code, cat_code, sca_desc, lan_code, sca_pic, sca_sdesc, sca_sch_desc) VALUES ( 'PD', 'J', 'PENDANT', 'EN', NULL, NULL, 'PENDANT'),( 'RI', 'J', 'RING', 'EN', NULL, NULL, 'RING'); -select count(*) from t1 where sca_code = 'PD'; -drop table t1; - -# -# Test of opening table twice -# -CREATE TABLE t1 (a int not null, primary key (a)) type=gemini; -insert into t1 values(1),(2),(3); -select t1.a from t1 natural join t1 as t2 order by t1.a; -drop table t1; - -# -# Test rollback -# - -select "test for rollback"; -create table t1 (n int not null primary key) type=gemini; -set autocommit=0; -insert into t1 values (4); -commit; -insert into t1 values (5); -rollback; -select n, "after rollback" from t1; -insert into t1 values (5); -commit; -select n, "after commit" from t1; -commit; -insert into t1 values (6); -!$1062 insert into t1 values (4); -commit; -select n, "after commit" from t1; -set autocommit=1; -insert into t1 values (7); -!$1062 insert into t1 values (4); -select n from t1; -# nop -rollback; -drop table t1; - -# -# Testing transactions -# - -create table t1 ( id int NOT NULL PRIMARY KEY, nom varchar(64)) type=gemini; -insert into t1 values(1,'first'); -begin; -insert into t1 values(2,'hamdouni'); -select id as afterbegin_id,nom as afterbegin_nom from t1; -rollback; -select id as afterrollback_id,nom 
as afterrollback_nom from t1; -set autocommit=0; -insert into t1 values(3,'mysql'); -select id as afterautocommit0_id,nom as afterautocommit0_nom from t1; -rollback; -select id as afterrollback_id,nom as afterrollback_nom from t1; -set autocommit=1; -drop table t1; - -# -# Simple not autocommit test -# - -CREATE TABLE t1 (id char(8) not null primary key, val int not null) type=gemini; -insert into t1 values ('pippo', 12); -!$1062 insert into t1 values ('pippo', 12); # Gives error -delete from t1; -delete from t1 where id = 'pippo'; -select * from t1; - -insert into t1 values ('pippo', 12); -set autocommit=0; -delete from t1; -rollback; -select * from t1; -delete from t1; -commit; -select * from t1; -drop table t1; -set autocommit=1; - -# -# The following simple tests failed at some point -# - -CREATE TABLE t1 (ID INTEGER NOT NULL PRIMARY KEY, NAME VARCHAR(64)) TYPE=gemini; -INSERT INTO t1 VALUES (1, 'Jochen'); -select * from t1; -drop table t1; - -CREATE TABLE t1 ( _userid VARCHAR(60) NOT NULL PRIMARY KEY) TYPE=gemini; -set autocommit=0; -INSERT INTO t1 SET _userid='marc@anyware.co.uk'; -COMMIT; -SELECT * FROM t1; -SELECT _userid FROM t1 WHERE _userid='marc@anyware.co.uk'; -drop table t1; -set autocommit=1; - -# -# Test of load data infile -# - -CREATE TABLE if not exists `t1` ( - `f1` int(11) unsigned NOT NULL default '0', - `f2` tinyint(3) unsigned NOT NULL default '0', - `f3` tinyint(3) unsigned NOT NULL default '0', - PRIMARY KEY (`f1`) -) TYPE=Gemini; -lock table t1 write; -load data infile ''../../std_data/gemini.dat' ignore into table t1 fields terminated by ','; -select f1 from t1; -drop table t1; diff --git a/mysql-test/t/isolation.test b/mysql-test/t/isolation.test deleted file mode 100644 index 2a1a0ee79be..00000000000 --- a/mysql-test/t/isolation.test +++ /dev/null @@ -1,209 +0,0 @@ -source include/have_gemini.inc -source include/master-slave.inc; -connection master; -drop table if exists t1; -create table t1 (f1 char(20) not null) type = gemini; -set 
autocommit=0; - -insert into t1 (f1) values ("test1"), ("bar"); -connection master1; -!$1030 select * from t1; -connection master; -commit; - -drop table if exists t1; -create table t1 (f1 char(20) not null) type = gemini; -insert into t1 (f1) values ("test2"), ("bar"); -connection master1; -set transaction isolation level serializable; ---error 1218 -select * from t1; -connection master; -commit; - -drop table if exists t1; -create table t1 (f1 char(20) not null) type = gemini; -insert into t1 (f1) values ("test3"), ("bar"); -connection master1; -set transaction isolation level read uncommitted; -select * from t1; -connection master; -commit; - -drop table if exists t1; -create table t1 (f1 char(20) not null) type = gemini; -connection master1; -set transaction isolation level read committed; -select * from t1; -connection master; -commit; - -drop table if exists t1; -create table t1 (f1 char(20) not null) type = gemini; -insert into t1 (f1) values ("test4"), ("bar"); -connection master1; -set transaction isolation level repeatable read; -select * from t1; -connection master; -commit; - -drop table if exists t1; -create table t1 (f1 char(20) not null) type = gemini; -set transaction isolation level serializable; -insert into t1 (f1) values ("test5"), ("bar"); -connection master1; -set transaction isolation level serializable; -select * from t1; -connection master; -commit; - -drop table if exists t1; -create table t1 (f1 char(20) not null) type = gemini; -set transaction isolation level serializable; -insert into t1 (f1) values ("test6"), ("bar"); -connection master1; -set transaction isolation level read uncommitted; -select * from t1; -connection master; -commit; - -drop table if exists t1; -create table t1 (f1 char(20) not null) type = gemini; -set transaction isolation level serializable; -insert into t1 (f1) values ("test7"), ("bar"); -connection master1; -set transaction isolation level read committed; -select * from t1; -connection master; -commit; - -drop 
table if exists t1; -create table t1 (f1 char(20) not null) type = gemini; -set transaction isolation level serializable; -insert into t1 (f1) values ("test8"), ("bar"); -connection master1; -set transaction isolation level repeatable read; -select * from t1; -connection master; -commit; - -drop table if exists t1; -create table t1 (f1 char(20) not null) type = gemini; -set transaction isolation level read uncommitted; -insert into t1 (f1) values ("test9"), ("bar"); -connection master1; -set transaction isolation level serializable; -select * from t1; -connection master; -commit; - -drop table if exists t1; -create table t1 (f1 char(20) not null) type = gemini; -set transaction isolation level read uncommitted; -insert into t1 (f1) values ("test10"), ("bar"); -connection master1; -set transaction isolation level read uncommitted; -select * from t1; -connection master; -commit; - -drop table if exists t1; -create table t1 (f1 char(20) not null) type = gemini; -set transaction isolation level read uncommitted; -insert into t1 (f1) values ("test11"), ("bar"); -connection master1; -set transaction isolation level read committed; -select * from t1; -connection master; -commit; - -drop table if exists t1; -create table t1 (f1 char(20) not null) type = gemini; -set transaction isolation level read uncommitted; -insert into t1 (f1) values ("test12"), ("bar"); -connection master1; -set transaction isolation level repeatable read; -select * from t1; -connection master; -commit; - -drop table if exists t1; -create table t1 (f1 char(20) not null) type = gemini; -set transaction isolation level read committed; -insert into t1 (f1) values ("test13"), ("bar"); -connection master1; -set transaction isolation level serializable; -select * from t1; -connection master; -commit; - -drop table if exists t1; -create table t1 (f1 char(20) not null) type = gemini; -set transaction isolation level read committed; -insert into t1 (f1) values ("test14"), ("bar"); -connection master1; -set 
transaction isolation level read uncommitted; -select * from t1; -connection master; -commit; - -drop table if exists t1; -create table t1 (f1 char(20) not null) type = gemini; -set transaction isolation level read committed; -insert into t1 (f1) values ("test15"), ("bar"); -connection master1; -set transaction isolation level read committed; -select * from t1; -connection master; -commit; - -drop table if exists t1; -create table t1 (f1 char(20) not null) type = gemini; -set transaction isolation level read committed; -insert into t1 (f1) values ("test16"), ("bar"); -connection master1; -set transaction isolation level repeatable read; -select * from t1; -connection master; -commit; - -drop table if exists t1; -create table t1 (f1 char(20) not null) type = gemini; -set transaction isolation level repeatable read; -insert into t1 (f1) values ("test17"), ("bar"); -connection master1; -set transaction isolation level serializable; -select * from t1; -connection master; -commit; - -drop table if exists t1; -create table t1 (f1 char(20) not null) type = gemini; -set transaction isolation level repeatable read; -insert into t1 (f1) values ("test18"), ("bar"); -connection master1; -set transaction isolation level read uncommitted; -select * from t1; -connection master; -commit; - -drop table if exists t1; -create table t1 (f1 char(20) not null) type = gemini; -set transaction isolation level repeatable read; -insert into t1 (f1) values ("test19"), ("bar"); -connection master1; -set transaction isolation level read committed; -select * from t1; -connection master; -commit; - -drop table if exists t1; -create table t1 (f1 char(20) not null) type = gemini; -set transaction isolation level repeatable read; -insert into t1 (f1) values ("test20"), ("bar"); -connection master1; -set transaction isolation level repeatable read; -select * from t1; -connection master; -commit; -drop table t1; diff --git a/mysql-test/t/rpl_failsafe.test b/mysql-test/t/rpl_failsafe.test new file 
mode 100644 index 00000000000..f93dbf4b118 --- /dev/null +++ b/mysql-test/t/rpl_failsafe.test @@ -0,0 +1,15 @@ +source include/master-slave.inc; +connect (slave_sec,localhost,root,,test,0,slave.sock-1); +connect (slave_ter,localhost,root,,test,0,slave.sock-2); +connection master; +show variables like 'rpl_recovery_rank'; +show status like 'Rpl_status'; +connection slave; +show variables like 'rpl_recovery_rank'; +show status like 'Rpl_status'; +connection slave_sec; +show variables like 'rpl_recovery_rank'; +show status like 'Rpl_status'; +connection slave_ter; +show variables like 'rpl_recovery_rank'; +show status like 'Rpl_status'; diff --git a/mysys/mf_sort.c b/mysys/mf_sort.c index 754a1deb1a7..383959d0de7 100644 --- a/mysys/mf_sort.c +++ b/mysys/mf_sort.c @@ -25,7 +25,7 @@ void my_string_ptr_sort(void *base, uint items, size_s size) #if INT_MAX > 65536L uchar **ptr=0; - if (size <= 20 && items >= 1000 && + if (size <= 20 && items >= 1000 && items < 100000 && (ptr= (uchar**) my_malloc(items*sizeof(char*),MYF(0)))) { radixsort_for_str_ptr((uchar**) base,items,size,ptr); diff --git a/scripts/make_binary_distribution.sh b/scripts/make_binary_distribution.sh index 35be819cd2e..24dee288706 100644 --- a/scripts/make_binary_distribution.sh +++ b/scripts/make_binary_distribution.sh @@ -66,9 +66,12 @@ for i in extra/comp_err extra/replace extra/perror extra/resolveip \ myisam/myisampack sql/mysqld sql/mysqlbinlog \ client/mysql sql/mysqld client/mysqlshow client/mysqlcheck \ client/mysqladmin client/mysqldump client/mysqlimport client/mysqltest \ + client/mysqlmanagerc client/mysqlmanager-pwgen tools/mysqlmanager \ client/.libs/mysql client/.libs/mysqlshow client/.libs/mysqladmin \ client/.libs/mysqldump client/.libs/mysqlimport client/.libs/mysqltest \ - client/.libs/mysqlcheck + client/.libs/mysqlcheck \ + client/.libs/mysqlmanagerc client/libs/mysqlmanager-pwgen \ + tools/.libs/mysqlmanager do if [ -f $i ] then @@ -88,7 +91,7 @@ do fi done -for i in 
libmysql/.libs/libmysqlclient.a libmysql/.libs/libmysqlclient.so* libmysql/libmysqlclient.* libmysql_r/.libs/libmysqlclient_r.a libmysql_r/.libs/libmysqlclient_r.so* libmysql_r/libmysqlclient_r.* mysys/libmysys.a strings/libmystrings.a dbug/libdbug.a libmysqld/.libs/libmysqld.a libmysqld/.libs/libmysqld.so* libmysqld/libmysqld.a libmysqld/libmysqld.a +for i in libmysql/.libs/libmysqlclient.a libmysql/.libs/libmysqlclient.so* libmysql/libmysqlclient.* libmysql_r/.libs/libmysqlclient_r.a libmysql_r/.libs/libmysqlclient_r.so* libmysql_r/libmysqlclient_r.* mysys/libmysys.a strings/libmystrings.a dbug/libdbug.a libmysqld/.libs/libmysqld.a libmysqld/.libs/libmysqld.so* libmysqld/libmysqld.a do if [ -f $i ] then diff --git a/scripts/mysql_install_db.sh b/scripts/mysql_install_db.sh index c99126cdf53..0da457582f1 100644 --- a/scripts/mysql_install_db.sh +++ b/scripts/mysql_install_db.sh @@ -294,7 +294,7 @@ fi echo "Installing all prepared tables" if eval "$execdir/mysqld $defaults --bootstrap --skip-grant-tables \ - --basedir=$basedir --datadir=$ldata --skip-innodb --skip-gemini --skip-bdb $args" << END_OF_DATA + --basedir=$basedir --datadir=$ldata --skip-innodb --skip-bdb $args" << END_OF_DATA use mysql; $c_d $i_d diff --git a/sql-bench/test-insert.sh b/sql-bench/test-insert.sh index 655e38b1b0e..9dc3d9aa7c2 100644 --- a/sql-bench/test-insert.sh +++ b/sql-bench/test-insert.sh @@ -917,13 +917,19 @@ print "Time for update_with_key (" . ($opt_loop_count*3) . "): " . timestr(timediff($end_time, $loop_time),"all") . "\n"; $loop_time=new Benchmark; -for ($i=0 ; $i < $opt_loop_count*3 ; $i+=3) +$count=0; +for ($i=1 ; $i < $opt_loop_count*3 ; $i+=3) { $sth = $dbh->do("update bench1 set dummy1='updated' where id=$i") or die $DBI::errstr; + $end_time=new Benchmark; + last if ($estimated=predict_query_time($loop_time,$end_time,\$i,$tests, + $opt_loop_count)); } - -$end_time=new Benchmark; -print "Time for update_with_key_prefix (" . ($opt_loop_count) . "): " . 
+if ($estimated) +{ print "Estimated time"; } +else +{ print "Time"; } +print " for update_with_key_prefix (" . ($opt_loop_count) . "): " . timestr(timediff($end_time, $loop_time),"all") . "\n"; print "\nTesting update of all rows\n"; diff --git a/sql/Makefile.am b/sql/Makefile.am index a11b1d0c252..4621443f4d2 100644 --- a/sql/Makefile.am +++ b/sql/Makefile.am @@ -21,7 +21,7 @@ MYSQLDATAdir = $(localstatedir) MYSQLSHAREdir = $(pkgdatadir) MYSQLBASEdir= $(prefix) INCLUDES = @MT_INCLUDES@ \ - @bdb_includes@ @innodb_includes@ @gemini_includes@ \ + @bdb_includes@ @innodb_includes@ \ -I$(srcdir)/../include \ -I$(srcdir)/../regex \ -I$(srcdir) -I../include -I. $(openssl_includes) @@ -30,8 +30,7 @@ SUBDIRS = share libexec_PROGRAMS = mysqld noinst_PROGRAMS = gen_lex_hash gen_lex_hash_LDFLAGS = @NOINST_LDFLAGS@ -LDADD = ../isam/libnisam.a \ - ../merge/libmerge.a \ +LDADD = @isam_libs@ \ ../myisam/libmyisam.a \ ../myisammrg/libmyisammrg.a \ ../heap/libheap.a \ @@ -43,7 +42,7 @@ LDADD = ../isam/libnisam.a \ mysqld_LDADD = @MYSQLD_EXTRA_LDFLAGS@ \ @bdb_libs@ @innodb_libs@ @pstack_libs@ \ - @gemini_libs@ @innodb_system_libs@ \ + @innodb_system_libs@ \ $(LDADD) $(CXXLDFLAGS) $(WRAPLIBS) @LIBDL@ @openssl_libs@ noinst_HEADERS = item.h item_func.h item_sum.h item_cmpfunc.h \ item_strfunc.h item_timefunc.h item_uniq.h \ @@ -53,7 +52,7 @@ noinst_HEADERS = item.h item_func.h item_sum.h item_cmpfunc.h \ field.h handler.h \ ha_isammrg.h ha_isam.h ha_myisammrg.h\ ha_heap.h ha_myisam.h ha_berkeley.h ha_innobase.h \ - ha_gemini.h opt_range.h opt_ft.h \ + opt_range.h opt_ft.h \ sql_select.h structs.h table.h sql_udf.h hash_filo.h\ lex.h lex_symbol.h sql_acl.h sql_crypt.h \ log_event.h mini_client.h sql_repl.h slave.h \ @@ -75,14 +74,14 @@ mysqld_SOURCES = sql_lex.cc sql_handler.cc \ time.cc opt_range.cc opt_sum.cc opt_ft.cc \ records.cc filesort.cc handler.cc \ ha_heap.cc ha_myisam.cc ha_myisammrg.cc \ - ha_berkeley.cc ha_innobase.cc ha_gemini.cc \ + ha_berkeley.cc ha_innobase.cc \ 
ha_isam.cc ha_isammrg.cc \ sql_db.cc sql_table.cc sql_rename.cc sql_crypt.cc \ sql_load.cc mf_iocache.cc field_conv.cc sql_show.cc \ sql_udf.cc sql_analyse.cc sql_analyse.h sql_cache.cc \ slave.cc sql_repl.cc sql_union.cc \ mini_client.cc mini_client_errors.c \ - stacktrace.c + stacktrace.c repl_failsafe.h repl_failsafe.cc gen_lex_hash_SOURCES = gen_lex_hash.cc gen_lex_hash_LDADD = $(LDADD) $(CXXLDFLAGS) diff --git a/sql/field.cc b/sql/field.cc index b34f58439db..85a5076e09a 100644 --- a/sql/field.cc +++ b/sql/field.cc @@ -4086,60 +4086,6 @@ const char *Field_blob::unpack(char *to, const char *from) return from+length; } - -#ifdef HAVE_GEMINI_DB -/* Blobs in Gemini tables are stored separately from the rows which contain -** them (except for tiny blobs, which are stored in the row). For all other -** blob types (blob, mediumblob, longblob), the row contains the length of -** the blob data and a blob id. These methods (pack_id, get_id, and -** unpack_id) handle packing and unpacking blob fields in Gemini rows. 
-*/ -char *Field_blob::pack_id(char *to, const char *from, ulonglong id, uint max_length) -{ - char *save=ptr; - ptr=(char*) from; - ulong length=get_length(); // Length of from string - if (length > max_length) - { - ptr=to; - length=max_length; - store_length(length); // Store max length - ptr=(char*) from; - } - else - memcpy(to,from,packlength); // Copy length - if (length) - { - int8store(to+packlength, id); - } - ptr=save; // Restore org row pointer - return to+packlength+sizeof(id); -} - - -ulonglong Field_blob::get_id(const char *from) -{ - ulonglong id = 0; - ulong length=get_length(from); - if (length) - id=uint8korr(from+packlength); - return id; -} - - -const char *Field_blob::unpack_id(char *to, const char *from, const char *bdata) -{ - memcpy(to,from,packlength); - ulong length=get_length(from); - from+=packlength; - if (length) - memcpy_fixed(to+packlength, &bdata, sizeof(bdata)); - else - bzero(to+packlength,sizeof(bdata)); - return from+sizeof(ulonglong); -} -#endif /* HAVE_GEMINI_DB */ - /* Keys for blobs are like keys on varchars */ int Field_blob::pack_cmp(const char *a, const char *b, uint key_length) diff --git a/sql/field.h b/sql/field.h index 8f60b7e008b..47bf5cc02ef 100644 --- a/sql/field.h +++ b/sql/field.h @@ -883,21 +883,6 @@ public: } char *pack(char *to, const char *from, uint max_length= ~(uint) 0); const char *unpack(char *to, const char *from); -#ifdef HAVE_GEMINI_DB - char *pack_id(char *to, const char *from, ulonglong id, - uint max_length= ~(uint) 0); - ulonglong get_id(const char *from); - const char *unpack_id(char *to, const char *from, const char *bdata); - inline void get_ptr_from_key_image(char **str,char *key_str) - { - *str = key_str + sizeof(uint16); - } - inline uint get_length_from_key_image(char *key_str) - { - return uint2korr(key_str); - } - enum_field_types blobtype() { return (packlength == 1 ? 
FIELD_TYPE_TINY_BLOB : FIELD_TYPE_BLOB);} -#endif char *pack_key(char *to, const char *from, uint max_length); char *pack_key_from_key_image(char* to, const char *from, uint max_length); int pack_cmp(const char *a, const char *b, uint key_length); diff --git a/sql/ha_gemini.cc b/sql/ha_gemini.cc deleted file mode 100644 index a60841c3fe6..00000000000 --- a/sql/ha_gemini.cc +++ /dev/null @@ -1,3630 +0,0 @@ -/* Copyright (C) 2000 MySQL AB & NuSphere Corporation - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ - -/* This file is based on ha_berkeley.cc */ - -#ifdef __GNUC__ -#pragma implementation // gcc: Class implementation -#endif - -#include "mysql_priv.h" -#ifdef HAVE_GEMINI_DB -#include "ha_gemini.h" -#include "dbconfig.h" -#include "dsmpub.h" -#include "recpub.h" -#include "vststat.h" - -#include <m_ctype.h> -#include <myisampack.h> -#include <m_string.h> -#include <assert.h> -#include <hash.h> -#include <stdarg.h> -#include "geminikey.h" - -#define gemini_msg MSGD_CALLBACK - -pthread_mutex_t gem_mutex; - -static HASH gem_open_tables; -static GEM_SHARE *get_share(const char *table_name, TABLE *table); -static int free_share(GEM_SHARE *share, bool mutex_is_locked); -static byte* gem_get_key(GEM_SHARE *share,uint *length, - my_bool not_used __attribute__((unused))); -static void 
gemini_lock_table_overflow_error(dsmContext_t *pcontext); - -const char *ha_gemini_ext=".gmd"; -const char *ha_gemini_idx_ext=".gmi"; - -bool gemini_skip=0; -long gemini_options = 0; -long gemini_buffer_cache; -long gemini_io_threads; -long gemini_log_cluster_size; -long gemini_locktablesize; -long gemini_lock_wait_timeout; -long gemini_spin_retries; -long gemini_connection_limit; -char *gemini_basedir; - -const char gemini_dbname[] = "gemini"; -dsmContext_t *pfirstContext = NULL; - -ulong gemini_recovery_options = GEMINI_RECOVERY_FULL; -/* bits in gemini_recovery_options */ -const char *gemini_recovery_names[] = -{ "FULL", "NONE", "FORCE" }; -TYPELIB gemini_recovery_typelib= {array_elements(gemini_recovery_names)-1,"", - gemini_recovery_names}; - -const int start_of_name = 2; /* Name passed as ./<db>/<table-name> - and we're not interested in the ./ */ -static const int keyBufSize = MAXKEYSZ + FULLKEYHDRSZ + MAX_REF_PARTS + 16; - -static int gemini_tx_begin(THD *thd); -static void print_msg(THD *thd, const char *table_name, const char *op_name, - const char *msg_type, const char *fmt, ...); - -static int gemini_helper_threads(dsmContext_t *pContext); -pthread_handler_decl(gemini_watchdog,arg ); -pthread_handler_decl(gemini_rl_writer,arg ); -pthread_handler_decl(gemini_apw,arg); - -/* General functions */ - -bool gemini_init(void) -{ - dsmStatus_t rc = 0; - char pmsgsfile[MAXPATHN]; - - DBUG_ENTER("gemini_init"); - - gemini_basedir=mysql_home; - /* If datadir isn't set, bail out */ - if (*mysql_real_data_home == '\0') - { - goto badret; - } - - /* dsmContextCreate and dsmContextSetString(DSM_TAGDB_DBNAME) must - ** be the first DSM calls we make so that we can log any errors which - ** occur in subsequent DSM calls. DO NOT INSERT ANY DSM CALLS IN - ** BETWEEN THIS COMMENT AND THE COMMENT THAT SAYS "END OF CODE..." 
- */ - /* Gotta connect to the database regardless of the operation */ - rc = dsmContextCreate(&pfirstContext); - if( rc != 0 ) - { - gemini_msg(pfirstContext, "dsmContextCreate failed %l",rc); - goto badret; - } - /* This call will also open the log file */ - rc = dsmContextSetString(pfirstContext, DSM_TAGDB_DBNAME, - strlen(gemini_dbname), (TEXT *)gemini_dbname); - if( rc != 0 ) - { - gemini_msg(pfirstContext, "Dbname tag failed %l", rc); - goto badret; - } - /* END OF CODE NOT TO MESS WITH */ - - fn_format(pmsgsfile, GEM_MSGS_FILE, language, ".db", 2 | 4); - rc = dsmContextSetString(pfirstContext, DSM_TAGDB_MSGS_FILE, - strlen(pmsgsfile), (TEXT *)pmsgsfile); - if( rc != 0 ) - { - gemini_msg(pfirstContext, "MSGS_DIR tag failed %l", rc); - goto badret; - } - - strxmov(pmsgsfile, gemini_basedir, GEM_SYM_FILE, NullS); - rc = dsmContextSetString(pfirstContext, DSM_TAGDB_SYMFILE, - strlen(pmsgsfile), (TEXT *)pmsgsfile); - if( rc != 0 ) - { - gemini_msg(pfirstContext, "SYMFILE tag failed %l", rc); - goto badret; - } - - rc = dsmContextSetLong(pfirstContext,DSM_TAGDB_ACCESS_TYPE,DSM_ACCESS_STARTUP); - if ( rc != 0 ) - { - gemini_msg(pfirstContext, "ACCESS TAG set failed %l",rc); - goto badret; - } - rc = dsmContextSetLong(pfirstContext,DSM_TAGDB_ACCESS_ENV, DSM_SQL_ENGINE); - if( rc != 0 ) - { - gemini_msg(pfirstContext, "ACCESS_ENV set failed %l",rc); - goto badret; - } - - rc = dsmContextSetString(pfirstContext, DSM_TAGDB_DATADIR, - strlen(mysql_real_data_home), - (TEXT *)mysql_real_data_home); - if( rc != 0 ) - { - gemini_msg(pfirstContext, "Datadir tag failed %l", rc); - goto badret; - } - - rc = dsmContextSetLong(pfirstContext, DSM_TAGDB_MAX_USERS, - gemini_connection_limit); - if(rc != 0) - { - gemini_msg(pfirstContext, "MAX_USERS tag set failed %l",rc); - goto badret; - } - - rc = dsmContextSetLong(pfirstContext, DSM_TAGDB_DEFAULT_LOCK_TIMEOUT, - gemini_lock_wait_timeout); - if(rc != 0) - { - gemini_msg(pfirstContext, "MAX_LOCK_ENTRIES tag set failed %l",rc); - 
goto badret; - } - - rc = dsmContextSetLong(pfirstContext, DSM_TAGDB_MAX_LOCK_ENTRIES, - gemini_locktablesize); - if(rc != 0) - { - gemini_msg(pfirstContext, "MAX_LOCK_ENTRIES tag set failed %l",rc); - goto badret; - } - - rc = dsmContextSetLong(pfirstContext, DSM_TAGDB_SPIN_AMOUNT, - gemini_spin_retries); - if(rc != 0) - { - gemini_msg(pfirstContext, "SPIN_AMOUNT tag set failed %l",rc); - goto badret; - } - - /* blocksize is hardcoded to 8K. Buffer cache is in bytes - need to convert this to 8K blocks */ - gemini_buffer_cache = gemini_buffer_cache / 8192; - rc = dsmContextSetLong(pfirstContext, DSM_TAGDB_DB_BUFFERS, - gemini_buffer_cache); - if(rc != 0) - { - gemini_msg(pfirstContext, "DB_BUFFERS tag set failed %l",rc); - goto badret; - } - - rc = dsmContextSetLong(pfirstContext, DSM_TAGDB_FLUSH_AT_COMMIT, - ((gemini_options & GEMOPT_FLUSH_LOG) ? 0 : 1)); - if(rc != 0) - { - gemini_msg(pfirstContext, "FLush_Log_At_Commit tag set failed %l",rc); - goto badret; - } - rc = dsmContextSetLong(pfirstContext, DSM_TAGDB_DIRECT_IO, - ((gemini_options & GEMOPT_UNBUFFERED_IO) ? 1 : 0)); - if(rc != 0) - { - gemini_msg(pfirstContext, "DIRECT_IO tag set failed %l",rc); - goto badret; - } - - rc = dsmContextSetLong(pfirstContext, DSM_TAGDB_CRASH_PROTECTION, - ((gemini_recovery_options & GEMINI_RECOVERY_FULL) ? 1 : 0)); - if(rc != 0) - { - gemini_msg(pfirstContext, "CRASH_PROTECTION tag set failed %l",rc); - goto badret; - } - - if (gemini_recovery_options & GEMINI_RECOVERY_FORCE) - { - rc = dsmContextSetLong(pfirstContext, DSM_TAGDB_FORCE_ACCESS, 1); - if(rc != 0) - { - printf("CRASH_PROTECTION tag set failed %ld",rc); - goto badret; - } - } - - /* cluster size will come in bytes, need to convert it to - 16 K units. 
*/ - gemini_log_cluster_size = (gemini_log_cluster_size + 16383) / 16384; - rc = dsmContextSetLong(pfirstContext, DSM_TAGDB_BI_CLUSTER_SIZE, - gemini_log_cluster_size); - - if(rc != 0) - { - gemini_msg(pfirstContext, "CRASH_PROTECTION tag set failed %l",rc); - goto badret; - } - - rc = dsmUserConnect(pfirstContext,(TEXT *)"Multi-user", - DSM_DB_OPENDB | DSM_DB_OPENFILE); - if( rc != 0 ) - { - /* Message is output in dbenv() */ - goto badret; - } - /* Set access to shared for subsequent user connects */ - rc = dsmContextSetLong(pfirstContext,DSM_TAGDB_ACCESS_TYPE,DSM_ACCESS_SHARED); - - rc = gemini_helper_threads(pfirstContext); - - - (void) hash_init(&gem_open_tables,32,0,0, - (hash_get_key) gem_get_key,0,0); - pthread_mutex_init(&gem_mutex,NULL); - - - DBUG_RETURN(0); - -badret: - gemini_skip = 1; - DBUG_RETURN(0); -} - -static int gemini_helper_threads(dsmContext_t *pContext) -{ - int rc = 0; - int i; - pthread_attr_t thr_attr; - - pthread_t hThread; - DBUG_ENTER("gemini_helper_threads"); - - (void) pthread_attr_init(&thr_attr); -#if !defined(HAVE_DEC_3_2_THREADS) - pthread_attr_setscope(&thr_attr,PTHREAD_SCOPE_SYSTEM); - (void) pthread_attr_setdetachstate(&thr_attr,PTHREAD_CREATE_DETACHED); - pthread_attr_setstacksize(&thr_attr,32768); -#endif - rc = pthread_create (&hThread, &thr_attr, gemini_watchdog, (void *)pContext); - if (rc) - { - gemini_msg(pContext, "Can't Create gemini watchdog thread"); - goto done; - } - if(!gemini_io_threads) - goto done; - - rc = pthread_create(&hThread, &thr_attr, gemini_rl_writer, (void *)pContext); - if(rc) - { - gemini_msg(pContext, "Can't create Gemini recovery log writer thread"); - goto done; - } - - for(i = gemini_io_threads - 1;i;i--) - { - rc = pthread_create(&hThread, &thr_attr, gemini_apw, (void *)pContext); - if(rc) - { - gemini_msg(pContext, "Can't create Gemini database page writer thread"); - goto done; - } - } -done: - - DBUG_RETURN(rc); -} - -pthread_handler_decl(gemini_watchdog,arg ) -{ - int rc = 0; - 
dsmContext_t *pcontext = (dsmContext_t *)arg; - dsmContext_t *pmyContext = NULL; - - - rc = dsmContextCopy(pcontext,&pmyContext, DSMCONTEXTDB); - if( rc != 0 ) - { - gemini_msg(pcontext, "dsmContextCopy failed for Gemini watchdog %d",rc); - - return 0; - } - rc = dsmUserConnect(pmyContext,NULL,0); - - if( rc != 0 ) - { - gemini_msg(pcontext, "dsmUserConnect failed for Gemini watchdog %d",rc); - - return 0; - } - - my_thread_init(); - pthread_detach_this_thread(); - - while(rc == 0) - { - rc = dsmDatabaseProcessEvents(pmyContext); - if(!rc) - rc = dsmWatchdog(pmyContext); - sleep(1); - } - rc = dsmUserDisconnect(pmyContext,0); - my_thread_end(); - return 0; -} - -pthread_handler_decl(gemini_rl_writer,arg ) -{ - int rc = 0; - dsmContext_t *pcontext = (dsmContext_t *)arg; - dsmContext_t *pmyContext = NULL; - - - rc = dsmContextCopy(pcontext,&pmyContext, DSMCONTEXTDB); - if( rc != 0 ) - { - gemini_msg(pcontext, "dsmContextCopy failed for Gemini recovery log writer %d",rc); - - return 0; - } - rc = dsmUserConnect(pmyContext,NULL,0); - - if( rc != 0 ) - { - gemini_msg(pcontext, "dsmUserConnect failed for Gemini recovery log writer %d",rc); - - return 0; - } - - my_thread_init(); - pthread_detach_this_thread(); - - while(rc == 0) - { - rc = dsmRLwriter(pmyContext); - } - rc = dsmUserDisconnect(pmyContext,0); - my_thread_end(); - return 0; -} - -pthread_handler_decl(gemini_apw,arg ) -{ - int rc = 0; - dsmContext_t *pcontext = (dsmContext_t *)arg; - dsmContext_t *pmyContext = NULL; - - my_thread_init(); - pthread_detach_this_thread(); - - rc = dsmContextCopy(pcontext,&pmyContext, DSMCONTEXTDB); - if( rc != 0 ) - { - gemini_msg(pcontext, "dsmContextCopy failed for Gemini page writer %d",rc); - my_thread_end(); - return 0; - } - rc = dsmUserConnect(pmyContext,NULL,0); - - if( rc != 0 ) - { - gemini_msg(pcontext, "dsmUserConnect failed for Gemini page writer %d",rc); - my_thread_end(); - return 0; - } - - while(rc == 0) - { - rc = dsmAPW(pmyContext); - } - rc = 
dsmUserDisconnect(pmyContext,0); - my_thread_end(); - return 0; -} - -int gemini_set_option_long(int optid, long optval) -{ - dsmStatus_t rc = 0; - - switch (optid) - { - case GEM_OPTID_SPIN_RETRIES: - /* If we don't have a context yet, skip the set and just save the - ** value in gemini_spin_retries for a later gemini_init(). This - ** may not ever happen, but we're covered if it does. - */ - if (pfirstContext) - { - rc = dsmContextSetLong(pfirstContext, DSM_TAGDB_SPIN_AMOUNT, - optval); - } - if (rc) - { - gemini_msg(pfirstContext, "SPIN_AMOUNT tag set failed %l",rc); - } - else - { - gemini_spin_retries = optval; - } - break; - } - - return rc; -} - -static int gemini_connect(THD *thd) -{ - DBUG_ENTER("gemini_connect"); - - dsmStatus_t rc; - - rc = dsmContextCopy(pfirstContext,(dsmContext_t **)&thd->gemini.context, - DSMCONTEXTDB); - if( rc != 0 ) - { - gemini_msg(pfirstContext, "dsmContextCopy failed %l",rc); - - return(rc); - } - rc = dsmUserConnect((dsmContext_t *)thd->gemini.context,NULL,0); - - if( rc != 0 ) - { - gemini_msg(pfirstContext, "dsmUserConnect failed %l",rc); - - return(rc); - } - rc = (dsmStatus_t)gemini_tx_begin(thd); - - DBUG_RETURN(rc); -} - -void gemini_disconnect(THD *thd) -{ - dsmStatus_t rc; - - if(thd->gemini.context) - { - rc = dsmUserDisconnect((dsmContext_t *)thd->gemini.context,0); - } - return; -} - -bool gemini_end(void) -{ - dsmStatus_t rc; - THD *thd; - - DBUG_ENTER("gemini_end"); - - hash_free(&gem_open_tables); - pthread_mutex_destroy(&gem_mutex); - if(pfirstContext) - { - rc = dsmShutdownSet(pfirstContext, DSM_SHUTDOWN_NORMAL); - sleep(2); - rc = dsmContextSetLong(pfirstContext,DSM_TAGDB_ACCESS_TYPE,DSM_ACCESS_STARTUP); - rc = dsmShutdown(pfirstContext, DSMNICEBIT,DSMNICEBIT); - } - DBUG_RETURN(0); -} - -bool gemini_flush_logs() -{ - DBUG_ENTER("gemini_flush_logs"); - - DBUG_RETURN(0); -} - -static int gemini_tx_begin(THD *thd) -{ - dsmStatus_t rc; - DBUG_ENTER("gemini_tx_begin"); - - thd->gemini.savepoint = 1; - - rc = 
dsmTransaction((dsmContext_t *)thd->gemini.context, - &thd->gemini.savepoint,DSMTXN_START,0,NULL); - if(!rc) - thd->gemini.needSavepoint = 1; - - thd->gemini.tx_isolation = thd->tx_isolation; - - DBUG_PRINT("trans",("beginning transaction")); - DBUG_RETURN(rc); -} - -int gemini_commit(THD *thd) -{ - dsmStatus_t rc; - LONG txNumber = 0; - - DBUG_ENTER("gemini_commit"); - - if(!thd->gemini.context) - DBUG_RETURN(0); - - rc = dsmTransaction((dsmContext_t *)thd->gemini.context, - 0,DSMTXN_COMMIT,0,NULL); - if(!rc) - rc = gemini_tx_begin(thd); - - thd->gemini.lock_count = 0; - - DBUG_PRINT("trans",("ending transaction")); - DBUG_RETURN(rc); -} - -int gemini_rollback(THD *thd) -{ - dsmStatus_t rc; - LONG txNumber; - - DBUG_ENTER("gemini_rollback"); - DBUG_PRINT("trans",("aborting transaction")); - - if(!thd->gemini.context) - DBUG_RETURN(0); - - thd->gemini.savepoint = 0; - rc = dsmTransaction((dsmContext_t *)thd->gemini.context, - &thd->gemini.savepoint,DSMTXN_ABORT,0,NULL); - if(!rc) - rc = gemini_tx_begin(thd); - - thd->gemini.lock_count = 0; - - DBUG_RETURN(rc); -} - -int gemini_rollback_to_savepoint(THD *thd) -{ - dsmStatus_t rc = 0; - DBUG_ENTER("gemini_rollback_to_savepoint"); - if(thd->gemini.savepoint > 1) - { - rc = dsmTransaction((dsmContext_t *)thd->gemini.context, - &thd->gemini.savepoint,DSMTXN_UNSAVE,0,NULL); - } - DBUG_RETURN(rc); -} - -int gemini_recovery_logging(THD *thd, bool on) -{ - int error; - int noLogging; - - if(!thd->gemini.context) - return 0; - - if(on) - noLogging = 0; - else - noLogging = 1; - - error = dsmContextSetLong((dsmContext_t *)thd->gemini.context, - DSM_TAGCONTEXT_NO_LOGGING,noLogging); - return error; -} - -/* gemDataType - translates from mysql data type constant to gemini - key services data type contstant */ -int gemDataType ( int mysqlType ) -{ - switch (mysqlType) - { - case FIELD_TYPE_LONG: - case FIELD_TYPE_TINY: - case FIELD_TYPE_SHORT: - case FIELD_TYPE_TIMESTAMP: - case FIELD_TYPE_LONGLONG: - case FIELD_TYPE_INT24: - 
case FIELD_TYPE_DATE: - case FIELD_TYPE_TIME: - case FIELD_TYPE_DATETIME: - case FIELD_TYPE_YEAR: - case FIELD_TYPE_NEWDATE: - case FIELD_TYPE_ENUM: - case FIELD_TYPE_SET: - return GEM_INT; - case FIELD_TYPE_DECIMAL: - return GEM_DECIMAL; - case FIELD_TYPE_FLOAT: - return GEM_FLOAT; - case FIELD_TYPE_DOUBLE: - return GEM_DOUBLE; - case FIELD_TYPE_TINY_BLOB: - return GEM_TINYBLOB; - case FIELD_TYPE_MEDIUM_BLOB: - return GEM_MEDIUMBLOB; - case FIELD_TYPE_LONG_BLOB: - return GEM_LONGBLOB; - case FIELD_TYPE_BLOB: - return GEM_BLOB; - case FIELD_TYPE_VAR_STRING: - case FIELD_TYPE_STRING: - return GEM_CHAR; - } - return -1; -} - -/***************************************************************************** -** Gemini tables -*****************************************************************************/ - -const char **ha_gemini::bas_ext() const -{ static const char *ext[]= { ha_gemini_ext, ha_gemini_idx_ext, NullS }; - return ext; -} - - -int ha_gemini::open(const char *name, int mode, uint test_if_locked) -{ - dsmObject_t tableId = 0; - THD *thd; - char name_buff[FN_REFLEN]; - char tabname_buff[FN_REFLEN]; - char dbname_buff[FN_REFLEN]; - unsigned i,nameLen; - LONG txNumber; - dsmStatus_t rc; - - DBUG_ENTER("ha_gemini::open"); - - thd = current_thd; - /* Init shared structure */ - if (!(share=get_share(name,table))) - { - DBUG_RETURN(1); /* purecov: inspected */ - } - thr_lock_data_init(&share->lock,&lock,(void*) 0); - - ref_length = sizeof(dsmRecid_t); - - if(thd->gemini.context == NULL) - { - /* Need to get this thread a connection into the database */ - rc = gemini_connect(thd); - if(rc) - return rc; - } - if (!(rec_buff=(byte*)my_malloc(table->rec_buff_length, - MYF(MY_WME)))) - { - DBUG_RETURN(1); - } - - /* separate out the name of the table and the database (a VST must be - ** created in the mysql database) - */ - rc = gemini_parse_table_name(name, dbname_buff, tabname_buff); - if (rc == 0) - { - if (strcmp(dbname_buff, "mysql") == 0) - { - tableId = 
gemini_is_vst(tabname_buff); - } - } - sprintf(name_buff, "%s.%s", dbname_buff, tabname_buff); - - /* if it's not a VST, get the table number the regular way */ - if (!tableId) - { - rc = dsmObjectNameToNum((dsmContext_t *)thd->gemini.context, - (dsmText_t *)name_buff, - &tableId); - if (rc) - { - gemini_msg((dsmContext_t *)thd->gemini.context, - "Unable to find table number for %s", name_buff); - DBUG_RETURN(rc); - } - } - tableNumber = tableId; - - if(!rc) - rc = index_open(name_buff); - - fixed_length_row=!(table->db_create_options & HA_OPTION_PACK_RECORD); - key_read = 0; - using_ignore = 0; - - /* Get the gemini table status -- we want to know if the table - crashed while being in the midst of a repair operation */ - rc = dsmTableStatus((dsmContext_t *)thd->gemini.context, - tableNumber,&tableStatus); - if(tableStatus == DSM_OBJECT_IN_REPAIR) - tableStatus = HA_ERR_CRASHED; - - pthread_mutex_lock(&share->mutex); - share->use_count++; - pthread_mutex_unlock(&share->mutex); - - if (table->blob_fields) - { - /* Allocate room for the blob ids from an unpacked row. Note that - ** we may not actually need all of this space because tiny blobs - ** are stored in the packed row, not in a separate storage object - ** like larger blobs. But we allocate an entry for all blobs to - ** keep the code simpler. 
- */ - pBlobDescs = (gemBlobDesc_t *)my_malloc( - table->blob_fields * sizeof(gemBlobDesc_t), - MYF(MY_WME | MY_ZEROFILL)); - } - else - { - pBlobDescs = 0; - } - - get_index_stats(thd); - info(HA_STATUS_CONST); - - DBUG_RETURN (rc); -} - -/* Look up and store the object numbers for the indexes on this table */ -int ha_gemini::index_open(char *tableName) -{ - dsmStatus_t rc = 0; - int nameLen; - - DBUG_ENTER("ha_gemini::index_open"); - if(table->keys) - { - THD *thd = current_thd; - dsmObject_t objectNumber; - if (!(pindexNumbers=(dsmIndex_t *)my_malloc(table->keys*sizeof(dsmIndex_t), - MYF(MY_WME)))) - { - DBUG_RETURN(1); - } - nameLen = strlen(tableName); - tableName[nameLen] = '.'; - nameLen++; - - for( uint i = 0; i < table->keys && !rc; i++) - { - strcpy(&tableName[nameLen],table->key_info[i].name); - rc = dsmObjectNameToNum((dsmContext_t *)thd->gemini.context, - (dsmText_t *)tableName, - &objectNumber); - if (rc) - { - gemini_msg((dsmContext_t *)thd->gemini.context, - "Unable to file Index number for %s", tableName); - DBUG_RETURN(rc); - } - pindexNumbers[i] = objectNumber; - } - } - else - pindexNumbers = 0; - - DBUG_RETURN(rc); -} - -int ha_gemini::close(void) -{ - DBUG_ENTER("ha_gemini::close"); - my_free((char*)rec_buff,MYF(MY_ALLOW_ZERO_PTR)); - rec_buff = 0; - my_free((char *)pindexNumbers,MYF(MY_ALLOW_ZERO_PTR)); - pindexNumbers = 0; - - if (pBlobDescs) - { - for (uint i = 0; i < table->blob_fields; i++) - { - my_free((char*)pBlobDescs[i].pBlob, MYF(MY_ALLOW_ZERO_PTR)); - } - my_free((char *)pBlobDescs, MYF(0)); - pBlobDescs = 0; - } - - DBUG_RETURN(free_share(share, 0)); -} - - -int ha_gemini::write_row(byte * record) -{ - int error = 0; - dsmRecord_t dsmRecord; - THD *thd; - - DBUG_ENTER("write_row"); - - if(tableStatus == HA_ERR_CRASHED) - DBUG_RETURN(tableStatus); - - thd = current_thd; - - statistic_increment(ha_write_count,&LOCK_status); - if (table->time_stamp) - update_timestamp(record+table->time_stamp-1); - - if(thd->gemini.needSavepoint || 
using_ignore) - { - thd->gemini.savepoint++; - error = dsmTransaction((dsmContext_t *)thd->gemini.context, - &thd->gemini.savepoint, - DSMTXN_SAVE, 0, 0); - if (error) - DBUG_RETURN(error); - thd->gemini.needSavepoint = 0; - } - - if (table->next_number_field && record == table->record[0]) - { - if(thd->next_insert_id) - { - ULONG64 nr; - /* A set insert-id statement so set the auto-increment value if this - value is higher than it's current value */ - error = dsmTableAutoIncrement((dsmContext_t *)thd->gemini.context, - tableNumber, (ULONG64 *)&nr,1); - if(thd->next_insert_id > nr) - { - error = dsmTableAutoIncrementSet((dsmContext_t *)thd->gemini.context, - tableNumber, - (ULONG64)thd->next_insert_id); - } - } - - update_auto_increment(); - } - - dsmRecord.table = tableNumber; - dsmRecord.maxLength = table->rec_buff_length; - - if ((error=pack_row((byte **)&dsmRecord.pbuffer, (int *)&dsmRecord.recLength, - record, FALSE))) - { - DBUG_RETURN(error); - } - - error = dsmRecordCreate((dsmContext_t *)thd->gemini.context, - &dsmRecord,0); - - if(!error) - { - error = handleIndexEntries(record, dsmRecord.recid,KEY_CREATE); - if(error == HA_ERR_FOUND_DUPP_KEY && using_ignore) - { - dsmStatus_t rc; - rc = dsmTransaction((dsmContext_t *)thd->gemini.context, - &thd->gemini.savepoint,DSMTXN_UNSAVE,0,NULL); - thd->gemini.needSavepoint = 1; - } - } - if(error == DSM_S_RQSTREJ) - error = HA_ERR_LOCK_WAIT_TIMEOUT; - - DBUG_RETURN(error); -} - -longlong ha_gemini::get_auto_increment() -{ - longlong nr; - int error; - int update; - THD *thd=current_thd; - - if(thd->lex.sql_command == SQLCOM_SHOW_TABLES) - update = 0; - else - update = 1; - - error = dsmTableAutoIncrement((dsmContext_t *)thd->gemini.context, - tableNumber, (ULONG64 *)&nr, - update); - return nr; -} - -/* Put or delete index entries for a row */ -int ha_gemini::handleIndexEntries(const byte * record, dsmRecid_t recid, - enum_key_string_options option) -{ - dsmStatus_t rc = 0; - - DBUG_ENTER("handleIndexEntries"); - - 
for (uint i = 0; i < table->keys && rc == 0; i++) - { - rc = handleIndexEntry(record, recid,option, i); - } - DBUG_RETURN(rc); -} - -int ha_gemini::handleIndexEntry(const byte * record, dsmRecid_t recid, - enum_key_string_options option,uint keynr) -{ - dsmStatus_t rc = 0; - KEY *key_info; - int keyStringLen; - bool thereIsAnull; - THD *thd; - - AUTOKEY(theKey,keyBufSize); - - DBUG_ENTER("handleIndexEntry"); - - thd = current_thd; - key_info=table->key_info+keynr; - thereIsAnull = FALSE; - rc = createKeyString(record, key_info, theKey.akey.keystr, - sizeof(theKey.apad),&keyStringLen, - (short)pindexNumbers[keynr], - &thereIsAnull); - if(!rc) - { - theKey.akey.index = pindexNumbers[keynr]; - theKey.akey.keycomps = (COUNT)key_info->key_parts; - - /* We have to subtract three here since cxKeyPrepare - expects that the three lead bytes of the header are - not counted in this length -- But cxKeyPrepare also - expects that these three bytes are present in the keystr */ - theKey.akey.keyLen = (COUNT)keyStringLen - FULLKEYHDRSZ; - theKey.akey.unknown_comp = (dsmBoolean_t)thereIsAnull; - theKey.akey.word_index = 0; - theKey.akey.descending_key =0; - if(option == KEY_CREATE) - { - rc = dsmKeyCreate((dsmContext_t *)thd->gemini.context, &theKey.akey, - (dsmTable_t)tableNumber, recid, NULL); - if(rc == DSM_S_IXDUPKEY) - { - last_dup_key=keynr; - rc = HA_ERR_FOUND_DUPP_KEY; - } - } - else if(option == KEY_DELETE) - { - rc = dsmKeyDelete((dsmContext_t *)thd->gemini.context, &theKey.akey, - (dsmTable_t)tableNumber, recid, 0, NULL); - } - else - { - /* KEY_CHECK */ - dsmCursid_t aCursorId; - int error; - - rc = dsmCursorCreate((dsmContext_t *)thd->gemini.context, - (dsmTable_t)tableNumber, - (dsmIndex_t)pindexNumbers[keynr], - &aCursorId,NULL); - - rc = dsmCursorFind((dsmContext_t *)thd->gemini.context, - &aCursorId,&theKey.akey,NULL,DSMDBKEY, - DSMFINDFIRST,DSM_LK_SHARE,0, - &lastRowid,0); - error = dsmCursorDelete((dsmContext_t *)thd->gemini.context, - &aCursorId, 0); - - } - } - 
DBUG_RETURN(rc); -} - -int ha_gemini::createKeyString(const byte * record, KEY *pkeyinfo, - unsigned char *pkeyBuf, int bufSize, - int *pkeyStringLen, - short geminiIndexNumber, - bool *thereIsAnull) -{ - dsmStatus_t rc = 0; - int componentLen; - int fieldType; - int isNull; - uint key_part_length; - - KEY_PART_INFO *key_part; - - DBUG_ENTER("createKeyString"); - - rc = gemKeyInit(pkeyBuf,pkeyStringLen, geminiIndexNumber); - - for(uint i = 0; i < pkeyinfo->key_parts && rc == 0; i++) - { - unsigned char *pos; - - key_part = pkeyinfo->key_part + i; - key_part_length = key_part->length; - fieldType = gemDataType(key_part->field->type()); - switch (fieldType) - { - case GEM_CHAR: - { - /* Save the current ptr to the field in case we're building a key - to remove an old key value when an indexed character column - gets updated. */ - char *ptr = key_part->field->ptr; - key_part->field->ptr = (char *)record + key_part->offset; - key_part->field->sort_string((char*)rec_buff, key_part->length); - key_part->field->ptr = ptr; - pos = (unsigned char *)rec_buff; - } - break; - - case GEM_TINYBLOB: - case GEM_BLOB: - case GEM_MEDIUMBLOB: - case GEM_LONGBLOB: - ((Field_blob*)key_part->field)->get_ptr((char**)&pos); - key_part_length = ((Field_blob*)key_part->field)->get_length( - (char*)record + key_part->offset); - break; - - default: - pos = (unsigned char *)record + key_part->offset; - break; - } - - isNull = record[key_part->null_offset] & key_part->null_bit; - if(isNull) - *thereIsAnull = TRUE; - - rc = gemFieldToIdxComponent(pos, - (unsigned long) key_part_length, - fieldType, - isNull , - key_part->field->flags & UNSIGNED_FLAG, - pkeyBuf + *pkeyStringLen, - bufSize, - &componentLen); - *pkeyStringLen += componentLen; - } - DBUG_RETURN(rc); -} - - -int ha_gemini::update_row(const byte * old_record, byte * new_record) -{ - int error = 0; - dsmRecord_t dsmRecord; - unsigned long savepoint; - THD *thd = current_thd; - DBUG_ENTER("update_row"); - - 
statistic_increment(ha_update_count,&LOCK_status); - if (table->time_stamp) - update_timestamp(new_record+table->time_stamp-1); - - if(thd->gemini.needSavepoint || using_ignore) - { - thd->gemini.savepoint++; - error = dsmTransaction((dsmContext_t *)thd->gemini.context, - &thd->gemini.savepoint, - DSMTXN_SAVE, 0, 0); - if (error) - DBUG_RETURN(error); - thd->gemini.needSavepoint = 0; - } - for (uint keynr=0 ; keynr < table->keys ; keynr++) - { - if(key_cmp(keynr,old_record, new_record,FALSE)) - { - error = handleIndexEntry(old_record,lastRowid,KEY_DELETE,keynr); - if(error) - DBUG_RETURN(error); - error = handleIndexEntry(new_record, lastRowid, KEY_CREATE, keynr); - if(error) - { - if (using_ignore && error == HA_ERR_FOUND_DUPP_KEY) - { - dsmStatus_t rc; - rc = dsmTransaction((dsmContext_t *)thd->gemini.context, - &thd->gemini.savepoint,DSMTXN_UNSAVE,0,NULL); - thd->gemini.needSavepoint = 1; - } - DBUG_RETURN(error); - } - } - } - - dsmRecord.table = tableNumber; - dsmRecord.recid = lastRowid; - dsmRecord.maxLength = table->rec_buff_length; - - if ((error=pack_row((byte **)&dsmRecord.pbuffer, (int *)&dsmRecord.recLength, - new_record, TRUE))) - { - DBUG_RETURN(error); - } - error = dsmRecordUpdate((dsmContext_t *)thd->gemini.context, - &dsmRecord, 0, NULL); - - DBUG_RETURN(error); -} - - -int ha_gemini::delete_row(const byte * record) -{ - int error = 0; - dsmRecord_t dsmRecord; - THD *thd = current_thd; - dsmContext_t *pcontext = (dsmContext_t *)thd->gemini.context; - DBUG_ENTER("delete_row"); - - statistic_increment(ha_delete_count,&LOCK_status); - - if(thd->gemini.needSavepoint) - { - thd->gemini.savepoint++; - error = dsmTransaction(pcontext, &thd->gemini.savepoint, DSMTXN_SAVE, 0, 0); - if (error) - DBUG_RETURN(error); - thd->gemini.needSavepoint = 0; - } - - dsmRecord.table = tableNumber; - dsmRecord.recid = lastRowid; - - error = handleIndexEntries(record, dsmRecord.recid,KEY_DELETE); - if(!error) - { - error = dsmRecordDelete(pcontext, &dsmRecord, 0, NULL); 
- } - - /* Delete any blobs associated with this row */ - if (table->blob_fields) - { - dsmBlob_t gemBlob; - - gemBlob.areaType = DSMOBJECT_BLOB; - gemBlob.blobObjNo = tableNumber; - for (uint i = 0; i < table->blob_fields; i++) - { - if (pBlobDescs[i].blobId) - { - gemBlob.blobId = pBlobDescs[i].blobId; - my_free((char *)pBlobDescs[i].pBlob, MYF(MY_ALLOW_ZERO_PTR)); - dsmBlobStart(pcontext, &gemBlob); - dsmBlobDelete(pcontext, &gemBlob, NULL); - /* according to DSM doc, no need to call dsmBlobEnd() */ - } - } - } - - DBUG_RETURN(error); -} - -int ha_gemini::index_init(uint keynr) -{ - int error = 0; - THD *thd; - DBUG_ENTER("index_init"); - thd = current_thd; - - lastRowid = 0; - active_index=keynr; - error = dsmCursorCreate((dsmContext_t *)thd->gemini.context, - (dsmTable_t)tableNumber, - (dsmIndex_t)pindexNumbers[keynr], - &cursorId,NULL); - pbracketBase = (dsmKey_t *)my_malloc(sizeof(dsmKey_t) + keyBufSize, - MYF(MY_WME)); - if(!pbracketBase) - DBUG_RETURN(1); - pbracketLimit = (dsmKey_t *)my_malloc(sizeof(dsmKey_t) + keyBufSize,MYF(MY_WME)); - if(!pbracketLimit) - { - my_free((char *)pbracketLimit,MYF(0)); - DBUG_RETURN(1); - } - pbracketBase->index = 0; - pbracketLimit->index = (dsmIndex_t)pindexNumbers[keynr]; - pbracketBase->descending_key = pbracketLimit->descending_key = 0; - pbracketBase->ksubstr = pbracketLimit->ksubstr = 0; - pbracketLimit->keycomps = pbracketBase->keycomps = 1; - - pfoundKey = (dsmKey_t *)my_malloc(sizeof(dsmKey_t) + keyBufSize,MYF(MY_WME)); - if(!pfoundKey) - { - my_free((char *)pbracketLimit,MYF(0)); - my_free((char *)pbracketBase,MYF(0)); - DBUG_RETURN(1); - } - - DBUG_RETURN(error); -} - -int ha_gemini::index_end() -{ - int error = 0; - THD *thd; - DBUG_ENTER("index_end"); - thd = current_thd; - error = dsmCursorDelete((dsmContext_t *)thd->gemini.context, - &cursorId, 0); - if(pbracketLimit) - my_free((char *)pbracketLimit,MYF(0)); - if(pbracketBase) - my_free((char *)pbracketBase,MYF(0)); - if(pfoundKey) - my_free((char 
*)pfoundKey,MYF(0)); - - pbracketLimit = 0; - pbracketBase = 0; - pfoundKey = 0; - DBUG_RETURN(error); -} - -/* This is only used to read whole keys */ - -int ha_gemini::index_read_idx(byte * buf, uint keynr, const byte * key, - uint key_len, enum ha_rkey_function find_flag) -{ - int error = 0; - DBUG_ENTER("index_read_idx"); - statistic_increment(ha_read_key_count,&LOCK_status); - - error = index_init(keynr); - if (!error) - error = index_read(buf,key,key_len,find_flag); - - if(error == HA_ERR_END_OF_FILE) - error = HA_ERR_KEY_NOT_FOUND; - - table->status = error ? STATUS_NOT_FOUND : 0; - DBUG_RETURN(error); -} - -int ha_gemini::pack_key( uint keynr, dsmKey_t *pkey, - const byte *key_ptr, uint key_length) -{ - KEY *key_info=table->key_info+keynr; - KEY_PART_INFO *key_part=key_info->key_part; - KEY_PART_INFO *end=key_part+key_info->key_parts; - int rc; - int componentLen; - DBUG_ENTER("pack_key"); - - rc = gemKeyInit(pkey->keystr,&componentLen, - (short)pindexNumbers[active_index]); - pkey->keyLen = componentLen; - - for (; key_part != end && (int) key_length > 0 && !rc; key_part++) - { - uint offset=0; - unsigned char *pos; - uint key_part_length = key_part->length; - - int fieldType; - if (key_part->null_bit) - { - offset=1; - if (*key_ptr != 0) // Store 0 if NULL - { - key_length-= key_part->store_length; - key_ptr+= key_part->store_length; - rc = gemFieldToIdxComponent( - (unsigned char *)key_ptr + offset, - (unsigned long) key_part_length, - 0, - 1 , /* Tells it to build a null component */ - key_part->field->flags & UNSIGNED_FLAG, - pkey->keystr + pkey->keyLen, - keyBufSize, - &componentLen); - pkey->keyLen += componentLen; - continue; - } - } - fieldType = gemDataType(key_part->field->type()); - switch (fieldType) - { - case GEM_CHAR: - key_part->field->store((char*)key_ptr + offset, key_part->length); - key_part->field->sort_string((char*)rec_buff, key_part->length); - pos = (unsigned char *)rec_buff; - break; - - case GEM_TINYBLOB: - case GEM_BLOB: - case 
GEM_MEDIUMBLOB: - case GEM_LONGBLOB: - ((Field_blob*)key_part->field)->get_ptr((char**)&pos); - key_part_length = ((Field_blob*)key_part->field)->get_length( - (char*)key_ptr + offset); - break; - - default: - pos = (unsigned char *)key_ptr + offset; - break; - } - - rc = gemFieldToIdxComponent( - pos, - (unsigned long) key_part_length, - fieldType, - 0 , - key_part->field->flags & UNSIGNED_FLAG, - pkey->keystr + pkey->keyLen, - keyBufSize, - &componentLen); - - key_ptr+=key_part->store_length; - key_length-=key_part->store_length; - pkey->keyLen += componentLen; - } - DBUG_RETURN(rc); -} - -void ha_gemini::unpack_key(char *record, dsmKey_t *key, uint index) -{ - KEY *key_info=table->key_info+index; - KEY_PART_INFO *key_part= key_info->key_part, - *end=key_part+key_info->key_parts; - int fieldIsNull, fieldType; - int rc = 0; - - char unsigned *pos= &key->keystr[FULLKEYHDRSZ+4/* 4 for the index number*/]; - - for ( ; key_part != end; key_part++) - { - fieldType = gemDataType(key_part->field->type()); - if(fieldType == GEM_CHAR) - { - /* Can't get data from character indexes since the sort weights - are in the index and not the characters. 
*/ - key_read = 0; - } - rc = gemIdxComponentToField(pos, fieldType, - (unsigned char *)record + key_part->field->offset(), - //key_part->field->field_length, - key_part->length, - key_part->field->decimals(), - &fieldIsNull); - if(fieldIsNull) - { - record[key_part->null_offset] |= key_part->null_bit; - } - else if (key_part->null_bit) - { - record[key_part->null_offset]&= ~key_part->null_bit; - } - while(*pos++); /* Advance to next field in key by finding */ - /* a null byte */ - } -} - -int ha_gemini::index_read(byte * buf, const byte * key, - uint key_len, enum ha_rkey_function find_flag) -{ - int error = 0; - THD *thd; - int componentLen; - - DBUG_ENTER("index_read"); - statistic_increment(ha_read_key_count,&LOCK_status); - - - pbracketBase->index = (short)pindexNumbers[active_index]; - pbracketBase->keycomps = 1; - - - /* Its a greater than operation so create a base bracket - from the input key data. */ - error = pack_key(active_index, pbracketBase, key, key_len); - if(error) - goto errorReturn; - - if(find_flag == HA_READ_AFTER_KEY) - { - /* A greater than operation */ - error = gemKeyAddLow(pbracketBase->keystr + pbracketBase->keyLen, - &componentLen); - pbracketBase->keyLen += componentLen; - } - if(find_flag == HA_READ_KEY_EXACT) - { - /* Need to set up a high bracket for an equality operator - Which is a copy of the base bracket plus a hi lim term */ - bmove(pbracketLimit,pbracketBase,(size_t)pbracketBase->keyLen + sizeof(dsmKey_t)); - error = gemKeyAddHigh(pbracketLimit->keystr + pbracketLimit->keyLen, - &componentLen); - if(error) - goto errorReturn; - pbracketLimit->keyLen += componentLen; - } - else - { - /* Always add a high range -- except for HA_READ_KEY_EXACT this - is all we need for the upper index bracket */ - error = gemKeyHigh(pbracketLimit->keystr, &componentLen, - pbracketLimit->index); - - pbracketLimit->keyLen = componentLen; - } - /* We have to subtract the header size here since cxKeyPrepare - expects that the three lead bytes of the 
header are - not counted in this length -- But cxKeyPrepare also - expects that these three bytes are present in the keystr */ - pbracketBase->keyLen -= FULLKEYHDRSZ; - pbracketLimit->keyLen -= FULLKEYHDRSZ; - - thd = current_thd; - - error = findRow(thd, DSMFINDFIRST, buf); - -errorReturn: - if (error == DSM_S_ENDLOOP) - error = HA_ERR_KEY_NOT_FOUND; - - table->status = error ? STATUS_NOT_FOUND : 0; - DBUG_RETURN(error); -} - - -int ha_gemini::index_next(byte * buf) -{ - THD *thd; - int error = 1; - int keyStringLen=0; - dsmMask_t findMode; - DBUG_ENTER("index_next"); - - if(tableStatus == HA_ERR_CRASHED) - DBUG_RETURN(tableStatus); - - thd = current_thd; - - if(pbracketBase->index == 0) - { - error = gemKeyLow(pbracketBase->keystr, &keyStringLen, - pbracketLimit->index); - - pbracketBase->keyLen = (COUNT)keyStringLen - FULLKEYHDRSZ; - pbracketBase->index = pbracketLimit->index; - error = gemKeyHigh(pbracketLimit->keystr, &keyStringLen, - pbracketLimit->index); - pbracketLimit->keyLen = (COUNT)keyStringLen - FULLKEYHDRSZ; - - findMode = DSMFINDFIRST; - } - else - findMode = DSMFINDNEXT; - - error = findRow(thd,findMode,buf); - - if (error == DSM_S_ENDLOOP) - error = HA_ERR_END_OF_FILE; - - table->status = error ? STATUS_NOT_FOUND : 0; - DBUG_RETURN(error); -} - -int ha_gemini::index_next_same(byte * buf, const byte *key, uint keylen) -{ - int error = 0; - DBUG_ENTER("index_next_same"); - statistic_increment(ha_read_next_count,&LOCK_status); - DBUG_RETURN(index_next(buf)); -} - - -int ha_gemini::index_prev(byte * buf) -{ - int error = 0; - THD *thd = current_thd; - - DBUG_ENTER("index_prev"); - statistic_increment(ha_read_prev_count,&LOCK_status); - - error = findRow(thd, DSMFINDPREV, buf); - - if (error == DSM_S_ENDLOOP) - error = HA_ERR_END_OF_FILE; - - - table->status = error ? 
STATUS_NOT_FOUND : 0; - DBUG_RETURN(error); -} - - -int ha_gemini::index_first(byte * buf) -{ - DBUG_ENTER("index_first"); - statistic_increment(ha_read_first_count,&LOCK_status); - DBUG_RETURN(index_next(buf)); -} - -int ha_gemini::index_last(byte * buf) -{ - int error = 0; - THD *thd; - int keyStringLen; - dsmMask_t findMode; - thd = current_thd; - - DBUG_ENTER("index_last"); - statistic_increment(ha_read_last_count,&LOCK_status); - - error = gemKeyLow(pbracketBase->keystr, &keyStringLen, - pbracketLimit->index); - - pbracketBase->keyLen = (COUNT)keyStringLen - FULLKEYHDRSZ; - pbracketBase->index = pbracketLimit->index; - error = gemKeyHigh(pbracketLimit->keystr, &keyStringLen, - pbracketLimit->index); - pbracketLimit->keyLen = (COUNT)keyStringLen - FULLKEYHDRSZ; - - error = findRow(thd,DSMFINDLAST,buf); - - if (error == DSM_S_ENDLOOP) - error = HA_ERR_END_OF_FILE; - - table->status = error ? STATUS_NOT_FOUND : 0; - DBUG_RETURN(error); -} - -int ha_gemini::rnd_init(bool scan) -{ - THD *thd = current_thd; - - lastRowid = 0; - - return 0; -} - -int ha_gemini::rnd_end() -{ -/* - return gem_scan_end(); -*/ - return 0; -} - -int ha_gemini::rnd_next(byte *buf) -{ - int error = 0; - dsmRecord_t dsmRecord; - THD *thd; - - DBUG_ENTER("rnd_next"); - - if(tableStatus == HA_ERR_CRASHED) - DBUG_RETURN(tableStatus); - - thd = current_thd; - if(thd->gemini.tx_isolation == ISO_READ_COMMITTED && !(lockMode & DSM_LK_EXCL) - && lastRowid) - error = dsmObjectUnlock((dsmContext_t *)thd->gemini.context, - tableNumber, DSMOBJECT_RECORD, lastRowid, - lockMode | DSM_UNLK_FREE, 0); - - statistic_increment(ha_read_rnd_next_count,&LOCK_status); - dsmRecord.table = tableNumber; - dsmRecord.recid = lastRowid; - dsmRecord.pbuffer = (dsmBuffer_t *)rec_buff; - dsmRecord.recLength = table->reclength; - dsmRecord.maxLength = table->rec_buff_length; - - error = dsmTableScan((dsmContext_t *)thd->gemini.context, - &dsmRecord, DSMFINDNEXT, lockMode, 0); - - if(!error) - { - lastRowid = 
dsmRecord.recid; - error = unpack_row((char *)buf,(char *)dsmRecord.pbuffer); - } - if(!error) - ; - else - { - lastRowid = 0; - if (error == DSM_S_ENDLOOP) - error = HA_ERR_END_OF_FILE; - else if (error == DSM_S_RQSTREJ) - error = HA_ERR_LOCK_WAIT_TIMEOUT; - else if (error == DSM_S_LKTBFULL) - { - error = HA_ERR_LOCK_TABLE_FULL; - gemini_lock_table_overflow_error((dsmContext_t *)thd->gemini.context); - } - } - table->status = error ? STATUS_NOT_FOUND : 0; - DBUG_RETURN(error); -} - - -int ha_gemini::rnd_pos(byte * buf, byte *pos) -{ - int error; - int rc; - - THD *thd; - - statistic_increment(ha_read_rnd_count,&LOCK_status); - thd = current_thd; - memcpy((void *)&lastRowid,pos,ref_length); - if(thd->gemini.tx_isolation == ISO_READ_COMMITTED && !(lockMode & DSM_LK_EXCL)) - { - /* Lock the row */ - - error = dsmObjectLock((dsmContext_t *)thd->gemini.context, - (dsmObject_t)tableNumber,DSMOBJECT_RECORD,lastRowid, - lockMode, 1, 0); - if ( error ) - goto errorReturn; - } - error = fetch_row(thd->gemini.context, buf); - if(thd->gemini.tx_isolation == ISO_READ_COMMITTED && !(lockMode & DSM_LK_EXCL)) - { - /* Unlock the row */ - - rc = dsmObjectUnlock((dsmContext_t *)thd->gemini.context, - (dsmObject_t)tableNumber,DSMOBJECT_RECORD,lastRowid, - lockMode | DSM_UNLK_FREE , 0); - } - if(error == DSM_S_RMNOTFND) - error = HA_ERR_RECORD_DELETED; - - errorReturn: - table->status = error ? 
STATUS_NOT_FOUND : 0; - return error; -} - -int ha_gemini::fetch_row(void *gemini_context,const byte *buf) -{ - dsmStatus_t rc = 0; - dsmRecord_t dsmRecord; - - DBUG_ENTER("fetch_row"); - dsmRecord.table = tableNumber; - dsmRecord.recid = lastRowid; - dsmRecord.pbuffer = (dsmBuffer_t *)rec_buff; - dsmRecord.recLength = table->reclength; - dsmRecord.maxLength = table->rec_buff_length; - - rc = dsmRecordGet((dsmContext_t *)gemini_context, - &dsmRecord, 0); - - if(!rc) - { - rc = unpack_row((char *)buf,(char *)dsmRecord.pbuffer); - } - - DBUG_RETURN(rc); -} -int ha_gemini::findRow(THD *thd, dsmMask_t findMode, byte *buf) -{ - dsmStatus_t rc; - dsmKey_t *pkey; - - DBUG_ENTER("findRow"); - - if(thd->gemini.tx_isolation == ISO_READ_COMMITTED && !(lockMode & DSM_LK_EXCL) - && lastRowid) - rc = dsmObjectUnlock((dsmContext_t *)thd->gemini.context, - tableNumber, DSMOBJECT_RECORD, lastRowid, - lockMode | DSM_UNLK_FREE, 0); - if( key_read ) - pkey = pfoundKey; - else - pkey = 0; - - rc = dsmCursorFind((dsmContext_t *)thd->gemini.context, - &cursorId, - pbracketBase, - pbracketLimit, - DSMPARTIAL, - findMode, - lockMode, - NULL, - &lastRowid, - pkey); - if( rc ) - goto errorReturn; - - if(key_read) - { - unpack_key((char*)buf, pkey, active_index); - } - if(!key_read) /* unpack_key may have turned off key_read */ - { - rc = fetch_row((dsmContext_t *)thd->gemini.context,buf); - } - -errorReturn: - if(!rc) - ; - else - { - lastRowid = 0; - if(rc == DSM_S_RQSTREJ) - rc = HA_ERR_LOCK_WAIT_TIMEOUT; - else if (rc == DSM_S_LKTBFULL) - { - rc = HA_ERR_LOCK_TABLE_FULL; - gemini_lock_table_overflow_error((dsmContext_t *)thd->gemini.context); - } - } - - DBUG_RETURN(rc); -} - -void ha_gemini::position(const byte *record) -{ - memcpy(ref,&lastRowid,ref_length); -} - - -void ha_gemini::info(uint flag) -{ - DBUG_ENTER("info"); - - if ((flag & HA_STATUS_VARIABLE)) - { - THD *thd = current_thd; - dsmStatus_t error; - ULONG64 rows; - - if(thd->gemini.context == NULL) - { - /* Need to get this 
thread a connection into the database */ - error = gemini_connect(thd); - if(error) - DBUG_VOID_RETURN; - } - - error = dsmRowCount((dsmContext_t *)thd->gemini.context,tableNumber,&rows); - records = (ha_rows)rows; - deleted = 0; - } - if ((flag & HA_STATUS_CONST)) - { - ha_rows *rec_per_key = share->rec_per_key; - for (uint i = 0; i < table->keys; i++) - for(uint k=0; - k < table->key_info[i].key_parts; k++,rec_per_key++) - table->key_info[i].rec_per_key[k] = *rec_per_key; - } - if ((flag & HA_STATUS_ERRKEY)) - { - errkey=last_dup_key; - } - if ((flag & HA_STATUS_TIME)) - { - ; - } - if ((flag & HA_STATUS_AUTO)) - { - THD *thd = current_thd; - dsmStatus_t error; - - error = dsmTableAutoIncrement((dsmContext_t *)thd->gemini.context, - tableNumber, - (ULONG64 *)&auto_increment_value, - 0); - /* Should return the next auto-increment value that - will be given -- so we need to increment the one dsm - currently reports. */ - auto_increment_value++; - } - - DBUG_VOID_RETURN; -} - - -int ha_gemini::extra(enum ha_extra_function operation) -{ - switch (operation) - { - case HA_EXTRA_RESET: - case HA_EXTRA_RESET_STATE: - key_read=0; - using_ignore=0; - break; - case HA_EXTRA_KEYREAD: - key_read=1; // Query satisfied with key - break; - case HA_EXTRA_NO_KEYREAD: - key_read=0; - break; - case HA_EXTRA_IGNORE_DUP_KEY: - using_ignore=1; - break; - case HA_EXTRA_NO_IGNORE_DUP_KEY: - using_ignore=0; - break; - - default: - break; - } - return 0; -} - - -int ha_gemini::reset(void) -{ - key_read=0; // Reset to state after open - return 0; -} - - -/* - As MySQL will execute an external lock for every new table it uses - we can use this to start the transactions. 
-*/ - -int ha_gemini::external_lock(THD *thd, int lock_type) -{ - dsmStatus_t rc = 0; - LONG txNumber; - - DBUG_ENTER("ha_gemini::external_lock"); - - if (lock_type != F_UNLCK) - { - if (!thd->gemini.lock_count) - { - thd->gemini.lock_count = 1; - thd->gemini.tx_isolation = thd->tx_isolation; - } - // lockMode has already been set in store_lock - // If the statement about to be executed calls for - // exclusive locks and we're running at read uncommitted - // isolation level then raise an error. - if(thd->gemini.tx_isolation == ISO_READ_UNCOMMITTED) - { - if(lockMode == DSM_LK_EXCL) - { - DBUG_RETURN(HA_ERR_READ_ONLY_TRANSACTION); - } - else - { - lockMode = DSM_LK_NOLOCK; - } - } - - if(thd->gemini.context == NULL) - { - /* Need to get this thread a connection into the database */ - rc = gemini_connect(thd); - if(rc) - return rc; - } - /* Set need savepoint flag */ - thd->gemini.needSavepoint = 1; - - if(rc) - DBUG_RETURN(rc); - - - if( thd->in_lock_tables || thd->gemini.tx_isolation == ISO_SERIALIZABLE ) - { - rc = dsmObjectLock((dsmContext_t *)thd->gemini.context, - (dsmObject_t)tableNumber,DSMOBJECT_TABLE,0, - lockMode, 1, 0); - if(rc == DSM_S_RQSTREJ) - rc = HA_ERR_LOCK_WAIT_TIMEOUT; - } - } - else /* lock_type == F_UNLK */ - { - /* Commit the tx if we're in auto-commit mode */ - if (!(thd->options & OPTION_NOT_AUTO_COMMIT)&& - !(thd->options & OPTION_BEGIN)) - gemini_commit(thd); - } - - DBUG_RETURN(rc); -} - - -THR_LOCK_DATA **ha_gemini::store_lock(THD *thd, THR_LOCK_DATA **to, - enum thr_lock_type lock_type) -{ - if (lock_type != TL_IGNORE && lock.type == TL_UNLOCK) - { - /* If we are not doing a LOCK TABLE, then allow multiple writers */ - if ((lock_type >= TL_WRITE_CONCURRENT_INSERT && - lock_type <= TL_WRITE) && - !thd->in_lock_tables) - lock_type = TL_WRITE_ALLOW_WRITE; - lock.type=lock_type; - } - if(table->reginfo.lock_type > TL_WRITE_ALLOW_READ) - lockMode = DSM_LK_EXCL; - else - lockMode = DSM_LK_SHARE; - - *to++= &lock; - return to; -} - -void 
ha_gemini::update_create_info(HA_CREATE_INFO *create_info) -{ - table->file->info(HA_STATUS_AUTO | HA_STATUS_CONST); - if (!(create_info->used_fields & HA_CREATE_USED_AUTO)) - { - create_info->auto_increment_value=auto_increment_value; - } -} - -int ha_gemini::create(const char *name, register TABLE *form, - HA_CREATE_INFO *create_info) -{ - THD *thd; - char name_buff[FN_REFLEN]; - char dbname_buff[FN_REFLEN]; - DBUG_ENTER("ha_gemini::create"); - dsmContext_t *pcontext; - dsmStatus_t rc; - dsmArea_t areaNumber; - dsmObject_t tableNumber = 0; - dsmDbkey_t dummy = 0; - unsigned i; - int baseNameLen; - dsmObject_t indexNumber; - - /* separate out the name of the table and the database (a VST must be - ** created in the mysql database) - */ - rc = gemini_parse_table_name(name, dbname_buff, name_buff); - if (rc == 0) - { - /* If the table is a VST, don't create areas or extents */ - if (strcmp(dbname_buff, "mysql") == 0) - { - tableNumber = gemini_is_vst(name_buff); - if (tableNumber) - { - return 0; - } - } - } - - thd = current_thd; - if(thd->gemini.context == NULL) - { - /* Need to get this thread a connection into the database */ - rc = gemini_connect(thd); - if(rc) - return rc; - } - pcontext = (dsmContext_t *)thd->gemini.context; - - if(thd->gemini.needSavepoint || using_ignore) - { - thd->gemini.savepoint++; - rc = dsmTransaction((dsmContext_t *)thd->gemini.context, - &thd->gemini.savepoint, - DSMTXN_SAVE, 0, 0); - if (rc) - DBUG_RETURN(rc); - thd->gemini.needSavepoint = 0; - } - - fn_format(name_buff, name, "", ha_gemini_ext, 2 | 4); - /* Create a storage area */ - rc = dsmAreaNew(pcontext,gemini_blocksize,DSMAREA_TYPE_DATA, - &areaNumber, gemini_recbits, - (dsmText_t *)"gemini_data_area"); - if( rc != 0 ) - { - gemini_msg(pcontext, "dsmAreaNew failed %l",rc); - return(rc); - } - - /* Create an extent */ - /* Don't pass in leading ./ in name_buff */ - rc = dsmExtentCreate(pcontext,areaNumber,1,15,5, - (dsmText_t *)&name_buff[start_of_name]); - if( rc != 0 ) - { 
- gemini_msg(pcontext, "dsmExtentCreate failed %l",rc); - return(rc); - } - - /* Create the table storage object */ - /* Change slashes in the name to periods */ - for( i = 0; i < strlen(name_buff); i++) - if(name_buff[i] == '/' || name_buff[i] == '\\') - name_buff[i] = '.'; - - /* Get rid of .gmd suffix */ - name_buff[strlen(name_buff) - 4] = '\0'; - - rc = dsmObjectCreate(pcontext, areaNumber, &tableNumber, - DSMOBJECT_MIXTABLE,0,0,0, - (dsmText_t *)&name_buff[start_of_name], - &dummy,&dummy); - - if (rc == 0 && table->blob_fields) - { - /* create a storage object record for blob fields */ - rc = dsmObjectCreate(pcontext, areaNumber, &tableNumber, - DSMOBJECT_BLOB,0,0,0, - (dsmText_t *)&name_buff[start_of_name], - &dummy,&dummy); - if( rc != 0 ) - { - gemini_msg(pcontext, "dsmObjectCreate for blob object failed %l",rc); - return(rc); - } - } - - if(rc == 0 && form->keys) - { - fn_format(name_buff, name, "", ha_gemini_idx_ext, 2 | 4); - /* Create a storage area */ - rc = dsmAreaNew(pcontext,gemini_blocksize,DSMAREA_TYPE_DATA, - &areaNumber, gemini_recbits, - (dsmText_t *)"gemini_index_area"); - if( rc != 0 ) - { - gemini_msg(pcontext, "dsmAreaNew failed %l",rc); - return(rc); - } - /* Create an extent */ - /* Don't pass in leading ./ in name_buff */ - rc = dsmExtentCreate(pcontext,areaNumber,1,15,5, - (dsmText_t *)&name_buff[start_of_name]); - if( rc != 0 ) - { - gemini_msg(pcontext, "dsmExtentCreate failed %l",rc); - return(rc); - } - - /* Change slashes in the name to periods */ - for( i = 0; i < strlen(name_buff); i++) - if(name_buff[i] == '/' || name_buff[i] == '\\') - name_buff[i] = '.'; - - /* Get rid of .gmi suffix */ - name_buff[strlen(name_buff) - 4] = '\0'; - - baseNameLen = strlen(name_buff); - name_buff[baseNameLen] = '.'; - baseNameLen++; - for( i = 0; i < form->keys; i++) - { - dsmObjectAttr_t indexUnique; - - indexNumber = DSMINDEX_INVALID; - /* Create a storage object record for each index */ - /* Add the index name so the object name is in the 
form - <db>.<table>.<index_name> */ - strcpy(&name_buff[baseNameLen],table->key_info[i].name); - if(table->key_info[i].flags & HA_NOSAME) - indexUnique = 1; - else - indexUnique = 0; - rc = dsmObjectCreate(pcontext, areaNumber, &indexNumber, - DSMOBJECT_MIXINDEX,indexUnique,tableNumber, - DSMOBJECT_MIXTABLE, - (dsmText_t *)&name_buff[start_of_name], - &dummy,&dummy); - - } - } - /* The auto_increment value is the next one to be given - out so give dsm one less than this value */ - if(create_info->auto_increment_value) - rc = dsmTableAutoIncrementSet(pcontext,tableNumber, - create_info->auto_increment_value-1); - - /* Get a table lock on this table in case this table is being - created as part of an alter table statement. We don't want - the alter table statement to abort because of a lock table overflow - */ - if (thd->lex.sql_command == SQLCOM_CREATE_INDEX || - thd->lex.sql_command == SQLCOM_ALTER_TABLE || - thd->lex.sql_command == SQLCOM_DROP_INDEX) - { - rc = dsmObjectLock(pcontext, - (dsmObject_t)tableNumber,DSMOBJECT_TABLE,0, - DSM_LK_EXCL, 1, 0); - /* and don't commit so we won't release the table on the table number - of the table being altered */ - } - else - { - if(!rc) - rc = gemini_commit(thd); - } - - DBUG_RETURN(rc); -} - -int ha_gemini::delete_table(const char *pname) -{ - THD *thd; - dsmStatus_t rc; - dsmContext_t *pcontext; - unsigned i,nameLen; - dsmArea_t indexArea = 0; - dsmArea_t tableArea = 0; - dsmObjectAttr_t objectAttr; - dsmObject_t associate; - dsmObjectType_t associateType; - dsmDbkey_t block, root; - int need_txn = 0; - dsmObject_t tableNum = 0; - char name_buff[FN_REFLEN]; - char dbname_buff[FN_REFLEN]; - DBUG_ENTER("ha_gemini::delete_table"); - - /* separate out the name of the table and the database (a VST must be - ** located in the mysql database) - */ - rc = gemini_parse_table_name(pname, dbname_buff, name_buff); - if (rc == 0) - { - /* If the table is a VST, there are no areas or extents to delete */ - if (strcmp(dbname_buff, 
"mysql") == 0) - { - tableNum = gemini_is_vst(name_buff); - if (tableNum) - { - return 0; - } - } - } - - thd = current_thd; - if(thd->gemini.context == NULL) - { - /* Need to get this thread a connection into the database */ - rc = gemini_connect(thd); - if(rc) - { - DBUG_RETURN(rc); - } - } - pcontext = (dsmContext_t *)thd->gemini.context; - - - bzero(name_buff, FN_REFLEN); - - nameLen = strlen(pname); - for( i = start_of_name; i < nameLen; i++) - { - if(pname[i] == '/' || pname[i] == '\\') - name_buff[i-start_of_name] = '.'; - else - name_buff[i-start_of_name] = pname[i]; - } - - rc = dsmObjectNameToNum(pcontext, (dsmText_t *)name_buff, - (dsmObject_t *)&tableNum); - if (rc) - { - gemini_msg(pcontext, "Unable to find table number for %s", name_buff); - rc = gemini_rollback(thd); - if (rc) - { - gemini_msg(pcontext, "Error in rollback %l",rc); - } - DBUG_RETURN(rc); - } - - rc = dsmObjectInfo(pcontext, tableNum, DSMOBJECT_MIXTABLE, tableNum, - &tableArea, &objectAttr, &associateType, &block, &root); - if (rc) - { - gemini_msg(pcontext, "Failed to get area number for table %d, %s, return %l", - tableNum, pname, rc); - rc = gemini_rollback(thd); - if (rc) - { - gemini_msg(pcontext, "Error in rollback %l",rc); - } - } - - indexArea = DSMAREA_INVALID; - - /* Delete the indexes and tables storage objects for with the table */ - rc = dsmObjectDeleteAssociate(pcontext, tableNum, &indexArea); - if (rc) - { - gemini_msg(pcontext, "Error deleting storage objects for table number %d, return %l", - (int)tableNum, rc); - - /* roll back txn and return */ - rc = gemini_rollback(thd); - if (rc) - { - gemini_msg(pcontext, "Error in rollback %l",rc); - } - DBUG_RETURN(rc); - } - - if (indexArea != DSMAREA_INVALID) - { - /* Delete the extents for both Index and Table */ - rc = dsmExtentDelete(pcontext, indexArea); - rc = dsmAreaDelete(pcontext, indexArea); - if (rc) - { - gemini_msg(pcontext, "Error deleting Index Area %l, return %l", indexArea, rc); - - /* roll back txn and return 
*/ - rc = gemini_rollback(thd); - if (rc) - { - gemini_msg(pcontext, "Error in rollback %l",rc); - } - DBUG_RETURN(rc); - } - } - - rc = dsmExtentDelete(pcontext, tableArea); - rc = dsmAreaDelete(pcontext, tableArea); - if (rc) - { - gemini_msg(pcontext, "Error deleting table Area %l, name %s, return %l", - tableArea, pname, rc); - /* roll back txn and return */ - rc = gemini_rollback(thd); - if (rc) - { - gemini_msg(pcontext, "Error in rollback %l",rc); - } - DBUG_RETURN(rc); - } - - - /* Commit the transaction */ - rc = gemini_commit(thd); - if (rc) - { - gemini_msg(pcontext, "Failed to commit transaction %l",rc); - } - - - /* now remove all the files that need to be removed and - cause a checkpoint so recovery will work */ - rc = dsmExtentUnlink(pcontext); - - DBUG_RETURN(0); -} - - -int ha_gemini::rename_table(const char *pfrom, const char *pto) -{ - THD *thd; - dsmContext_t *pcontext; - dsmStatus_t rc; - char dbname_buff[FN_REFLEN]; - char name_buff[FN_REFLEN]; - char newname_buff[FN_REFLEN]; - char newextname_buff[FN_REFLEN]; - char newidxextname_buff[FN_REFLEN]; - unsigned i, nameLen; - dsmObject_t tableNum; - dsmArea_t indexArea = 0; - dsmArea_t tableArea = 0; - - DBUG_ENTER("ha_gemini::rename_table"); - - /* don't allow rename of VSTs */ - rc = gemini_parse_table_name(pfrom, dbname_buff, name_buff); - if (rc == 0) - { - /* If the table is a VST, don't create areas or extents */ - if (strcmp(dbname_buff, "mysql") == 0) - { - if (gemini_is_vst(name_buff)) - { - return DSM_S_CANT_RENAME_VST; - } - } - } - - thd = current_thd; - if (thd->gemini.context == NULL) - { - /* Need to get this thread a connection into the database */ - rc = gemini_connect(thd); - if (rc) - { - DBUG_RETURN(rc); - } - } - - pcontext = (dsmContext_t *)thd->gemini.context; - - /* change the slashes to dots in the old and new names */ - nameLen = strlen(pfrom); - for( i = start_of_name; i < nameLen; i++) - { - if(pfrom[i] == '/' || pfrom[i] == '\\') - name_buff[i-start_of_name] = '.'; - 
else - name_buff[i-start_of_name] = pfrom[i]; - } - name_buff[i-start_of_name] = '\0'; - - nameLen = strlen(pto); - for( i = start_of_name; i < nameLen; i++) - { - if(pto[i] == '/' || pto[i] == '\\') - newname_buff[i-start_of_name] = '.'; - else - newname_buff[i-start_of_name] = pto[i]; - } - newname_buff[i-start_of_name] = '\0'; - - /* generate new extent names (for table and index extents) */ - fn_format(newextname_buff, pto, "", ha_gemini_ext, 2 | 4); - fn_format(newidxextname_buff, pto, "", ha_gemini_idx_ext, 2 | 4); - - rc = dsmObjectNameToNum(pcontext, (dsmText_t *)name_buff, &tableNum); - if (rc) - { - gemini_msg(pcontext, "Unable to file Table number for %s", name_buff); - goto errorReturn; - } - - rc = dsmObjectRename(pcontext, tableNum, - (dsmText_t *)newname_buff, - (dsmText_t *)&newidxextname_buff[start_of_name], - (dsmText_t *)&newextname_buff[start_of_name], - &indexArea, &tableArea); - if (rc) - { - gemini_msg(pcontext, "Failed to rename %s to %s",name_buff,newname_buff); - goto errorReturn; - } - - /* Rename the physical table and index files (if necessary). - ** Close the file, rename it, and reopen it (have to do it this - ** way so rename works on Windows). - */ - if (!(rc = dsmAreaClose(pcontext, tableArea))) - { - if (!(rc = rename_file_ext(pfrom, pto, ha_gemini_ext))) - { - rc = dsmAreaOpen(pcontext, tableArea, 0); - if (rc) - { - gemini_msg(pcontext, "Failed to reopen area %d",tableArea); - } - } - } - - if (!rc && indexArea) - { - if (!(rc = dsmAreaClose(pcontext, indexArea))) - { - if (!(rc = rename_file_ext(pfrom, pto, ha_gemini_idx_ext))) - { - rc = dsmAreaOpen(pcontext, indexArea, 0); - if (rc) - { - gemini_msg(pcontext, "Failed to reopen area %d",tableArea); - } - } - } - } - -errorReturn: - DBUG_RETURN(rc); -} - - -/* - How many seeks it will take to read through the table - This is to be comparable to the number returned by records_in_range so - that we can decide if we should scan the table or use keys. 
-*/ - -double ha_gemini::scan_time() -{ - return (double)records / - (double)((gemini_blocksize / (double)table->reclength)); -} - -int ha_gemini::analyze(THD* thd, HA_CHECK_OPT* check_opt) -{ - int error; - uint saveIsolation; - dsmMask_t saveLockMode; - - check_opt->quick = TRUE; - check_opt->optimize = TRUE; // Tells check not to get table lock - saveLockMode = lockMode; - saveIsolation = thd->gemini.tx_isolation; - thd->gemini.tx_isolation = ISO_READ_UNCOMMITTED; - lockMode = DSM_LK_NOLOCK; - error = check(thd,check_opt); - lockMode = saveLockMode; - thd->gemini.tx_isolation = saveIsolation; - return (error); -} - -int ha_gemini::check(THD* thd, HA_CHECK_OPT* check_opt) -{ - int error = 0; - int checkStatus = HA_ADMIN_OK; - ha_rows indexCount; - byte *buf = 0, *indexBuf = 0, *prevBuf = 0; - int errorCount = 0; - - info(HA_STATUS_VARIABLE); // Makes sure row count is up to date - - /* Get a shared table lock */ - if(thd->gemini.needSavepoint) - { - /* We don't really need a savepoint here but do it anyway - just to keep the savepoint number correct. 
*/ - thd->gemini.savepoint++; - error = dsmTransaction((dsmContext_t *)thd->gemini.context, - &thd->gemini.savepoint, - DSMTXN_SAVE, 0, 0); - if (error) - return(error); - thd->gemini.needSavepoint = 0; - } - buf = (byte*)my_malloc(table->rec_buff_length,MYF(MY_WME)); - indexBuf = (byte*)my_malloc(table->rec_buff_length,MYF(MY_WME)); - prevBuf = (byte*)my_malloc(table->rec_buff_length,MYF(MY_WME |MY_ZEROFILL )); - - /* Lock the table */ - if (!check_opt->optimize) - error = dsmObjectLock((dsmContext_t *)thd->gemini.context, - (dsmObject_t)tableNumber, - DSMOBJECT_TABLE,0, - DSM_LK_SHARE, 1, 0); - if(error) - { - gemini_msg((dsmContext_t *)thd->gemini.context, - "Failed to lock table %d, error %d",tableNumber, error); - return error; - } - - ha_rows *rec_per_key = share->rec_per_key; - /* If quick option just scan along index converting and counting entries */ - for (uint i = 0; i < table->keys; i++) - { - key_read = 1; // Causes data to be extracted from the keys - indexCount = 0; - // Clear the cardinality stats for this index - memset(table->key_info[i].rec_per_key,0, - sizeof(table->key_info[0].rec_per_key[0]) * - table->key_info[i].key_parts); - error = index_init(i); - error = index_first(indexBuf); - while(!error) - { - indexCount++; - if(!check_opt->quick) - { - /* Fetch row and compare to data produced from key */ - error = fetch_row(thd->gemini.context,buf); - if(!error) - { - if(key_cmp(i,buf,indexBuf,FALSE)) - { - - gemini_msg((dsmContext_t *)thd->gemini.context, - "Check Error! Key does not match row for rowid %d for index %s", - lastRowid,table->key_info[i].name); - print_msg(thd,table->real_name,"check","error", - "Key does not match row for rowid %d for index %s", - lastRowid,table->key_info[i].name); - checkStatus = HA_ADMIN_CORRUPT; - errorCount++; - if(errorCount > 1000) - goto error_return; - } - else if(error == DSM_S_RMNOTFND) - { - errorCount++; - checkStatus = HA_ADMIN_CORRUPT; - gemini_msg((dsmContext_t *)thd->gemini.context, - "Check Error! 
Key does not have a valid row pointer %d for index %s", - lastRowid,table->key_info[i].name); - print_msg(thd,table->real_name,"check","error", - "Key does not have a valid row pointer %d for index %s", - lastRowid,table->key_info[i].name); - if(errorCount > 1000) - goto error_return; - error = 0; - } - } - } - - key_cmp(i,indexBuf,prevBuf,TRUE); - bcopy((void *)indexBuf,(void *)prevBuf,table->rec_buff_length); - - if(!error) - error = index_next(indexBuf); - } - - for(uint j=1; j < table->key_info[i].key_parts; j++) - { - table->key_info[i].rec_per_key[j] += table->key_info[i].rec_per_key[j-1]; - } - for(uint k=0; k < table->key_info[i].key_parts; k++) - { - if (table->key_info[i].rec_per_key[k]) - table->key_info[i].rec_per_key[k] = - records / table->key_info[i].rec_per_key[k]; - *rec_per_key = table->key_info[i].rec_per_key[k]; - rec_per_key++; - } - - if(error == HA_ERR_END_OF_FILE) - { - /* Check count of rows */ - - if(records != indexCount) - { - /* Number of index entries does not agree with the number of - rows in the index. */ - checkStatus = HA_ADMIN_CORRUPT; - gemini_msg((dsmContext_t *)thd->gemini.context, - "Check Error! 
Total rows %d does not match total index entries %d for %s", - records, indexCount, - table->key_info[i].name); - print_msg(thd,table->real_name,"check","error", - "Total rows %d does not match total index entries %d for %s", - records, indexCount, - table->key_info[i].name); - } - } - else - { - checkStatus = HA_ADMIN_FAILED; - goto error_return; - } - index_end(); - } - if(!check_opt->quick) - { - /* Now scan the table and for each row generate the keys - and find them in the index */ - error = fullCheck(thd, buf); - if(error) - checkStatus = error; - } - // Store the key distribution information - error = saveKeyStats(thd); - -error_return: - my_free((char*)buf,MYF(MY_ALLOW_ZERO_PTR)); - my_free((char*)indexBuf,MYF(MY_ALLOW_ZERO_PTR)); - my_free((char*)prevBuf,MYF(MY_ALLOW_ZERO_PTR)); - - index_end(); - key_read = 0; - if(!check_opt->optimize) - { - error = dsmObjectUnlock((dsmContext_t *)thd->gemini.context, - (dsmObject_t)tableNumber, - DSMOBJECT_TABLE,0, - DSM_LK_SHARE,0); - if (error) - { - gemini_msg((dsmContext_t *)thd->gemini.context, - "Unable to unlock table %d", tableNumber); - } - } - - return checkStatus; -} - -int ha_gemini::saveKeyStats(THD *thd) -{ - dsmStatus_t rc = 0; - - /* Insert a row in the indexStats table for each column of - each index of the table */ - - for(uint i = 0; i < table->keys; i++) - { - for (uint j = 0; j < table->key_info[i].key_parts && !rc ;j++) - { - rc = dsmIndexStatsPut((dsmContext_t *)thd->gemini.context, - tableNumber, pindexNumbers[i], - j, (LONG64)table->key_info[i].rec_per_key[j]); - if (rc) - { - gemini_msg((dsmContext_t *)thd->gemini.context, - "Failed to update index stats for table %d, index %d", - tableNumber, pindexNumbers[i]); - } - } - } - return rc; -} - -int ha_gemini::fullCheck(THD *thd,byte *buf) -{ - int error; - int errorCount = 0; - int checkStatus = 0; - - lastRowid = 0; - - while(((error = rnd_next( buf)) != HA_ERR_END_OF_FILE) && errorCount <= 1000) - { - if(!error) - { - error = 
handleIndexEntries(buf,lastRowid,KEY_CHECK); - if(error) - { - /* Error finding an index entry for a row. */ - print_msg(thd,table->real_name,"check","error", - "Unable to find all index entries for row %d", - lastRowid); - errorCount++; - checkStatus = HA_ADMIN_CORRUPT; - error = 0; - } - } - else - { - /* Error reading a row */ - print_msg(thd,table->real_name,"check","error", - "Error reading row %d status = %d", - lastRowid,error); - errorCount++; - checkStatus = HA_ADMIN_CORRUPT; - error = 0; - } - } - - return checkStatus; -} - -int ha_gemini::repair(THD* thd, HA_CHECK_OPT* check_opt) -{ - int error; - dsmRecord_t dsmRecord; - byte *buf; - - if(thd->gemini.needSavepoint) - { - /* We don't really need a savepoint here but do it anyway - just to keep the savepoint number correct. */ - thd->gemini.savepoint++; - error = dsmTransaction((dsmContext_t *)thd->gemini.context, - &thd->gemini.savepoint, - DSMTXN_SAVE, 0, 0); - if (error) - { - gemini_msg((dsmContext_t *)thd->gemini.context, - "Error setting savepoint number %d, error %d", - thd->gemini.savepoint++, error); - return(error); - } - thd->gemini.needSavepoint = 0; - } - - - /* Lock the table */ - error = dsmObjectLock((dsmContext_t *)thd->gemini.context, - (dsmObject_t)tableNumber, - DSMOBJECT_TABLE,0, - DSM_LK_EXCL, 1, 0); - if(error) - { - gemini_msg((dsmContext_t *)thd->gemini.context, - "Failed to lock table %d, error %d",tableNumber, error); - return error; - } - - error = dsmContextSetLong((dsmContext_t *)thd->gemini.context, - DSM_TAGCONTEXT_NO_LOGGING,1); - - error = dsmTableReset((dsmContext_t *)thd->gemini.context, - (dsmTable_t)tableNumber, table->keys, - pindexNumbers); - if (error) - { - gemini_msg((dsmContext_t *)thd->gemini.context, - "dsmTableReset failed for table %d, error %d",tableNumber, error); - } - - buf = (byte*)my_malloc(table->rec_buff_length,MYF(MY_WME)); - dsmRecord.table = tableNumber; - dsmRecord.recid = 0; - dsmRecord.pbuffer = (dsmBuffer_t *)rec_buff; - dsmRecord.recLength = 
table->reclength; - dsmRecord.maxLength = table->rec_buff_length; - while(!error) - { - error = dsmTableScan((dsmContext_t *)thd->gemini.context, - &dsmRecord, DSMFINDNEXT, DSM_LK_NOLOCK, - 1); - if(!error) - { - if (!(error = unpack_row((char *)buf,(char *)dsmRecord.pbuffer))) - { - error = handleIndexEntries(buf,dsmRecord.recid,KEY_CREATE); - if(error == HA_ERR_FOUND_DUPP_KEY) - { - /* We don't want to stop on duplicate keys -- we're repairing - here so let's get as much repaired as possible. */ - error = 0; - } - } - } - } - error = dsmObjectUnlock((dsmContext_t *)thd->gemini.context, - (dsmObject_t)tableNumber, - DSMOBJECT_TABLE,0, - DSM_LK_EXCL,0); - if (error) - { - gemini_msg((dsmContext_t *)thd->gemini.context, - "Unable to unlock table %d", tableNumber); - } - - my_free((char*)buf,MYF(MY_ALLOW_ZERO_PTR)); - - error = dsmContextSetLong((dsmContext_t *)thd->gemini.context, - DSM_TAGCONTEXT_NO_LOGGING,0); - - return error; -} - - -int ha_gemini::restore(THD* thd, HA_CHECK_OPT *check_opt) -{ - dsmContext_t *pcontext = (dsmContext_t *)thd->gemini.context; - char* backup_dir = thd->lex.backup_dir; - char src_path[FN_REFLEN], dst_path[FN_REFLEN]; - char* table_name = table->real_name; - int error = 0; - int errornum; - const char* errmsg = ""; - dsmArea_t tableArea = 0; - dsmObjectAttr_t objectAttr; - dsmObject_t associate; - dsmObjectType_t associateType; - dsmDbkey_t block, root; - dsmStatus_t rc; - - rc = dsmObjectInfo(pcontext, tableNumber, DSMOBJECT_MIXTABLE, tableNumber, - &tableArea, &objectAttr, &associateType, &block, &root); - if (rc) - { - error = HA_ADMIN_FAILED; - errmsg = "Failed in dsmObjectInfo (.gmd) (Error %d)"; - errornum = rc; - gemini_msg(pcontext, errmsg ,errornum); - goto err; - } - - rc = dsmAreaFlush(pcontext, tableArea, FLUSH_BUFFERS | FLUSH_SYNC); - if (rc) - { - error = HA_ADMIN_FAILED; - errmsg = "Failed in dsmAreaFlush (.gmd) (Error %d)"; - errornum = rc; - gemini_msg(pcontext, errmsg ,errornum); - goto err; - } - - rc = 
dsmAreaClose(pcontext, tableArea); - if (rc) - { - error = HA_ADMIN_FAILED; - errmsg = "Failed in dsmAreaClose (.gmd) (Error %d)"; - errornum = rc; - gemini_msg(pcontext, errmsg ,errornum); - goto err; - } - - /* Restore the data file */ - if (!fn_format(src_path, table_name, backup_dir, ha_gemini_ext, 4 + 64)) - { - return HA_ADMIN_INVALID; - } - - if (my_copy(src_path, fn_format(dst_path, table->path, "", - ha_gemini_ext, 4), MYF(MY_WME))) - { - error = HA_ADMIN_FAILED; - errmsg = "Failed in my_copy (.gmd) (Error %d)"; - errornum = errno; - gemini_msg(pcontext, errmsg ,errornum); - goto err; - } - - rc = dsmAreaFlush(pcontext, tableArea, FREE_BUFFERS); - if (rc) - { - error = HA_ADMIN_FAILED; - errmsg = "Failed in dsmAreaFlush (.gmd) (Error %d)"; - errornum = rc; - gemini_msg(pcontext, errmsg ,errornum); - goto err; - } - - rc = dsmAreaOpen(pcontext, tableArea, 1); - if (rc) - { - error = HA_ADMIN_FAILED; - errmsg = "Failed in dsmAreaOpen (.gmd) (Error %d)"; - errornum = rc; - gemini_msg(pcontext, errmsg ,errornum); - goto err; - } - -#ifdef GEMINI_BACKUP_IDX - dsmArea_t indexArea = 0; - - rc = dsmObjectInfo(pcontext, tableNumber, DSMOBJECT_MIXINDEX, &indexArea, - &objectAttr, &associate, &associateType, &block, &root); - if (rc) - { - error = HA_ADMIN_FAILED; - errmsg = "Failed in dsmObjectInfo (.gmi) (Error %d)"; - errornum = rc; - gemini_msg(pcontext, errmsg ,errornum); - goto err; - } - - rc = dsmAreaClose(pcontext, indexArea); - if (rc) - { - error = HA_ADMIN_FAILED; - errmsg = "Failed in dsmAreaClose (.gmi) (Error %d)"; - errornum = rc; - gemini_msg(pcontext, errmsg ,errornum); - goto err; - } - - /* Restore the index file */ - if (!fn_format(src_path, table_name, backup_dir, ha_gemini_idx_ext, 4 + 64)) - { - return HA_ADMIN_INVALID; - } - - if (my_copy(src_path, fn_format(dst_path, table->path, "", - ha_gemini_idx_ext, 4), MYF(MY_WME))) - { - error = HA_ADMIN_FAILED; - errmsg = "Failed in my_copy (.gmi) (Error %d)"; - errornum = errno; - 
gemini_msg(pcontext, errmsg ,errornum); - goto err; - } - - rc = dsmAreaOpen(pcontext, indexArea, 1); - if (rc) - { - error = HA_ADMIN_FAILED; - errmsg = "Failed in dsmAreaOpen (.gmi) (Error %d)"; - errornum = rc; - gemini_msg(pcontext, errmsg ,errornum); - goto err; - } - - return HA_ADMIN_OK; -#else /* #ifdef GEMINI_BACKUP_IDX */ - HA_CHECK_OPT tmp_check_opt; - tmp_check_opt.init(); - /* The following aren't currently implemented in ha_gemini::repair - ** tmp_check_opt.quick = 1; - ** tmp_check_opt.flags |= T_VERY_SILENT; - */ - return (repair(thd, &tmp_check_opt)); -#endif /* #ifdef GEMINI_BACKUP_IDX */ - - err: - { -#if 0 - /* mi_check_print_error is in ha_myisam.cc, so none of the informative - ** error messages above is currently being printed - */ - MI_CHECK param; - myisamchk_init(¶m); - param.thd = thd; - param.op_name = (char*)"restore"; - param.table_name = table->table_name; - param.testflag = 0; - mi_check_print_error(¶m,errmsg, errornum); -#endif - return error; - } -} - - -int ha_gemini::backup(THD* thd, HA_CHECK_OPT *check_opt) -{ - dsmContext_t *pcontext = (dsmContext_t *)thd->gemini.context; - char* backup_dir = thd->lex.backup_dir; - char src_path[FN_REFLEN], dst_path[FN_REFLEN]; - char* table_name = table->real_name; - int error = 0; - int errornum; - const char* errmsg = ""; - dsmArea_t tableArea = 0; - dsmObjectAttr_t objectAttr; - dsmObject_t associate; - dsmObjectType_t associateType; - dsmDbkey_t block, root; - dsmStatus_t rc; - - rc = dsmObjectInfo(pcontext, tableNumber, DSMOBJECT_MIXTABLE, tableNumber, - &tableArea, &objectAttr, &associateType, &block, &root); - if (rc) - { - error = HA_ADMIN_FAILED; - errmsg = "Failed in dsmObjectInfo (.gmd) (Error %d)"; - errornum = rc; - goto err; - } - - /* Flush the buffers before backing up the table */ - dsmAreaFlush((dsmContext_t *)thd->gemini.context, tableArea, - FLUSH_BUFFERS | FLUSH_SYNC); - if (rc) - { - error = HA_ADMIN_FAILED; - errmsg = "Failed in dsmAreaFlush (.gmd) (Error %d)"; - 
errornum = rc; - gemini_msg(pcontext, errmsg ,errornum); - goto err; - } - - /* Backup the .FRM file */ - if (!fn_format(dst_path, table_name, backup_dir, reg_ext, 4 + 64)) - { - errmsg = "Failed in fn_format() for .frm file: errno = %d"; - error = HA_ADMIN_INVALID; - errornum = errno; - gemini_msg(pcontext, errmsg ,errornum); - goto err; - } - - if (my_copy(fn_format(src_path, table->path,"", reg_ext, 4), - dst_path, - MYF(MY_WME | MY_HOLD_ORIGINAL_MODES ))) - { - error = HA_ADMIN_FAILED; - errmsg = "Failed copying .frm file: errno = %d"; - errornum = errno; - gemini_msg(pcontext, errmsg ,errornum); - goto err; - } - - /* Backup the data file */ - if (!fn_format(dst_path, table_name, backup_dir, ha_gemini_ext, 4 + 64)) - { - errmsg = "Failed in fn_format() for .GMD file: errno = %d"; - error = HA_ADMIN_INVALID; - errornum = errno; - gemini_msg(pcontext, errmsg ,errornum); - goto err; - } - - if (my_copy(fn_format(src_path, table->path,"", ha_gemini_ext, 4), - dst_path, - MYF(MY_WME | MY_HOLD_ORIGINAL_MODES )) ) - { - errmsg = "Failed copying .GMD file: errno = %d"; - error= HA_ADMIN_FAILED; - errornum = errno; - gemini_msg(pcontext, errmsg ,errornum); - goto err; - } - -#ifdef GEMINI_BACKUP_IDX - dsmArea_t indexArea = 0; - - rc = dsmObjectInfo(pcontext, tableNumber, DSMOBJECT_MIXINDEX, &indexArea, - &objectAttr, &associate, &associateType, &block, &root); - if (rc) - { - error = HA_ADMIN_FAILED; - errmsg = "Failed in dsmObjectInfo (.gmi) (Error %d)"; - errornum = rc; - gemini_msg(pcontext, errmsg ,errornum); - goto err; - } - - /* Backup the index file */ - if (!fn_format(dst_path, table_name, backup_dir, ha_gemini_idx_ext, 4 + 64)) - { - errmsg = "Failed in fn_format() for .GMI file: errno = %d"; - error = HA_ADMIN_INVALID; - errornum = errno; - gemini_msg(pcontext, errmsg ,errornum); - goto err; - } - - if (my_copy(fn_format(src_path, table->path,"", ha_gemini_idx_ext, 4), - dst_path, - MYF(MY_WME | MY_HOLD_ORIGINAL_MODES )) ) - { - errmsg = "Failed copying .GMI 
file: errno = %d"; - error= HA_ADMIN_FAILED; - errornum = errno; - gemini_msg(pcontext, errmsg ,errornum); - goto err; - } -#endif /* #ifdef GEMINI_BACKUP_IDX */ - - return HA_ADMIN_OK; - - err: - { -#if 0 - /* mi_check_print_error is in ha_myisam.cc, so none of the informative - ** error messages above is currently being printed - */ - MI_CHECK param; - myisamchk_init(¶m); - param.thd = thd; - param.op_name = (char*)"backup"; - param.table_name = table->table_name; - param.testflag = 0; - mi_check_print_error(¶m,errmsg, errornum); -#endif - return error; - } -} - - -int ha_gemini::optimize(THD* thd, HA_CHECK_OPT *check_opt) -{ - return HA_ADMIN_ALREADY_DONE; -} - - -ha_rows ha_gemini::records_in_range(int keynr, - const byte *start_key,uint start_key_len, - enum ha_rkey_function start_search_flag, - const byte *end_key,uint end_key_len, - enum ha_rkey_function end_search_flag) -{ - int error; - int componentLen; - float pctInrange; - ha_rows rows = 5; - - DBUG_ENTER("records_in_range"); - - error = index_init(keynr); - if(error) - DBUG_RETURN(rows); - - pbracketBase->index = (short)pindexNumbers[keynr]; - pbracketBase->keycomps = 1; - - if(start_key) - { - error = pack_key(keynr, pbracketBase, start_key, start_key_len); - if(start_search_flag == HA_READ_AFTER_KEY) - { - /* A greater than operation */ - error = gemKeyAddLow(pbracketBase->keystr + pbracketBase->keyLen, - &componentLen); - pbracketBase->keyLen += componentLen; - } - } - else - { - error = gemKeyLow(pbracketBase->keystr, &componentLen, - pbracketBase->index); - pbracketBase->keyLen = componentLen; - - } - pbracketBase->keyLen -= FULLKEYHDRSZ; - - if(end_key) - { - error = pack_key(keynr, pbracketLimit, end_key, end_key_len); - if(!error && end_search_flag == HA_READ_AFTER_KEY) - { - error = gemKeyAddHigh(pbracketLimit->keystr + pbracketLimit->keyLen, - &componentLen); - pbracketLimit->keyLen += componentLen; - } - } - else - { - error = gemKeyHigh(pbracketLimit->keystr,&componentLen, - 
pbracketLimit->index); - pbracketLimit->keyLen = componentLen; - } - - pbracketLimit->keyLen -= FULLKEYHDRSZ; - error = dsmIndexRowsInRange((dsmContext_t *)current_thd->gemini.context, - pbracketBase,pbracketLimit, - tableNumber, - &pctInrange); - if(pctInrange >= 1) - rows = (ha_rows)pctInrange; - else - { - rows = (ha_rows)(records * pctInrange); - if(!rows && pctInrange > 0) - rows = 1; - } - index_end(); - - DBUG_RETURN(rows); -} - - -/* - Pack a row for storage. If the row is of fixed length, just store the - row 'as is'. - If not, we will generate a packed row suitable for storage. - This will only fail if we don't have enough memory to pack the row, which; - may only happen in rows with blobs, as the default row length is - pre-allocated. -*/ -int ha_gemini::pack_row(byte **pprow, int *ppackedLength, const byte *record, - bool update) -{ - THD *thd = current_thd; - dsmContext_t *pcontext = (dsmContext_t *)thd->gemini.context; - gemBlobDesc_t *pBlobDesc = pBlobDescs; - - if (fixed_length_row) - { - *pprow = (byte *)record; - *ppackedLength=(int)table->reclength; - return 0; - } - /* Copy null bits */ - memcpy(rec_buff, record, table->null_bytes); - byte *ptr=rec_buff + table->null_bytes; - - for (Field **field=table->field ; *field ; field++) - { -#ifdef GEMINI_TINYBLOB_IN_ROW - /* Tiny blobs (255 bytes or less) are stored in the row; larger - ** blobs are stored in a separate storage object (see ha_gemini::create). 
- */ - if ((*field)->type() == FIELD_TYPE_BLOB && - ((Field_blob*)*field)->blobtype() != FIELD_TYPE_TINY_BLOB) -#else - if ((*field)->type() == FIELD_TYPE_BLOB) -#endif - { - dsmBlob_t gemBlob; - char *blobptr; - - gemBlob.areaType = DSMOBJECT_BLOB; - gemBlob.blobObjNo = tableNumber; - gemBlob.blobId = 0; - gemBlob.totLength = gemBlob.segLength = - ((Field_blob*)*field)->get_length((char*)record + (*field)->offset()); - ((Field_blob*)*field)->get_ptr((char**) &blobptr); - gemBlob.pBuffer = (dsmBuffer_t *)blobptr; - gemBlob.blobContext.blobOffset = 0; - if (gemBlob.totLength) - { - dsmBlobStart(pcontext, &gemBlob); - if (update && pBlobDesc->blobId) - { - gemBlob.blobId = pBlobDesc->blobId; - dsmBlobUpdate(pcontext, &gemBlob, NULL); - } - else - { - dsmBlobPut(pcontext, &gemBlob, NULL); - } - dsmBlobEnd(pcontext, &gemBlob); - } - ptr = (byte*)((Field_blob*)*field)->pack_id((char*) ptr, - (char*)record + (*field)->offset(), (longlong)gemBlob.blobId); - - pBlobDesc++; - } - else - { - ptr=(byte*) (*field)->pack((char*) ptr, (char*)record + (*field)->offset()); - } - } - - *pprow=rec_buff; - *ppackedLength= (ptr - rec_buff); - return 0; -} - -int ha_gemini::unpack_row(char *record, char *prow) -{ - THD *thd = current_thd; - dsmContext_t *pcontext = (dsmContext_t *)thd->gemini.context; - gemBlobDesc_t *pBlobDesc = pBlobDescs; - - if (fixed_length_row) - { - /* If the table is a VST, the row is in Gemini internal format. - ** Convert the fields to MySQL format. 
- */ - if (RM_IS_VST(tableNumber)) - { - int i = 2; /* VST fields are numbered sequentially starting at 2 */ - long longValue; - char *fld; - unsigned long unknown; - - for (Field **field = table->field; *field; field++, i++) - { - switch ((*field)->type()) - { - case FIELD_TYPE_LONG: - case FIELD_TYPE_TINY: - case FIELD_TYPE_SHORT: - case FIELD_TYPE_TIMESTAMP: - case FIELD_TYPE_LONGLONG: - case FIELD_TYPE_INT24: - case FIELD_TYPE_DATE: - case FIELD_TYPE_TIME: - case FIELD_TYPE_DATETIME: - case FIELD_TYPE_YEAR: - case FIELD_TYPE_NEWDATE: - case FIELD_TYPE_ENUM: - case FIELD_TYPE_SET: - recGetLONG((dsmText_t *)prow, i, 0, &longValue, &unknown); - if (unknown) - { - (*field)->set_null(); - } - else - { - (*field)->set_notnull(); - (*field)->store((longlong)longValue); - } - break; - - case FIELD_TYPE_DECIMAL: - case FIELD_TYPE_DOUBLE: - case FIELD_TYPE_TINY_BLOB: - case FIELD_TYPE_MEDIUM_BLOB: - case FIELD_TYPE_LONG_BLOB: - case FIELD_TYPE_BLOB: - case FIELD_TYPE_VAR_STRING: - break; - - case FIELD_TYPE_STRING: - svcByteString_t stringFld; - - fld = (char *)my_malloc((*field)->field_length, MYF(MY_WME)); - stringFld.pbyte = (TEXT *)fld; - stringFld.size = (*field)->field_length; - recGetBYTES((dsmText_t *)prow, i, 0, &stringFld, &unknown); - if (unknown) - { - (*field)->set_null(); - } - else - { - (*field)->set_notnull(); - (*field)->store(fld, (*field)->field_length); - } - my_free(fld, MYF(MY_ALLOW_ZERO_PTR)); - break; - - default: - break; - } - } - } - else - { - memcpy(record,(char*) prow,table->reclength); - } - } - else - { - /* Copy null bits */ - const char *ptr= (const char*) prow; - memcpy(record, ptr, table->null_bytes); - ptr+=table->null_bytes; - - for (Field **field=table->field ; *field ; field++) - { -#ifdef GEMINI_TINYBLOB_IN_ROW - /* Tiny blobs (255 bytes or less) are stored in the row; larger - ** blobs are stored in a separate storage object (see ha_gemini::create). 
- */ - if ((*field)->type() == FIELD_TYPE_BLOB && - ((Field_blob*)*field)->blobtype() != FIELD_TYPE_TINY_BLOB) -#else - if ((*field)->type() == FIELD_TYPE_BLOB) -#endif - { - dsmBlob_t gemBlob; - - gemBlob.areaType = DSMOBJECT_BLOB; - gemBlob.blobObjNo = tableNumber; - gemBlob.blobId = (dsmBlobId_t)(((Field_blob*)*field)->get_id(ptr)); - if (gemBlob.blobId) - { - gemBlob.totLength = - gemBlob.segLength = ((Field_blob*)*field)->get_length(ptr); - /* Allocate memory to store the blob. This memory is freed - ** the next time unpack_row is called for this table. - */ - gemBlob.pBuffer = (dsmBuffer_t *)my_malloc(gemBlob.totLength, - MYF(0)); - if (!gemBlob.pBuffer) - { - return HA_ERR_OUT_OF_MEM; - } - gemBlob.blobContext.blobOffset = 0; - dsmBlobStart(pcontext, &gemBlob); - dsmBlobGet(pcontext, &gemBlob, NULL); - dsmBlobEnd(pcontext, &gemBlob); - } - else - { - gemBlob.pBuffer = 0; - } - ptr = ((Field_blob*)*field)->unpack_id(record + (*field)->offset(), - ptr, (char *)gemBlob.pBuffer); - pBlobDesc->blobId = gemBlob.blobId; - my_free((char*)pBlobDesc->pBlob, MYF(MY_ALLOW_ZERO_PTR)); - pBlobDesc->pBlob = gemBlob.pBuffer; - pBlobDesc++; - } - else - { - ptr= (*field)->unpack(record + (*field)->offset(), ptr); - } - } - } - - return 0; -} - -int ha_gemini::key_cmp(uint keynr, const byte * old_row, - const byte * new_row, bool updateStats) -{ - KEY_PART_INFO *key_part=table->key_info[keynr].key_part; - KEY_PART_INFO *end=key_part+table->key_info[keynr].key_parts; - - for ( uint i = 0 ; key_part != end ; key_part++, i++) - { - if (key_part->null_bit) - { - if ((old_row[key_part->null_offset] & key_part->null_bit) != - (new_row[key_part->null_offset] & key_part->null_bit)) - { - if(updateStats) - table->key_info[keynr].rec_per_key[i]++; - return 1; - } - else if((old_row[key_part->null_offset] & key_part->null_bit) && - (new_row[key_part->null_offset] & key_part->null_bit)) - /* Both are null */ - continue; - } - if (key_part->key_part_flag & (HA_BLOB_PART | HA_VAR_LENGTH)) 
- { - if (key_part->field->cmp_binary((char*)(old_row + key_part->offset), - (char*)(new_row + key_part->offset), - (ulong) key_part->length)) - { - if(updateStats) - table->key_info[keynr].rec_per_key[i]++; - return 1; - } - } - else - { - if (memcmp(old_row+key_part->offset, new_row+key_part->offset, - key_part->length)) - { - /* Check for special case of -0 which causes table check - to find an invalid key when comparing the the index - value of 0 to the -0 stored in the row */ - if(key_part->field->type() == FIELD_TYPE_DECIMAL) - { - double fieldValue; - char *ptr = key_part->field->ptr; - - key_part->field->ptr = (char *)old_row + key_part->offset; - fieldValue = key_part->field->val_real(); - if(fieldValue == 0) - { - key_part->field->ptr = (char *)new_row + key_part->offset; - fieldValue = key_part->field->val_real(); - if(fieldValue == 0) - { - key_part->field->ptr = ptr; - continue; - } - } - key_part->field->ptr = ptr; - } - if(updateStats) - { - table->key_info[keynr].rec_per_key[i]++; - } - return 1; - } - } - } - return 0; -} - -int gemini_parse_table_name(const char *fullname, char *dbname, char *tabname) -{ - char *namestart; - char *nameend; - - /* separate out the name of the table and the database - */ - namestart = (char *)strchr(fullname + start_of_name, '/'); - if (!namestart) - { - /* if on Windows, slashes go the other way */ - namestart = (char *)strchr(fullname + start_of_name, '\\'); - } - nameend = (char *)strchr(fullname + start_of_name, '.'); - /* sometimes fullname has an extension, sometimes it doesn't */ - if (!nameend) - { - nameend = (char *)fullname + strlen(fullname); - } - strncpy(dbname, fullname + start_of_name, - (namestart - fullname) - start_of_name); - dbname[(namestart - fullname) - start_of_name] = '\0'; - strncpy(tabname, namestart + 1, (nameend - namestart) - 1); - tabname[nameend - namestart - 1] = '\0'; - - return 0; -} - -/* PROGRAM: gemini_is_vst - if the name is the name of a VST, return - * its number - * - * 
RETURNS: Table number if a match is found - * 0 if not a VST - */ -int -gemini_is_vst(const char *pname) /* IN the name */ -{ - int tablenum = 0; - - for (int i = 0; i < vstnumfils; i++) - { - if (strcmp(pname, vstfil[i].filename) == 0) - { - tablenum = vstfil[i].filnum; - break; - } - } - - return tablenum; -} - -static void print_msg(THD *thd, const char *table_name, const char *op_name, - const char *msg_type, const char *fmt, ...) -{ - String* packet = &thd->packet; - packet->length(0); - char msgbuf[256]; - msgbuf[0] = 0; - va_list args; - va_start(args,fmt); - - my_vsnprintf(msgbuf, sizeof(msgbuf), fmt, args); - msgbuf[sizeof(msgbuf) - 1] = 0; // healthy paranoia - - DBUG_PRINT(msg_type,("message: %s",msgbuf)); - - net_store_data(packet, table_name); - net_store_data(packet, op_name); - net_store_data(packet, msg_type); - net_store_data(packet, msgbuf); - if (my_net_write(&thd->net, (char*)thd->packet.ptr(), - thd->packet.length())) - thd->killed=1; -} - -/* Load shared area with rows per key statistics */ -void -ha_gemini::get_index_stats(THD *thd) -{ - dsmStatus_t rc = 0; - ha_rows *rec_per_key = share->rec_per_key; - - for(uint i = 0; i < table->keys && !rc; i++) - { - for (uint j = 0; j < table->key_info[i].key_parts && !rc;j++) - { - LONG64 rows_per_key; - rc = dsmIndexStatsGet((dsmContext_t *)thd->gemini.context, - tableNumber, pindexNumbers[i],(int)j, - &rows_per_key); - if (rc) - { - gemini_msg((dsmContext_t *)thd->gemini.context, - "Index Statistics faild for table %d index %d, error %d", - tableNumber, pindexNumbers[i], rc); - } - *rec_per_key = (ha_rows)rows_per_key; - rec_per_key++; - } - } - return; -} - -/**************************************************************************** - Handling the shared GEM_SHARE structure that is needed to provide - a global in memory storage location of the rec_per_key stats used - by the optimizer. 
-****************************************************************************/ - -static byte* gem_get_key(GEM_SHARE *share,uint *length, - my_bool not_used __attribute__((unused))) -{ - *length=share->table_name_length; - return (byte*) share->table_name; -} - -static GEM_SHARE *get_share(const char *table_name, TABLE *table) -{ - GEM_SHARE *share; - - pthread_mutex_lock(&gem_mutex); - uint length=(uint) strlen(table_name); - if (!(share=(GEM_SHARE*) hash_search(&gem_open_tables, (byte*) table_name, - length))) - { - ha_rows *rec_per_key; - char *tmp_name; - - if ((share=(GEM_SHARE *) - my_multi_malloc(MYF(MY_WME | MY_ZEROFILL), - &share, sizeof(*share), - &rec_per_key, table->key_parts * sizeof(ha_rows), - &tmp_name, length+1, - NullS))) - { - share->rec_per_key = rec_per_key; - share->table_name = tmp_name; - share->table_name_length=length; - strcpy(share->table_name,table_name); - if (hash_insert(&gem_open_tables, (byte*) share)) - { - pthread_mutex_unlock(&gem_mutex); - my_free((gptr) share,0); - return 0; - } - thr_lock_init(&share->lock); - pthread_mutex_init(&share->mutex,NULL); - } - } - pthread_mutex_unlock(&gem_mutex); - return share; -} - -static int free_share(GEM_SHARE *share, bool mutex_is_locked) -{ - pthread_mutex_lock(&gem_mutex); - if (mutex_is_locked) - pthread_mutex_unlock(&share->mutex); - if (!--share->use_count) - { - hash_delete(&gem_open_tables, (byte*) share); - thr_lock_delete(&share->lock); - pthread_mutex_destroy(&share->mutex); - my_free((gptr) share, MYF(0)); - } - pthread_mutex_unlock(&gem_mutex); - return 0; -} - -static void gemini_lock_table_overflow_error(dsmContext_t *pcontext) -{ - gemini_msg(pcontext, "The total number of locks exceeds the lock table size"); - gemini_msg(pcontext, "Either increase gemini_lock_table_size or use a"); - gemini_msg(pcontext, "different transaction isolation level"); -} - -#endif /* HAVE_GEMINI_DB */ diff --git a/sql/ha_gemini.h b/sql/ha_gemini.h deleted file mode 100644 index 
006401271c6..00000000000 --- a/sql/ha_gemini.h +++ /dev/null @@ -1,208 +0,0 @@ -/* Copyright (C) 2000 MySQL AB & NuSphere Corporation - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ - - -#ifdef __GNUC__ -#pragma interface /* gcc class implementation */ -#endif - -#include "gem_my_global.h" -#include "dstd.h" -#include "dsmpub.h" - -/* class for the the gemini handler */ - -enum enum_key_string_options{KEY_CREATE,KEY_DELETE,KEY_CHECK}; -typedef struct st_gemini_share { - ha_rows *rec_per_key; - THR_LOCK lock; - pthread_mutex_t mutex; - char *table_name; - uint table_name_length,use_count; -} GEM_SHARE; - -typedef struct gemBlobDesc -{ - dsmBlobId_t blobId; - dsmBuffer_t *pBlob; -} gemBlobDesc_t; - -class ha_gemini: public handler -{ - /* define file as an int for now until we have a real file struct */ - int file; - uint int_option_flag; - int tableNumber; - dsmIndex_t *pindexNumbers; // dsm object numbers for the indexes on this table - dsmRecid_t lastRowid; - uint last_dup_key; - bool fixed_length_row, key_read, using_ignore; - byte *rec_buff; - dsmKey_t *pbracketBase; - dsmKey_t *pbracketLimit; - dsmKey_t *pfoundKey; - dsmMask_t tableStatus; // Crashed/repair status - gemBlobDesc_t *pBlobDescs; - - int index_open(char *tableName); - int pack_row(byte **prow, int *ppackedLength, const byte *record, - bool update); - int 
unpack_row(char *record, char *prow); - int findRow(THD *thd, dsmMask_t findMode, byte *buf); - int fetch_row(void *gemini_context, const byte *buf); - int handleIndexEntries(const byte * record, dsmRecid_t recid, - enum_key_string_options option); - - int handleIndexEntry(const byte * record, dsmRecid_t recid, - enum_key_string_options option,uint keynr); - - int createKeyString(const byte * record, KEY *pkeyinfo, - unsigned char *pkeyBuf, int bufSize, - int *pkeyStringLen, short geminiIndexNumber, - bool *thereIsAnull); - int fullCheck(THD *thd,byte *buf); - - int pack_key( uint keynr, dsmKey_t *pkey, - const byte *key_ptr, uint key_length); - - void unpack_key(char *record, dsmKey_t *key, uint index); - - int key_cmp(uint keynr, const byte * old_row, - const byte * new_row, bool updateStats); - - int saveKeyStats(THD *thd); - void get_index_stats(THD *thd); - - short cursorId; /* cursorId of active index cursor if any */ - dsmMask_t lockMode; /* Shared or exclusive */ - - /* FIXFIX Don't know why we need this because I don't know what - store_lock method does but we core dump without this */ - THR_LOCK_DATA lock; - GEM_SHARE *share; - - public: - ha_gemini(TABLE *table): handler(table), file(0), - int_option_flag(HA_READ_NEXT | HA_READ_PREV | - HA_REC_NOT_IN_SEQ | - HA_KEYPOS_TO_RNDPOS | HA_READ_ORDER | HA_LASTKEY_ORDER | - HA_LONGLONG_KEYS | HA_NULL_KEY | HA_HAVE_KEY_READ_ONLY | - HA_BLOB_KEY | - HA_NO_TEMP_TABLES | HA_NO_FULLTEXT_KEY | - /*HA_NOT_EXACT_COUNT | */ - /*HA_KEY_READ_WRONG_STR |*/ HA_DROP_BEFORE_CREATE), - pbracketBase(0),pbracketLimit(0),pfoundKey(0), - cursorId(0) - { - } - ~ha_gemini() {} - const char *table_type() const { return "Gemini"; } - const char **bas_ext() const; - ulong option_flag() const { return int_option_flag; } - uint max_record_length() const { return MAXRECSZ; } - uint max_keys() const { return MAX_KEY-1; } - uint max_key_parts() const { return MAX_REF_PARTS; } - uint max_key_length() const { return MAXKEYSZ / 2; } - bool 
fast_key_read() { return 1;} - bool has_transactions() { return 1;} - - int open(const char *name, int mode, uint test_if_locked); - int close(void); - double scan_time(); - int write_row(byte * buf); - int update_row(const byte * old_data, byte * new_data); - int delete_row(const byte * buf); - int index_init(uint index); - int index_end(); - int index_read(byte * buf, const byte * key, - uint key_len, enum ha_rkey_function find_flag); - int index_read_idx(byte * buf, uint index, const byte * key, - uint key_len, enum ha_rkey_function find_flag); - int index_next(byte * buf); - int index_next_same(byte * buf, const byte *key, uint keylen); - int index_prev(byte * buf); - int index_first(byte * buf); - int index_last(byte * buf); - int rnd_init(bool scan=1); - int rnd_end(); - int rnd_next(byte *buf); - int rnd_pos(byte * buf, byte *pos); - void position(const byte *record); - void info(uint); - int extra(enum ha_extra_function operation); - int reset(void); - int analyze(THD* thd, HA_CHECK_OPT* check_opt); - int check(THD* thd, HA_CHECK_OPT* check_opt); - int repair(THD* thd, HA_CHECK_OPT* check_opt); - int restore(THD* thd, HA_CHECK_OPT* check_opt); - int backup(THD* thd, HA_CHECK_OPT* check_opt); - int optimize(THD* thd, HA_CHECK_OPT* check_opt); - int external_lock(THD *thd, int lock_type); - virtual longlong get_auto_increment(); - void position(byte *record); - ha_rows records_in_range(int inx, - const byte *start_key,uint start_key_len, - enum ha_rkey_function start_search_flag, - const byte *end_key,uint end_key_len, - enum ha_rkey_function end_search_flag); - void update_create_info(HA_CREATE_INFO *create_info); - int create(const char *name, register TABLE *form, - HA_CREATE_INFO *create_info); - int delete_table(const char *name); - int rename_table(const char* from, const char* to); - THR_LOCK_DATA **store_lock(THD *thd, THR_LOCK_DATA **to, - enum thr_lock_type lock_type); -}; - -#define GEMOPT_FLUSH_LOG 0x00000001 -#define GEMOPT_UNBUFFERED_IO 
0x00000002 - -#define GEMINI_RECOVERY_FULL 0x00000001 -#define GEMINI_RECOVERY_NONE 0x00000002 -#define GEMINI_RECOVERY_FORCE 0x00000004 - -#define GEM_OPTID_SPIN_RETRIES 1 - -extern bool gemini_skip; -extern SHOW_COMP_OPTION have_gemini; -extern long gemini_options; -extern long gemini_buffer_cache; -extern long gemini_io_threads; -extern long gemini_log_cluster_size; -extern long gemini_locktablesize; -extern long gemini_lock_wait_timeout; -extern long gemini_spin_retries; -extern long gemini_connection_limit; -extern char *gemini_basedir; -extern TYPELIB gemini_recovery_typelib; -extern ulong gemini_recovery_options; - -bool gemini_init(void); -bool gemini_end(void); -bool gemini_flush_logs(void); -int gemini_commit(THD *thd); -int gemini_rollback(THD *thd); -int gemini_recovery_logging(THD *thd, bool on); -void gemini_disconnect(THD *thd); -int gemini_rollback_to_savepoint(THD *thd); -int gemini_parse_table_name(const char *fullname, char *dbname, char *tabname); -int gemini_is_vst(const char *pname); -int gemini_set_option_long(int optid, long optval); - -const int gemini_blocksize = BLKSIZE; -const int gemini_recbits = DEFAULT_RECBITS; - -extern "C" void uttrace(void); diff --git a/sql/ha_innobase.cc b/sql/ha_innobase.cc index e7d98dbe406..7e11fbe46d1 100644 --- a/sql/ha_innobase.cc +++ b/sql/ha_innobase.cc @@ -161,7 +161,23 @@ convert_error_code_to_mysql( } else if (error == (int) DB_DEADLOCK) { - return(1000000); + return(HA_ERR_LOCK_DEADLOCK); + + } else if (error == (int) DB_LOCK_WAIT_TIMEOUT) { + + return(1000001); + + } else if (error == (int) DB_NO_REFERENCED_ROW) { + + return(1000010); + + } else if (error == (int) DB_ROW_IS_REFERENCED) { + + return(1000011); + + } else if (error == (int) DB_CANNOT_ADD_CONSTRAINT) { + + return(1000012); } else if (error == (int) DB_OUT_OF_FILE_SPACE) { @@ -178,7 +194,6 @@ convert_error_code_to_mysql( } else if (error == (int) DB_TOO_BIG_RECORD) { return(HA_ERR_TO_BIG_ROW); - } else { dbug_assert(0); @@ -220,7 +235,7 
@@ innobase_mysql_print_thd( } if (thd->query) { - printf(" %-.100s", thd->query); + printf("\n%-.100s", thd->query); } printf("\n"); @@ -526,9 +541,24 @@ innobase_init(void) { int err; bool ret; + char current_lib[2], *default_path; DBUG_ENTER("innobase_init"); + /* + When using the embedded server, the datadirectory is not + in the current directory. + */ + if (!mysql_embedded) + default_path=mysql_real_data_home; + else + { + /* It's better to use current lib, to keep path's short */ + current_lib[0]=FN_CURLIB; + current_lib[1]=FN_LIBCHAR; + default_path=current_lib; + } + if (specialflag & SPECIAL_NO_PRIOR) { srv_set_thread_priorities = FALSE; } else { @@ -544,10 +574,10 @@ innobase_init(void) MYF(MY_WME)); srv_data_home = (innobase_data_home_dir ? innobase_data_home_dir : - mysql_real_data_home); + default_path); srv_logs_home = (char*) ""; srv_arch_dir = (innobase_log_arch_dir ? innobase_log_arch_dir : - mysql_real_data_home); + default_path); ret = innobase_parse_data_file_paths_and_sizes(); @@ -557,7 +587,7 @@ innobase_init(void) } if (!innobase_log_group_home_dir) - innobase_log_group_home_dir= mysql_real_data_home; + innobase_log_group_home_dir= default_path; ret = innobase_parse_log_group_home_dirs(); if (ret == FALSE) { @@ -586,6 +616,15 @@ innobase_init(void) srv_lock_wait_timeout = (ulint) innobase_lock_wait_timeout; srv_print_verbose_log = mysql_embedded ? 0 : 1; + if (strcmp(default_charset_info->name, "latin1") == 0) { + /* Store the character ordering table to InnoDB. + For non-latin1 charsets we use the MySQL comparison + functions, and consequently we do not need to know + the ordering internally in InnoDB. 
*/ + + memcpy(srv_latin1_ordering, + default_charset_info->sort_order, 256); + } err = innobase_start_or_create_for_mysql(); @@ -636,7 +675,7 @@ innobase_flush_logs(void) DBUG_ENTER("innobase_flush_logs"); - log_make_checkpoint_at(ut_dulint_max, TRUE); + log_flush_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP); DBUG_RETURN(result); } @@ -869,10 +908,10 @@ ha_innobase::open( if (NULL == (ib_table = dict_table_get(norm_name, NULL))) { fprintf(stderr, -"Cannot find table %s from the internal data dictionary\n" -"of InnoDB though the .frm file for the table exists. Maybe you have deleted\n" -"and created again an InnoDB database but forgotten to delete the\n" -"corresponding .frm files of old InnoDB tables?\n", +"InnoDB: Cannot find table %s from the internal data dictionary\n" +"InnoDB: of InnoDB though the .frm file for the table exists. Maybe you\n" +"InnoDB: have deleted and recreated InnoDB data files but have forgotten\n" +"InnoDB: to delete the corresponding .frm files of InnoDB tables?\n", norm_name); free_share(share); @@ -1392,8 +1431,36 @@ ha_innobase::write_row( current value and the value supplied by the user, if the auto_inc counter is already initialized for the table */ + + /* We have to use the transactional lock mechanism + on the auto-inc counter of the table to ensure + that replication and roll-forward of the binlog + exactly imitates also the given auto-inc values. + The lock is released at each SQL statement's + end. 
*/ + + error = row_lock_table_autoinc_for_mysql(prebuilt); + + if (error != DB_SUCCESS) { + + error = convert_error_code_to_mysql(error); + goto func_exit; + } + dict_table_autoinc_update(prebuilt->table, auto_inc); } else { + if (!prebuilt->trx->auto_inc_lock) { + + error = row_lock_table_autoinc_for_mysql( + prebuilt); + if (error != DB_SUCCESS) { + + error = convert_error_code_to_mysql( + error); + goto func_exit; + } + } + auto_inc = dict_table_autoinc_get(prebuilt->table); /* If auto_inc is now != 0 the autoinc counter @@ -1451,7 +1518,7 @@ ha_innobase::write_row( /* Tell InnoDB server that there might be work for utility threads: */ - +func_exit: innobase_active_small(); DBUG_RETURN(error); @@ -1728,7 +1795,7 @@ ha_innobase::index_init( } /********************************************************************** -?????????????????????????????????? */ +Currently does nothing. */ int ha_innobase::index_end(void) @@ -2290,6 +2357,15 @@ ha_innobase::external_lock( trx_search_latch_release_if_reserved(trx); } + if (trx->auto_inc_lock) { + + /* If we had reserved the auto-inc lock for + some table in this SQL statement, we release + it now */ + + row_unlock_table_autoinc_for_mysql(trx); + } + if (!(thd->options & (OPTION_NOT_AUTO_COMMIT | OPTION_BEGIN))) { innobase_commit(thd, trx); @@ -2452,7 +2528,9 @@ ha_innobase::create( const char* name, /* in: table name */ TABLE* form, /* in: information on table columns and indexes */ - HA_CREATE_INFO* create_info) /* in: ??????? 
*/ + HA_CREATE_INFO* create_info) /* in: more information of the + created table, contains also the + create statement string */ { int error; dict_table_t* innobase_table; @@ -2543,6 +2621,19 @@ ha_innobase::create( } } + error = row_table_add_foreign_constraints(trx, + create_info->create_statement, norm_name); + + error = convert_error_code_to_mysql(error); + + if (error) { + trx_commit_for_mysql(trx); + + trx_free_for_mysql(trx); + + DBUG_RETURN(error); + } + trx_commit_for_mysql(trx); innobase_table = dict_table_get(norm_name, NULL); @@ -2563,8 +2654,8 @@ ha_innobase::create( Drops a table from an InnoDB database. Before calling this function, MySQL calls innobase_commit to commit the transaction of the current user. Then the current user cannot have locks set on the table. Drop table -operation inside InnoDB will wait sleeping in a loop until no other -user has locks on the table. */ +operation inside InnoDB will remove all locks any user has on the table +inside InnoDB. */ int ha_innobase::delete_table( @@ -2606,6 +2697,53 @@ ha_innobase::delete_table( DBUG_RETURN(error); } +/********************************************************************* +Removes all tables in the named database inside InnoDB. 
*/ + +int +innobase_drop_database( +/*===================*/ + /* out: error number */ + char* path) /* in: database path; inside InnoDB the name + of the last directory in the path is used as + the database name: for example, in 'mysql/data/test' + the database name is 'test' */ +{ + ulint len = 0; + trx_t* trx; + char* ptr; + int error; + char namebuf[10000]; + + ptr = strend(path) - 2; + + while (ptr >= path && *ptr != '\\' && *ptr != '/') { + ptr--; + len++; + } + + ptr++; + + memcpy(namebuf, ptr, len); + namebuf[len] = '/'; + namebuf[len + 1] = '\0'; + + trx = trx_allocate_for_mysql(); + + error = row_drop_database_for_mysql(namebuf, trx); + + /* Tell the InnoDB server that there might be work for + utility threads: */ + + srv_active_wake_master_thread(); + + trx_free_for_mysql(trx); + + error = convert_error_code_to_mysql(error); + + return(error); +} + /************************************************************************* Renames an InnoDB table. */ @@ -2742,12 +2880,13 @@ improve the algorithm of filesort.cc. 
*/ ha_rows ha_innobase::estimate_number_of_rows(void) /*======================================*/ - /* out: upper bound of rows, currently 32-bit int - or uint */ + /* out: upper bound of rows */ { row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt; - dict_table_t* ib_table; - + dict_index_t* index; + ulonglong estimate; + ulonglong data_file_length; + if (prebuilt->trx) { prebuilt->trx->op_info = (char*) "estimating upper bound of table size"; @@ -2755,21 +2894,21 @@ ha_innobase::estimate_number_of_rows(void) DBUG_ENTER("info"); - ib_table = prebuilt->table; - - dict_update_statistics(ib_table); - - data_file_length = ((ulonglong) - ib_table->stat_clustered_index_size) - * UNIV_PAGE_SIZE; + dict_update_statistics(prebuilt->table); - /* The minimum clustered index record size is 20 bytes */ + index = dict_table_get_first_index_noninline(prebuilt->table); + + data_file_length = ((ulonglong) index->stat_n_leaf_pages) + * UNIV_PAGE_SIZE; + /* Calculate a minimum length for a clustered index record */ + estimate = data_file_length / dict_index_calc_min_rec_len(index); + if (prebuilt->trx) { prebuilt->trx->op_info = (char*) ""; } - - return((ha_rows) (1000 + data_file_length / 20)); + + return((ha_rows) estimate); } /************************************************************************* @@ -2784,10 +2923,10 @@ ha_innobase::scan_time() { row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt; - /* In the following formula we assume that scanning 5 pages + /* In the following formula we assume that scanning 10 pages takes the same time as a disk seek: */ - return((double) (1 + prebuilt->table->stat_clustered_index_size / 5)); + return((double) (prebuilt->table->stat_clustered_index_size / 10)); } /************************************************************************* @@ -2802,8 +2941,9 @@ ha_innobase::info( row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt; dict_table_t* ib_table; dict_index_t* index; - uint rec_per_key; - uint i; + 
ulong rec_per_key; + ulong j; + ulong i; DBUG_ENTER("info"); @@ -2821,7 +2961,7 @@ ha_innobase::info( } if (flag & HA_STATUS_VARIABLE) { - records = ib_table->stat_n_rows; + records = (ha_rows)ib_table->stat_n_rows; deleted = 0; data_file_length = ((ulonglong) ib_table->stat_clustered_index_size) @@ -2847,16 +2987,24 @@ ha_innobase::info( } for (i = 0; i < table->keys; i++) { - if (index->stat_n_diff_key_vals == 0) { - rec_per_key = records; - } else { - rec_per_key = records / - index->stat_n_diff_key_vals; + for (j = 0; j < table->key_info[i].key_parts; j++) { + + if (index->stat_n_diff_key_vals[j + 1] == 0) { + + rec_per_key = records; + } else { + rec_per_key = (ulong)(records / + index->stat_n_diff_key_vals[j + 1]); + } + + if (rec_per_key == 0) { + rec_per_key = 1; + } + + table->key_info[i].rec_per_key[j] + = rec_per_key; } - - table->key_info[i].rec_per_key[ - table->key_info[i].key_parts - 1] - = rec_per_key; + index = dict_table_get_next_index_noninline(index); } } diff --git a/sql/ha_innobase.h b/sql/ha_innobase.h index daa987dd757..3b0144a4fca 100644 --- a/sql/ha_innobase.h +++ b/sql/ha_innobase.h @@ -177,3 +177,5 @@ uint innobase_get_free_space(void); int innobase_commit(THD *thd, void* trx_handle); int innobase_rollback(THD *thd, void* trx_handle); int innobase_close_connection(THD *thd); +int innobase_drop_database(char *path); + diff --git a/sql/handler.cc b/sql/handler.cc index 5a41498aff1..742bcd3aa0a 100644 --- a/sql/handler.cc +++ b/sql/handler.cc @@ -35,9 +35,6 @@ #ifdef HAVE_INNOBASE_DB #include "ha_innobase.h" #endif -#ifdef HAVE_GEMINI_DB -#include "ha_gemini.h" -#endif #include <myisampack.h> #include <errno.h> @@ -81,10 +78,6 @@ enum db_type ha_checktype(enum db_type database_type) case DB_TYPE_INNODB: return(innodb_skip ? DB_TYPE_MYISAM : database_type); #endif -#ifdef HAVE_GEMINI_DB - case DB_TYPE_GEMINI: - return(gemini_skip ? 
DB_TYPE_MYISAM : database_type); -#endif #ifndef NO_HASH case DB_TYPE_HASH: #endif @@ -123,10 +116,6 @@ handler *get_new_handler(TABLE *table, enum db_type db_type) case DB_TYPE_INNODB: return new ha_innobase(table); #endif -#ifdef HAVE_GEMINI_DB - case DB_TYPE_GEMINI: - return new ha_gemini(table); -#endif case DB_TYPE_HEAP: return new ha_heap(table); case DB_TYPE_MYISAM: @@ -162,17 +151,6 @@ int ha_init() have_innodb=SHOW_OPTION_DISABLED; } #endif -#ifdef HAVE_GEMINI_DB - if (!gemini_skip) - { - if (gemini_init()) - return -1; - if (!gemini_skip) // If we couldn't use handler - opt_using_transactions=1; - else - have_gemini=SHOW_OPTION_DISABLED; - } -#endif return 0; } @@ -200,13 +178,16 @@ int ha_panic(enum ha_panic_function flag) if (!innodb_skip) error|=innobase_end(); #endif -#ifdef HAVE_GEMINI_DB - if (!gemini_skip) - error|=gemini_end(); -#endif return error; } /* ha_panic */ +void ha_drop_database(char* path) +{ +#ifdef HAVE_INNOBASE_DB + if (!innodb_skip) + innobase_drop_database(path); +#endif +} void ha_close_connection(THD* thd) { @@ -214,12 +195,6 @@ void ha_close_connection(THD* thd) if (!innodb_skip) innobase_close_connection(thd); #endif -#ifdef HAVE_GEMINI_DB - if (!gemini_skip && thd->gemini.context) - { - gemini_disconnect(thd); - } -#endif /* HAVE_GEMINI_DB */ } /* @@ -285,20 +260,6 @@ int ha_commit_trans(THD *thd, THD_TRANS* trans) trans->innodb_active_trans=0; } #endif -#ifdef HAVE_GEMINI_DB - /* Commit the transaction in behalf of the commit statement - or if we're in auto-commit mode */ - if((trans == &thd->transaction.all) || - (!(thd->options & (OPTION_NOT_AUTO_COMMIT | OPTION_BEGIN)))) - { - error=gemini_commit(thd); - if (error) - { - my_error(ER_ERROR_DURING_COMMIT, MYF(0), error); - error=1; - } - } -#endif if (error && trans == &thd->transaction.all && mysql_bin_log.is_open()) sql_print_error("Error: Got error during commit; Binlog is not up to date!"); thd->tx_isolation=thd->session_tx_isolation; @@ -337,18 +298,6 @@ int 
ha_rollback_trans(THD *thd, THD_TRANS *trans) trans->innodb_active_trans=0; } #endif -#ifdef HAVE_GEMINI_DB - if((trans == &thd->transaction.stmt) && - (thd->options & (OPTION_NOT_AUTO_COMMIT | OPTION_BEGIN))) - error = gemini_rollback_to_savepoint(thd); - else - error=gemini_rollback(thd); - if (error) - { - my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), error); - error=1; - } -#endif if (trans == &thd->transaction.all) reinit_io_cache(&thd->transaction.trans_log, WRITE_CACHE, (my_off_t) 0, 0, 1); @@ -359,17 +308,6 @@ int ha_rollback_trans(THD *thd, THD_TRANS *trans) DBUG_RETURN(error); } -void ha_set_spin_retries(uint retries) -{ -#ifdef HAVE_GEMINI_DB - if (!gemini_skip) - { - gemini_set_option_long(GEM_OPTID_SPIN_RETRIES, retries); - } -#endif /* HAVE_GEMINI_DB */ -} - - bool ha_flush_logs() { bool result=0; @@ -751,22 +689,6 @@ int handler::rename_table(const char * from, const char * to) DBUG_RETURN(0); } -int ha_commit_rename(THD *thd) -{ - int error=0; -#ifdef HAVE_GEMINI_DB - /* Gemini needs to commit the rename; otherwise a rollback will change - ** the table names back internally but the physical files will still - ** have the new names. 
- */ - if (ha_commit_stmt(thd)) - error= -1; - if (ha_commit(thd)) - error= -1; -#endif - return error; -} - /* Tell the handler to turn on or off logging to the handler's recovery log */ @@ -775,14 +697,6 @@ int ha_recovery_logging(THD *thd, bool on) int error=0; DBUG_ENTER("ha_recovery_logging"); -#ifdef USING_TRANSACTIONS - if (opt_using_transactions) - { -#ifdef HAVE_GEMINI_DB - error = gemini_recovery_logging(thd, on); -#endif - } -#endif DBUG_RETURN(error); } diff --git a/sql/handler.h b/sql/handler.h index f0806ea3bea..5d74a1ada7d 100644 --- a/sql/handler.h +++ b/sql/handler.h @@ -25,7 +25,7 @@ #define NO_HASH /* Not yet implemented */ #endif -#if defined(HAVE_BERKELEY_DB) || defined(HAVE_INNOBASE_DB) || defined(HAVE_GEMINI_DB) +#if defined(HAVE_BERKELEY_DB) || defined(HAVE_INNOBASE_DB) #define USING_TRANSACTIONS #endif @@ -132,7 +132,6 @@ enum row_type { ROW_TYPE_NOT_USED=-1, ROW_TYPE_DEFAULT, ROW_TYPE_FIXED, typedef struct st_thd_trans { void *bdb_tid; void *innobase_tid; - void *gemini_tid; bool innodb_active_trans; } THD_TRANS; @@ -149,6 +148,7 @@ typedef struct st_ha_create_information ulonglong auto_increment_value; char *comment,*password; char *data_file_name, *index_file_name; + char *create_statement; uint options; /* OR of HA_CREATE_ options */ uint raid_type,raid_chunks; ulong raid_chunksize; @@ -323,17 +323,6 @@ public: enum thr_lock_type lock_type)=0; }; -#ifdef HAVE_GEMINI_DB -struct st_gemini -{ - void *context; - unsigned long savepoint; - bool needSavepoint; - uint tx_isolation; - uint lock_count; -}; -#endif - /* Some extern variables used with handlers */ extern const char *ha_row_type[]; @@ -357,6 +346,7 @@ enum db_type ha_checktype(enum db_type database_type); int ha_create_table(const char *name, HA_CREATE_INFO *create_info, bool update_create_info); int ha_delete_table(enum db_type db_type, const char *path); +void ha_drop_database(char* path); void ha_key_cache(void); int ha_start_stmt(THD *thd); int ha_commit_trans(THD *thd, 
THD_TRANS *trans); @@ -364,5 +354,4 @@ int ha_rollback_trans(THD *thd, THD_TRANS *trans); int ha_autocommit_or_rollback(THD *thd, int error); void ha_set_spin_retries(uint retries); bool ha_flush_logs(void); -int ha_commit_rename(THD *thd); int ha_recovery_logging(THD *thd, bool on); diff --git a/sql/lex.h b/sql/lex.h index 72d77e18910..8976826fe1e 100644 --- a/sql/lex.h +++ b/sql/lex.h @@ -153,8 +153,6 @@ static SYMBOL symbols[] = { { "FULL", SYM(FULL),0,0}, { "FULLTEXT", SYM(FULLTEXT_SYM),0,0}, { "FUNCTION", SYM(UDF_SYM),0,0}, - { "GEMINI", SYM(GEMINI_SYM),0,0}, - { "GEMINI_SPIN_RETRIES", SYM(GEMINI_SPIN_RETRIES),0,0}, { "GLOBAL", SYM(GLOBAL_SYM),0,0}, { "GRANT", SYM(GRANT),0,0}, { "GRANTS", SYM(GRANTS),0,0}, diff --git a/sql/mysqld.cc b/sql/mysqld.cc index 8c93f4d46e8..ca9c0bae682 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -21,6 +21,7 @@ #include "sql_acl.h" #include "slave.h" #include "sql_repl.h" +#include "repl_failsafe.h" #include "stacktrace.h" #ifdef HAVE_BERKELEY_DB #include "ha_berkeley.h" @@ -28,9 +29,6 @@ #ifdef HAVE_INNOBASE_DB #include "ha_innobase.h" #endif -#ifdef HAVE_GEMINI_DB -#include "ha_gemini.h" -#endif #include "ha_myisam.h" #include <nisam.h> #include <thr_alarm.h> @@ -183,11 +181,6 @@ SHOW_COMP_OPTION have_berkeley_db=SHOW_OPTION_YES; #else SHOW_COMP_OPTION have_berkeley_db=SHOW_OPTION_NO; #endif -#ifdef HAVE_GEMINI_DB -SHOW_COMP_OPTION have_gemini=SHOW_OPTION_YES; -#else -SHOW_COMP_OPTION have_gemini=SHOW_OPTION_NO; -#endif #ifdef HAVE_INNOBASE_DB SHOW_COMP_OPTION have_innodb=SHOW_OPTION_YES; #else @@ -332,9 +325,8 @@ const char *sql_mode_str="OFF"; const char *default_tx_isolation_name; enum_tx_isolation default_tx_isolation=ISO_READ_COMMITTED; -#ifdef HAVE_GEMINI_DB -const char *gemini_recovery_options_str="FULL"; -#endif +uint rpl_recovery_rank=0; + my_string mysql_unix_port=NULL, mysql_tmpdir=NULL, allocated_mysql_tmpdir=NULL; ulong my_bind_addr; /* the address we bind to */ DATE_FORMAT dayord; @@ -1238,9 +1230,6 @@ the thread 
stack. Please read http://www.mysql.com/doc/L/i/Linux.html\n\n", #ifdef HAVE_STACKTRACE if(!(test_flags & TEST_NO_STACKTRACE)) { -#ifdef HAVE_GEMINI_DB - utrace(); -#endif print_stacktrace(thd ? (gptr) thd->thread_stack : (gptr) 0, thread_stack); } @@ -1696,6 +1685,7 @@ int main(int argc, char **argv) (void) pthread_mutex_init(&LOCK_slave, MY_MUTEX_INIT_FAST); (void) pthread_mutex_init(&LOCK_server_id, MY_MUTEX_INIT_FAST); (void) pthread_mutex_init(&LOCK_user_conn, MY_MUTEX_INIT_FAST); + (void) pthread_mutex_init(&LOCK_rpl_status, MY_MUTEX_INIT_FAST); (void) pthread_cond_init(&COND_thread_count,NULL); (void) pthread_cond_init(&COND_refresh,NULL); (void) pthread_cond_init(&COND_thread_cache,NULL); @@ -1704,6 +1694,7 @@ int main(int argc, char **argv) (void) pthread_cond_init(&COND_binlog_update, NULL); (void) pthread_cond_init(&COND_slave_stopped, NULL); (void) pthread_cond_init(&COND_slave_start, NULL); + (void) pthread_cond_init(&COND_rpl_status, NULL); init_signals(); if (set_default_charset_by_name(default_charset, MYF(MY_WME))) @@ -2598,17 +2589,16 @@ enum options { OPT_INNODB_FLUSH_LOG_AT_TRX_COMMIT, OPT_INNODB_FLUSH_METHOD, OPT_SAFE_SHOW_DB, - OPT_GEMINI_SKIP, OPT_INNODB_SKIP, + OPT_INNODB_SKIP, OPT_SKIP_SAFEMALLOC, OPT_TEMP_POOL, OPT_TX_ISOLATION, - OPT_GEMINI_FLUSH_LOG, OPT_GEMINI_RECOVER, - OPT_GEMINI_UNBUFFERED_IO, OPT_SKIP_SAFEMALLOC, OPT_SKIP_STACK_TRACE, OPT_SKIP_SYMLINKS, OPT_MAX_BINLOG_DUMP_EVENTS, OPT_SPORADIC_BINLOG_DUMP_FAIL, OPT_SAFE_USER_CREATE, OPT_SQL_MODE, OPT_DO_PSTACK, OPT_REPORT_HOST, OPT_REPORT_USER, OPT_REPORT_PASSWORD, OPT_REPORT_PORT, OPT_SHOW_SLAVE_AUTH_INFO, OPT_OLD_RPL_COMPAT, - OPT_SLAVE_LOAD_TMPDIR, OPT_NO_MIX_TYPE + OPT_SLAVE_LOAD_TMPDIR, OPT_NO_MIX_TYPE, + OPT_RPL_RECOVERY_RANK,OPT_INIT_RPL_ROLE }; static struct option long_options[] = { @@ -2644,11 +2634,7 @@ static struct option long_options[] = { {"enable-pstack", no_argument, 0, (int) OPT_DO_PSTACK}, {"exit-info", optional_argument, 0, 'T'}, {"flush", no_argument, 0, (int) 
OPT_FLUSH}, -#ifdef HAVE_GEMINI_DB - {"gemini-flush-log-at-commit",no_argument, 0, (int) OPT_GEMINI_FLUSH_LOG}, - {"gemini-recovery", required_argument, 0, (int) OPT_GEMINI_RECOVER}, - {"gemini-unbuffered-io", no_argument, 0, (int) OPT_GEMINI_UNBUFFERED_IO}, -#endif + {"init-rpl-role", required_argument, 0, (int) OPT_INIT_RPL_ROLE}, /* We must always support this option to make scripts like mysqltest easier to do */ {"innodb_data_file_path", required_argument, 0, @@ -2733,6 +2719,7 @@ static struct option long_options[] = { {"report-user", required_argument, 0, (int) OPT_REPORT_USER}, {"report-password", required_argument, 0, (int) OPT_REPORT_PASSWORD}, {"report-port", required_argument, 0, (int) OPT_REPORT_PORT}, + {"rpl-recovery-rank", required_argument, 0, (int) OPT_RPL_RECOVERY_RANK}, {"safe-mode", no_argument, 0, (int) OPT_SAFE}, {"safe-show-database", no_argument, 0, (int) OPT_SAFE_SHOW_DB}, {"safe-user-create", no_argument, 0, (int) OPT_SAFE_USER_CREATE}, @@ -2742,7 +2729,6 @@ static struct option long_options[] = { (int) OPT_SHOW_SLAVE_AUTH_INFO}, {"skip-bdb", no_argument, 0, (int) OPT_BDB_SKIP}, {"skip-innodb", no_argument, 0, (int) OPT_INNODB_SKIP}, - {"skip-gemini", no_argument, 0, (int) OPT_GEMINI_SKIP}, {"skip-concurrent-insert", no_argument, 0, (int) OPT_SKIP_CONCURRENT_INSERT}, {"skip-delay-key-write", no_argument, 0, (int) OPT_SKIP_DELAY_KEY_WRITE}, {"skip-grant-tables", no_argument, 0, (int) OPT_SKIP_GRANT}, @@ -2810,22 +2796,6 @@ CHANGEABLE_VAR changeable_vars[] = { HA_FT_MAXLEN, 10, HA_FT_MAXLEN, 0, 1 }, { "ft_max_word_len_for_sort",(long*) &ft_max_word_len_for_sort, 20, 4, HA_FT_MAXLEN, 0, 1 }, -#ifdef HAVE_GEMINI_DB - { "gemini_buffer_cache", (long*) &gemini_buffer_cache, - 128 * 8192, 16, LONG_MAX, 0, 1 }, - { "gemini_connection_limit", (long*) &gemini_connection_limit, - 100, 10, LONG_MAX, 0, 1 }, - { "gemini_io_threads", (long*) &gemini_io_threads, - 2, 0, 256, 0, 1 }, - { "gemini_log_cluster_size", (long*) &gemini_log_cluster_size, - 256 * 
1024, 16 * 1024, LONG_MAX, 0, 1 }, - { "gemini_lock_table_size", (long*) &gemini_locktablesize, - 4096, 1024, LONG_MAX, 0, 1 }, - { "gemini_lock_wait_timeout",(long*) &gemini_lock_wait_timeout, - 10, 1, LONG_MAX, 0, 1 }, - { "gemini_spin_retries", (long*) &gemini_spin_retries, - 1, 0, LONG_MAX, 0, 1 }, -#endif #ifdef HAVE_INNOBASE_DB {"innodb_mirrored_log_groups", (long*) &innobase_mirrored_log_groups, 1, 1, 10, 0, 1}, @@ -2961,18 +2931,7 @@ struct show_var_st init_vars[]= { {"ft_min_word_len", (char*) &ft_min_word_len, SHOW_LONG}, {"ft_max_word_len", (char*) &ft_max_word_len, SHOW_LONG}, {"ft_max_word_len_for_sort",(char*) &ft_max_word_len_for_sort, SHOW_LONG}, -#ifdef HAVE_GEMINI_DB - {"gemini_buffer_cache", (char*) &gemini_buffer_cache, SHOW_LONG}, - {"gemini_connection_limit", (char*) &gemini_connection_limit, SHOW_LONG}, - {"gemini_io_threads", (char*) &gemini_io_threads, SHOW_LONG}, - {"gemini_log_cluster_size", (char*) &gemini_log_cluster_size, SHOW_LONG}, - {"gemini_lock_table_size", (char*) &gemini_locktablesize, SHOW_LONG}, - {"gemini_lock_wait_timeout",(char*) &gemini_lock_wait_timeout, SHOW_LONG}, - {"gemini_recovery_options", (char*) &gemini_recovery_options_str, SHOW_CHAR_PTR}, - {"gemini_spin_retries", (char*) &gemini_spin_retries, SHOW_LONG}, -#endif {"have_bdb", (char*) &have_berkeley_db, SHOW_HAVE}, - {"have_gemini", (char*) &have_gemini, SHOW_HAVE}, {"have_innodb", (char*) &have_innodb, SHOW_HAVE}, {"have_isam", (char*) &have_isam, SHOW_HAVE}, {"have_raid", (char*) &have_raid, SHOW_HAVE}, @@ -3032,6 +2991,7 @@ struct show_var_st init_vars[]= { {"protocol_version", (char*) &protocol_version, SHOW_INT}, {"record_buffer", (char*) &my_default_record_cache_size,SHOW_LONG}, {"record_rnd_buffer", (char*) &record_rnd_cache_size, SHOW_LONG}, + {"rpl_recovery_rank", (char*) &rpl_recovery_rank, SHOW_LONG}, {"query_buffer_size", (char*) &query_buff_size, SHOW_LONG}, {"safe_show_database", (char*) &opt_safe_show_db, SHOW_BOOL}, {"server_id", (char*) 
&server_id, SHOW_LONG}, @@ -3096,6 +3056,8 @@ struct show_var_st status_vars[]= { {"Open_streams", (char*) &my_stream_opened, SHOW_INT_CONST}, {"Opened_tables", (char*) &opened_tables, SHOW_LONG}, {"Questions", (char*) 0, SHOW_QUESTION}, + {"Rpl_status", (char*) 0, + SHOW_RPL_STATUS}, {"Select_full_join", (char*) &select_full_join_count, SHOW_LONG}, {"Select_full_range_join", (char*) &select_full_range_join_count, SHOW_LONG}, {"Select_range", (char*) &select_range_count, SHOW_LONG}, @@ -3288,16 +3250,6 @@ static void usage(void) --skip-bdb Don't use berkeley db (will save memory)\n\ "); #endif /* HAVE_BERKELEY_DB */ -#ifdef HAVE_GEMINI_DB - puts("\ - --gemini-recovery=mode Set Crash Recovery operating mode\n\ - (FULL, NONE, FORCE - default FULL)\n\ - --gemini-flush-log-at-commit\n\ - Every commit forces a write to the reovery log\n\ - --gemini-unbuffered-io Use unbuffered i/o\n\ - --skip-gemini Don't use gemini (will save memory)\n\ -"); -#endif #ifdef HAVE_INNOBASE_DB puts("\ --innodb_data_home_dir=dir The common part for Innodb table spaces\n\ @@ -3452,6 +3404,9 @@ static void get_options(int argc,char **argv) safemalloc_mem_limit = atoi(optarg); #endif break; + case OPT_RPL_RECOVERY_RANK: + rpl_recovery_rank=atoi(optarg); + break; case OPT_SLAVE_LOAD_TMPDIR: slave_load_tmpdir = my_strdup(optarg, MYF(MY_FAE)); break; @@ -3540,6 +3495,17 @@ static void get_options(int argc,char **argv) opt_log_slave_updates = 1; break; + case (int) OPT_INIT_RPL_ROLE: + { + int role; + if ((role=find_type(optarg, &rpl_role_typelib, 2)) <= 0) + { + fprintf(stderr, "Unknown replication role: %s\n", optarg); + exit(1); + } + rpl_status = (role == 1) ? 
RPL_AUTH_MASTER : RPL_IDLE_SLAVE; + break; + } case (int)OPT_REPLICATE_IGNORE_DB: { i_string *db = new i_string(optarg); @@ -3871,27 +3837,6 @@ static void get_options(int argc,char **argv) have_berkeley_db=SHOW_OPTION_DISABLED; #endif break; - case OPT_GEMINI_SKIP: -#ifdef HAVE_GEMINI_DB - gemini_skip=1; - have_gemini=SHOW_OPTION_DISABLED; - break; - case OPT_GEMINI_RECOVER: - gemini_recovery_options_str=optarg; - if ((gemini_recovery_options= - find_bit_type(optarg, &gemini_recovery_typelib)) == ~(ulong) 0) - { - fprintf(stderr, "Unknown option to gemini-recovery: %s\n",optarg); - exit(1); - } - break; - case OPT_GEMINI_FLUSH_LOG: - gemini_options |= GEMOPT_FLUSH_LOG; - break; - case OPT_GEMINI_UNBUFFERED_IO: - gemini_options |= GEMOPT_UNBUFFERED_IO; -#endif - break; case OPT_INNODB_SKIP: #ifdef HAVE_INNOBASE_DB innodb_skip=1; @@ -4438,68 +4383,6 @@ static int get_service_parameters() { SET_CHANGEABLE_VARVAL( "thread_concurrency" ); } -#ifdef HAVE_GEMINI_DB - else if ( lstrcmp(szKeyValueName, TEXT("GeminiLazyCommit")) == 0 ) - { - CHECK_KEY_TYPE( REG_DWORD, szKeyValueName ); - if ( *lpdwValue ) - gemini_options |= GEMOPT_FLUSH_LOG; - else - gemini_options &= ~GEMOPT_FLUSH_LOG; - } - else if ( lstrcmp(szKeyValueName, TEXT("GeminiFullRecovery")) == 0 ) - { - CHECK_KEY_TYPE( REG_DWORD, szKeyValueName ); - if ( *lpdwValue ) - gemini_options &= ~GEMOPT_NO_CRASH_PROTECTION; - else - gemini_options |= GEMOPT_NO_CRASH_PROTECTION; - } - else if ( lstrcmp(szKeyValueName, TEXT("GeminiNoRecovery")) == 0 ) - { - CHECK_KEY_TYPE( REG_DWORD, szKeyValueName ); - if ( *lpdwValue ) - gemini_options |= GEMOPT_NO_CRASH_PROTECTION; - else - gemini_options &= ~GEMOPT_NO_CRASH_PROTECTION; - } - else if ( lstrcmp(szKeyValueName, TEXT("GeminiUnbufferedIO")) == 0 ) - { - CHECK_KEY_TYPE( REG_DWORD, szKeyValueName ); - if ( *lpdwValue ) - gemini_options |= GEMOPT_UNBUFFERED_IO; - else - gemini_options &= ~GEMOPT_UNBUFFERED_IO; - } - else if ( lstrcmp(szKeyValueName, 
TEXT("GeminiLockTableSize")) == 0 ) - { - SET_CHANGEABLE_VARVAL( "gemini_lock_table_size" ); - } - else if ( lstrcmp(szKeyValueName, TEXT("GeminiBufferCache")) == 0 ) - { - SET_CHANGEABLE_VARVAL( "gemini_buffer_cache" ); - } - else if ( lstrcmp(szKeyValueName, TEXT("GeminiSpinRetries")) == 0 ) - { - SET_CHANGEABLE_VARVAL( "gemini_spin_retries" ); - } - else if ( lstrcmp(szKeyValueName, TEXT("GeminiIoThreads")) == 0 ) - { - SET_CHANGEABLE_VARVAL( "gemini_io_threads" ); - } - else if ( lstrcmp(szKeyValueName, TEXT("GeminiConnectionLimit")) == 0 ) - { - SET_CHANGEABLE_VARVAL( "gemini_connection_limit" ); - } - else if ( lstrcmp(szKeyValueName, TEXT("GeminiLogClusterSize")) == 0 ) - { - SET_CHANGEABLE_VARVAL( "gemini_log_cluster_size" ); - } - else if ( lstrcmp(szKeyValueName, TEXT("GeminiLockWaitTimeout")) == 0 ) - { - SET_CHANGEABLE_VARVAL( "gemini_lock_wait_timeout" ); - } -#endif else { TCHAR szErrorMsg [ 512 ]; diff --git a/sql/repl_failsafe.cc b/sql/repl_failsafe.cc new file mode 100644 index 00000000000..bdd63bd9a10 --- /dev/null +++ b/sql/repl_failsafe.cc @@ -0,0 +1,36 @@ +/* Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB & Sasha + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +// Sasha Pachev <sasha@mysql.com> is currently in charge of this file + +#include "mysql_priv.h" +#include "repl_failsafe.h" + +RPL_STATUS rpl_status=RPL_NULL; +pthread_mutex_t LOCK_rpl_status; +pthread_cond_t COND_rpl_status; + +const char *rpl_role_type[] = {"MASTER","SLAVE",NullS}; +TYPELIB rpl_role_typelib = {array_elements(rpl_role_type)-1,"", + rpl_role_type}; + +const char* rpl_status_type[] = {"AUTH_MASTER","ACTIVE_SLAVE","IDLE_SLAVE", + "LOST_SOLDIER","TROOP_SOLDIER", + "RECOVERY_CAPTAIN","NULL",NullS}; +TYPELIB rpl_status_typelib= {array_elements(rpl_status_type)-1,"", + rpl_status_type}; + + diff --git a/sql/repl_failsafe.h b/sql/repl_failsafe.h new file mode 100644 index 00000000000..95069404acb --- /dev/null +++ b/sql/repl_failsafe.h @@ -0,0 +1,13 @@ +#ifndef REPL_FAILSAFE_H +#define REPL_FAILSAFE_H + +typedef enum {RPL_AUTH_MASTER=0,RPL_ACTIVE_SLAVE,RPL_IDLE_SLAVE, + RPL_LOST_SOLDIER,RPL_TROOP_SOLDIER, + RPL_RECOVERY_CAPTAIN,RPL_NULL} RPL_STATUS; +extern RPL_STATUS rpl_status; + +extern pthread_mutex_t LOCK_rpl_status; +extern pthread_cond_t COND_rpl_status; +extern TYPELIB rpl_role_typelib, rpl_status_typelib; +extern const char* rpl_role_type[], *rpl_status_type[]; +#endif diff --git a/sql/sql_base.cc b/sql/sql_base.cc index 595bee99908..d16998e1581 100644 --- a/sql/sql_base.cc +++ b/sql/sql_base.cc @@ -1390,11 +1390,7 @@ TABLE *open_ltable(THD *thd, TABLE_LIST *table_list, thr_lock_type lock_type) #if defined( __WIN__) || defined(OS2) /* Win32 can't drop a file that is open */ - if (lock_type == TL_WRITE_ALLOW_READ -#ifdef HAVE_GEMINI_DB - && table->db_type != DB_TYPE_GEMINI -#endif /* HAVE_GEMINI_DB */ - ) + if (lock_type == TL_WRITE_ALLOW_READ) { lock_type= TL_WRITE; } diff --git a/sql/sql_class.cc b/sql/sql_class.cc index 
b77166d0bc0..006e7364b3b 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -93,7 +93,6 @@ THD::THD():user_time(0),fatal_error(0),last_insert_id_used(0), tmp_table=0; lock=locked_tables=0; used_tables=0; - gemini_spin_retries=0; cuted_fields=sent_row_count=0L; start_time=(time_t) 0; current_linfo = 0; @@ -110,9 +109,6 @@ THD::THD():user_time(0),fatal_error(0),last_insert_id_used(0), #ifdef __WIN__ real_id = 0; #endif -#ifdef HAVE_GEMINI_DB - bzero((char *)&gemini, sizeof(gemini)); -#endif #ifdef SIGNAL_WITH_VIO_CLOSE active_vio = 0; pthread_mutex_init(&active_vio_lock, MY_MUTEX_INIT_FAST); diff --git a/sql/sql_class.h b/sql/sql_class.h index b34b97b29a0..f2e174d85c1 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -262,9 +262,6 @@ public: THD_TRANS stmt; /* Trans for current statement */ uint bdb_lock_count; } transaction; -#ifdef HAVE_GEMINI_DB - struct st_gemini gemini; -#endif Item *free_list, *handler_items; CONVERT *convert_set; Field *dupp_field; @@ -280,7 +277,6 @@ public: max_join_size, sent_row_count, examined_row_count; table_map used_tables; ulong query_id,version, inactive_timeout,options,thread_id; - ulong gemini_spin_retries; long dbug_thread_id; pthread_t real_id; uint current_tablenr,tmp_table,cond_count,col_access,query_length; @@ -374,12 +370,10 @@ public: { #ifdef USING_TRANSACTIONS return (transaction.all.bdb_tid != 0 || - transaction.all.innodb_active_trans != 0 || - transaction.all.gemini_tid != 0); + transaction.all.innodb_active_trans != 0); #else return 0; #endif - } inline gptr alloc(unsigned int size) { return alloc_root(&mem_root,size); } inline gptr calloc(unsigned int size) diff --git a/sql/sql_db.cc b/sql/sql_db.cc index 05b380ebd77..64ad205803e 100644 --- a/sql/sql_db.cc +++ b/sql/sql_db.cc @@ -158,6 +158,7 @@ int mysql_rm_db(THD *thd,char *db,bool if_exists, bool silent) error = -1; if ((deleted=mysql_rm_known_files(thd, dirp, db, path,0)) >= 0 && thd) { + ha_drop_database(path); if (!silent) { if (!thd->query) @@ -333,7 
+334,7 @@ bool mysql_change_db(THD *thd,const char *name) x_free(dbname); DBUG_RETURN(1); } - DBUG_PRINT("general",("Use database: %s", dbname)); + DBUG_PRINT("info",("Use database: %s", dbname)); if (test_all_bits(thd->master_access,DB_ACLS)) db_access=DB_ACLS; else diff --git a/sql/sql_lex.h b/sql/sql_lex.h index 6ccb0a6b059..83652d1f818 100644 --- a/sql/sql_lex.h +++ b/sql/sql_lex.h @@ -174,7 +174,6 @@ typedef struct st_lex { HA_CREATE_INFO create_info; LEX_MASTER_INFO mi; // used by CHANGE MASTER ulong thread_id,type; - ulong gemini_spin_retries; enum_sql_command sql_command; enum lex_states next_state; enum enum_duplicates duplicates; diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc index cdf6cb8e7f1..b83a50aec16 100644 --- a/sql/sql_parse.cc +++ b/sql/sql_parse.cc @@ -120,7 +120,7 @@ static bool check_user(THD *thd,enum_server_command command, const char *user, protocol_version == 9 || !(thd->client_capabilities & CLIENT_LONG_PASSWORD)); - DBUG_PRINT("general", + DBUG_PRINT("info", ("Capabilities: %d packet_length: %d Host: '%s' User: '%s' Using password: %s Access: %u db: '%s'", thd->client_capabilities, thd->max_packet_length, thd->host_or_ip, thd->priv_user, @@ -323,7 +323,7 @@ check_connections(THD *thd) */ DBUG_PRINT("info", (("check_connections called by thread %d"), thd->thread_id)); - DBUG_PRINT("general",("New connection received on %s", + DBUG_PRINT("info",("New connection received on %s", vio_description(net->vio))); if (!thd->host) // If TCP/IP connection { @@ -347,15 +347,15 @@ check_connections(THD *thd) if (connect_errors > max_connect_errors) return(ER_HOST_IS_BLOCKED); } - DBUG_PRINT("general",("Host: %s ip: %s", - thd->host ? thd->host : "unknown host", - thd->ip ? thd->ip : "unknown ip")); + DBUG_PRINT("info",("Host: %s ip: %s", + thd->host ? thd->host : "unknown host", + thd->ip ? 
thd->ip : "unknown ip")); if (acl_check_host(thd->host,thd->ip)) return(ER_HOST_NOT_PRIVILEGED); } else /* Hostname given means that the connection was on a socket */ { - DBUG_PRINT("general",("Host: %s",thd->host)); + DBUG_PRINT("info",("Host: %s",thd->host)); thd->host_or_ip=thd->host; thd->ip=0; bzero((char*) &thd->remote,sizeof(struct sockaddr)); @@ -731,17 +731,17 @@ bool do_command(THD *thd) net_new_transaction(net); if ((packet_length=my_net_read(net)) == packet_error) { - DBUG_PRINT("general",("Got error reading command from socket %s", - vio_description(net->vio) )); + DBUG_PRINT("info",("Got error reading command from socket %s", + vio_description(net->vio) )); return TRUE; } else { packet=(char*) net->read_pos; command = (enum enum_server_command) (uchar) packet[0]; - DBUG_PRINT("general",("Command on %s = %d (%s)", - vio_description(net->vio), command, - command_name[command])); + DBUG_PRINT("info",("Command on %s = %d (%s)", + vio_description(net->vio), command, + command_name[command])); } net->timeout=old_timeout; // Timeout for writing DBUG_RETURN(dispatch_command(command,thd, packet+1, packet_length)); @@ -1897,11 +1897,6 @@ mysql_execute_command(void) TL_WRITE_LOW_PRIORITY : TL_WRITE); thd->default_select_limit=select_lex->select_limit; thd->tx_isolation=lex->tx_isolation; - if (thd->gemini_spin_retries != lex->gemini_spin_retries) - { - thd->gemini_spin_retries= lex->gemini_spin_retries; - ha_set_spin_retries(thd->gemini_spin_retries); - } DBUG_PRINT("info",("options: %ld limit: %ld", thd->options,(long) thd->default_select_limit)); diff --git a/sql/sql_rename.cc b/sql/sql_rename.cc index 0f6e2f9fbf3..7a6652953cc 100644 --- a/sql/sql_rename.cc +++ b/sql/sql_rename.cc @@ -31,7 +31,7 @@ static TABLE_LIST *rename_tables(THD *thd, TABLE_LIST *table_list, bool mysql_rename_tables(THD *thd, TABLE_LIST *table_list) { - bool error=1,cerror,got_all_locks=1; + bool error=1,got_all_locks=1; TABLE_LIST *lock_table,*ren_table=0; 
DBUG_ENTER("mysql_rename_tables"); @@ -87,12 +87,7 @@ end: } /* Lets hope this doesn't fail as the result will be messy */ - if ((cerror=ha_commit_rename(thd))) - { - my_error(ER_GET_ERRNO,MYF(0),cerror); - error= 1; - } - else if (!error) + if (!error) { mysql_update_log.write(thd,thd->query,thd->query_length); if (mysql_bin_log.is_open()) diff --git a/sql/sql_show.cc b/sql/sql_show.cc index 5a9777e24a4..19c3d89caaf 100644 --- a/sql/sql_show.cc +++ b/sql/sql_show.cc @@ -20,6 +20,7 @@ #include "mysql_priv.h" #include "sql_select.h" // For select_describe #include "sql_acl.h" +#include "repl_failsafe.h" #include <my_dir.h> #ifdef HAVE_BERKELEY_DB @@ -1164,6 +1165,9 @@ int mysqld_show(THD *thd, const char *wild, show_var_st *variables) case SHOW_QUESTION: net_store_data(&packet2,(uint32) thd->query_id); break; + case SHOW_RPL_STATUS: + net_store_data(&packet2, rpl_status_type[(int)rpl_status]); + break; case SHOW_OPENTABLES: net_store_data(&packet2,(uint32) cached_tables()); break; diff --git a/sql/sql_table.cc b/sql/sql_table.cc index 99c2b837480..d76c6bbd627 100644 --- a/sql/sql_table.cc +++ b/sql/sql_table.cc @@ -598,6 +598,7 @@ int mysql_create_table(THD *thd,const char *db, const char *table_name, thd->proc_info="creating table"; + create_info->create_statement = thd->query; create_info->table_options=db_options; if (rea_create_table(path, create_info, fields, key_count, key_info_buffer)) @@ -1192,12 +1193,6 @@ int mysql_alter_table(THD *thd,char *new_db, char *new_name, if (mysql_rename_table(old_db_type,db,table_name,new_db,new_name)) error= -1; } - if (!error && (error=ha_commit_rename(thd))) - { - my_error(ER_GET_ERRNO,MYF(0),error); - error=1; - } - VOID(pthread_cond_broadcast(&COND_refresh)); VOID(pthread_mutex_unlock(&LOCK_open)); } @@ -1704,7 +1699,6 @@ end_temporary: DBUG_RETURN(0); err: - (void) ha_commit_rename(thd); // Just for safety DBUG_RETURN(-1); } diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy index 7d084f5878e..7f35886f52d 100644 --- 
a/sql/sql_yacc.yy +++ b/sql/sql_yacc.yy @@ -185,8 +185,6 @@ bool my_yyoverflow(short **a, YYSTYPE **b,int *yystacksize); %token FROM %token FULL %token FULLTEXT_SYM -%token GEMINI_SYM -%token GEMINI_SPIN_RETRIES %token GLOBAL_SYM %token GRANT %token GRANTS @@ -812,7 +810,6 @@ table_types: | HEAP_SYM { $$= DB_TYPE_HEAP; } | BERKELEY_DB_SYM { $$= DB_TYPE_BERKELEY_DB; } | INNOBASE_SYM { $$= DB_TYPE_INNODB; } - | GEMINI_SYM { $$= DB_TYPE_GEMINI; } row_types: DEFAULT { $$= ROW_TYPE_DEFAULT; } @@ -2862,7 +2859,6 @@ keyword: | FIXED_SYM {} | FLUSH_SYM {} | GRANTS {} - | GEMINI_SYM {} | GLOBAL_SYM {} | HEAP_SYM {} | HANDLER_SYM {} @@ -2957,7 +2953,6 @@ set: lex->sql_command= SQLCOM_SET_OPTION; lex->select->options=lex->thd->options; lex->select->select_limit=lex->thd->default_select_limit; - lex->gemini_spin_retries=lex->thd->gemini_spin_retries; lex->tx_isolation=lex->thd->tx_isolation; lex->option_type=0; lex->option_list.empty() @@ -3026,14 +3021,6 @@ option_value: { current_thd->next_insert_id=$3; } - | GEMINI_SPIN_RETRIES equal ULONG_NUM - { - Lex->gemini_spin_retries= $3; - } - | GEMINI_SPIN_RETRIES equal DEFAULT - { - Lex->gemini_spin_retries= 1; - } | CHAR_SYM SET IDENT { CONVERT *tmp; diff --git a/sql/structs.h b/sql/structs.h index 2f6f850bc9e..439384c7191 100644 --- a/sql/structs.h +++ b/sql/structs.h @@ -140,6 +140,7 @@ enum SHOW_TYPE { SHOW_LONG,SHOW_CHAR,SHOW_INT,SHOW_CHAR_PTR,SHOW_BOOL, ,SHOW_SSL_CTX_SESS_TIMEOUTS, SHOW_SSL_CTX_SESS_CACHE_FULL ,SHOW_SSL_GET_CIPHER_LIST #endif /* HAVE_OPENSSL */ + ,SHOW_RPL_STATUS }; enum SHOW_COMP_OPTION { SHOW_OPTION_YES, SHOW_OPTION_NO, SHOW_OPTION_DISABLED}; diff --git a/sql/table.cc b/sql/table.cc index 1ed856f7854..927119f45de 100644 --- a/sql/table.cc +++ b/sql/table.cc @@ -261,7 +261,7 @@ int openfrm(const char *name, const char *alias, uint db_stat, uint prgflag, outparam->comment=strdup_root(&outparam->mem_root, (char*) head+47); - DBUG_PRINT("form",("i_count: %d i_parts: %d index: %d n_length: %d int_length: %d", 
interval_count,interval_parts, outparam->keys,n_length,int_length)); + DBUG_PRINT("info",("i_count: %d i_parts: %d index: %d n_length: %d int_length: %d", interval_count,interval_parts, outparam->keys,n_length,int_length)); if (!(field_ptr = (Field **) alloc_root(&outparam->mem_root, diff --git a/support-files/mysql.spec.sh b/support-files/mysql.spec.sh index e62b8e84f9b..95d42fdc24d 100644 --- a/support-files/mysql.spec.sh +++ b/support-files/mysql.spec.sh @@ -148,7 +148,7 @@ Summary(pt_BR): MySQL - Medições de desempenho Group(pt_BR): Aplicações/Banco_de_Dados Obsoletes: embedded -%description devel +%description embedded This package contains the MySQL server as library. %{see_base} @@ -360,7 +360,7 @@ fi %attr(755, root, root) /usr/bin/mysqlbug %attr(755, root, root) /usr/bin/mysqltest %attr(755, root, root) /usr/bin/mysqlhotcopy -%attr(755, root, root) /usr/bin/mysql_explain +%attr(755, root, root) /usr/bin/mysql_explain_log %attr(755, root, root) /usr/bin/perror %attr(755, root, root) /usr/bin/replace %attr(755, root, root) /usr/bin/resolveip @@ -423,6 +423,9 @@ fi %files bench %attr(-, root, root) /usr/share/sql-bench %attr(-, root, root) /usr/share/mysql-test +%attr(755, root, root) /usr/bin/mysqlmanager +%attr(755, root, root) /usr/bin/mysqlmanager-pwgen +%attr(755, root, root) /usr/bin/mysqlmanagerc %files Max %attr(755, root, root) /usr/sbin/mysqld-max diff --git a/tools/mysqlmanager.c b/tools/mysqlmanager.c index 4a3981a890e..f2f89c6bc07 100644 --- a/tools/mysqlmanager.c +++ b/tools/mysqlmanager.c @@ -21,27 +21,25 @@ **/ #include <my_global.h> -#include <my_sys.h> -#include <m_string.h> +#include <my_pthread.h> #include <mysql.h> #include <mysql_version.h> -#include <m_ctype.h> -#include <my_config.h> +#include <mysqld_error.h> +#include <my_sys.h> #include <my_dir.h> +#include <m_string.h> +#include <m_ctype.h> #include <hash.h> -#include <mysqld_error.h> -#include <stdio.h> -#include <stdlib.h> #include <getopt.h> #include <stdarg.h> #include 
<sys/stat.h> -#include <sys/types.h> -#include <sys/wait.h> -#include <unistd.h> #include <errno.h> #include <violite.h> -#include <my_pthread.h> #include <md5.h> +#include <signal.h> +#ifdef HAVE_SYS_WAIT_H +#include <sys/wait.h> +#endif #define MANAGER_VERSION "1.0" #define MANAGER_GREETING "MySQL Server Management Daemon v. 1.0" @@ -299,9 +297,8 @@ static int authenticate(struct manager_thd* thd); static char* read_line(struct manager_thd* thd); /* returns pointer to end of line */ -static pthread_handler_decl(process_connection,arg); -static pthread_handler_decl(process_launcher_messages, - __attribute__((unused)) arg); +static pthread_handler_decl(process_connection, arg); +static pthread_handler_decl(process_launcher_messages, arg); static int exec_line(struct manager_thd* thd,char* buf,char* buf_end); #ifdef DO_STACKTRACE @@ -1026,7 +1023,8 @@ static void log_msg(const char* fmt, int msg_type, va_list args) pthread_mutex_unlock(&lock_log); } -#define LOG_MSG_FUNC(type,TYPE) inline static void type \ +/* No 'inline' here becasue functions with ... can't do that portable */ +#define LOG_MSG_FUNC(type,TYPE) static void type \ (const char* fmt,...) { \ va_list args; \ va_start(args,fmt); \ @@ -1040,7 +1038,7 @@ LOG_MSG_FUNC(log_info,LOG_INFO) #ifndef DBUG_OFF LOG_MSG_FUNC(log_debug,LOG_DEBUG) #else -inline void log_debug(char* __attribute__((unused)) fmt,...) {} +void log_debug(const char* __attribute__((unused)) fmt,...) 
{} #endif static pthread_handler_decl(process_launcher_messages, @@ -1065,7 +1063,7 @@ static pthread_handler_decl(process_launcher_messages, char* ident=buf+1; int ident_len=strlen(ident); memcpy(&pid,ident+ident_len+1,sizeof(pid)); - log_debug("process message - ident=%s,ident_len=%d,pid=%d",ident, + log_debug("process message - ident=%s ident_len=%d pid=%d",ident, ident_len,pid); pthread_mutex_lock(&lock_exec_hash); log_debug("hash has %d records",exec_hash.records); @@ -1369,6 +1367,12 @@ static int run_server_loop() int client_sock; uint len; Vio* vio; + pthread_attr_t thr_attr; + (void) pthread_attr_init(&thr_attr); +#if !defined(HAVE_DEC_3_2_THREADS) + pthread_attr_setscope(&thr_attr,PTHREAD_SCOPE_SYSTEM); + (void) pthread_attr_setdetachstate(&thr_attr,PTHREAD_CREATE_DETACHED); +#endif for (;!shutdown_requested;) { @@ -1414,7 +1418,7 @@ static int run_server_loop() manager_thd_free(thd); continue; } - else if (pthread_create(&th,0,process_connection,(void*)thd)) + else if (pthread_create(&th,&thr_attr,process_connection,(void*)thd)) { client_msg(vio,MANAGER_INTERNAL_ERR,"Could not create thread, errno=%d", errno); @@ -1422,6 +1426,7 @@ static int run_server_loop() continue; } } + (void) pthread_attr_destroy(&thr_attr); return 0; } @@ -1545,10 +1550,11 @@ static struct manager_exec* manager_exec_new(char* arg_start,char* arg_end) tmp->error="Too few arguments"; return tmp; } - tmp->data_buf=(char*)tmp+sizeof(*tmp); + /* We have to allocate 'args' first as this must be alligned */ + tmp->args=(char**)(tmp +1); + tmp->data_buf= (char*) (tmp->args + num_args); memcpy(tmp->data_buf,arg_start,arg_len); tmp->data_buf_size=arg_len; - tmp->args=(char**)(tmp->data_buf+arg_len); tmp->num_args=num_args; tmp->ident=tmp->data_buf; tmp->ident_len=strlen(tmp->ident); @@ -1660,13 +1666,20 @@ static void init_user_hash() static void init_globals() { + pthread_attr_t thr_attr; if (hash_init(&exec_hash,1024,0,0,get_exec_key,manager_exec_free,MYF(0))) die("Exec hash 
initialization failed"); if (!one_thread) { + (void) pthread_attr_init(&thr_attr); +#if !defined(HAVE_DEC_3_2_THREADS) + pthread_attr_setscope(&thr_attr,PTHREAD_SCOPE_SYSTEM); + (void) pthread_attr_setdetachstate(&thr_attr,PTHREAD_CREATE_DETACHED); +#endif fork_launcher(); - if (pthread_create(&launch_msg_th,0,process_launcher_messages,0)) + if (pthread_create(&launch_msg_th,&thr_attr,process_launcher_messages,0)) die("Could not start launcher message handler thread"); + /* (void) pthread_attr_destroy(&thr_attr); */ } init_user_hash(); loop_th=pthread_self(); diff --git a/vio/viossl.c b/vio/viossl.c index e4f2d0a5c9f..19fd6e0f0ab 100644 --- a/vio/viossl.c +++ b/vio/viossl.c @@ -58,7 +58,7 @@ report_errors() if (!any_ssl_error) { DBUG_PRINT("info", ("No OpenSSL errors.")); } - DBUG_PRINT("info", ("BTW, errno=%d", scoket_errno)); + DBUG_PRINT("info", ("BTW, errno=%d", socket_errno)); DBUG_VOID_RETURN; } |