summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.bzrignore1
-rwxr-xr-xBuild-tools/Do-compile9
-rw-r--r--Docs/internals.texi21
-rw-r--r--Docs/manual.texi38
-rw-r--r--client/mysqldump.c2
-rw-r--r--client/mysqlmanagerc.c16
-rw-r--r--client/mysqltest.c26
-rw-r--r--innobase/btr/btr0btr.c2
-rw-r--r--innobase/btr/btr0cur.c130
-rw-r--r--innobase/btr/btr0pcur.c70
-rw-r--r--innobase/btr/btr0sea.c2
-rw-r--r--innobase/buf/buf0buf.c201
-rw-r--r--innobase/buf/buf0lru.c4
-rw-r--r--innobase/configure.in2
-rw-r--r--innobase/data/data0data.c50
-rw-r--r--innobase/data/data0type.c19
-rw-r--r--innobase/dict/dict0crea.c229
-rw-r--r--innobase/dict/dict0dict.c1020
-rw-r--r--innobase/dict/dict0load.c606
-rw-r--r--innobase/dict/dict0mem.c43
-rw-r--r--innobase/fil/fil0fil.c77
-rw-r--r--innobase/ibuf/ibuf0ibuf.c118
-rw-r--r--innobase/include/btr0cur.h23
-rw-r--r--innobase/include/btr0pcur.h64
-rw-r--r--innobase/include/btr0pcur.ic4
-rw-r--r--innobase/include/btr0sea.h6
-rw-r--r--innobase/include/buf0buf.h50
-rw-r--r--innobase/include/buf0buf.ic8
-rw-r--r--innobase/include/data0data.h26
-rw-r--r--innobase/include/data0data.ic70
-rw-r--r--innobase/include/data0type.h11
-rw-r--r--innobase/include/data0type.ic21
-rw-r--r--innobase/include/db0err.h13
-rw-r--r--innobase/include/dict0crea.h18
-rw-r--r--innobase/include/dict0dict.h77
-rw-r--r--innobase/include/dict0dict.ic28
-rw-r--r--innobase/include/dict0load.h32
-rw-r--r--innobase/include/dict0mem.h70
-rw-r--r--innobase/include/dict0types.h1
-rw-r--r--innobase/include/fil0fil.h3
-rw-r--r--innobase/include/ibuf0ibuf.h15
-rw-r--r--innobase/include/lock0lock.h32
-rw-r--r--innobase/include/log0log.h5
-rw-r--r--innobase/include/mtr0mtr.h12
-rw-r--r--innobase/include/mtr0mtr.ic16
-rw-r--r--innobase/include/rem0cmp.h26
-rw-r--r--innobase/include/row0ins.h22
-rw-r--r--innobase/include/row0mysql.h49
-rw-r--r--innobase/include/row0upd.h45
-rw-r--r--innobase/include/row0upd.ic17
-rw-r--r--innobase/include/srv0srv.h51
-rw-r--r--innobase/include/sync0arr.h24
-rw-r--r--innobase/include/sync0ipm.ic4
-rw-r--r--innobase/include/sync0rw.h98
-rw-r--r--innobase/include/sync0rw.ic93
-rw-r--r--innobase/include/sync0sync.h36
-rw-r--r--innobase/include/sync0sync.ic41
-rw-r--r--innobase/include/trx0rseg.ic12
-rw-r--r--innobase/include/trx0trx.h19
-rw-r--r--innobase/include/ut0mem.h3
-rw-r--r--innobase/lock/lock0lock.c116
-rw-r--r--innobase/log/log0log.c47
-rw-r--r--innobase/log/log0recv.c18
-rw-r--r--innobase/mem/mem0pool.c4
-rw-r--r--innobase/mtr/mtr0log.c27
-rw-r--r--innobase/os/os0file.c142
-rw-r--r--innobase/page/page0page.c4
-rw-r--r--innobase/pars/lexyy.c6
-rw-r--r--innobase/pars/pars0grm.c20
-rw-r--r--innobase/pars/pars0opt.c1
-rw-r--r--innobase/pars/pars0pars.c3
-rw-r--r--innobase/que/que0que.c2
-rw-r--r--innobase/rem/rem0cmp.c62
-rw-r--r--innobase/row/row0ins.c244
-rw-r--r--innobase/row/row0mysql.c419
-rw-r--r--innobase/row/row0purge.c4
-rw-r--r--innobase/row/row0sel.c94
-rw-r--r--innobase/row/row0umod.c4
-rw-r--r--innobase/row/row0upd.c291
-rw-r--r--innobase/row/row0vers.c16
-rw-r--r--innobase/srv/srv0srv.c395
-rw-r--r--innobase/srv/srv0start.c66
-rw-r--r--innobase/sync/sync0arr.c253
-rw-r--r--innobase/sync/sync0rw.c108
-rw-r--r--innobase/sync/sync0sync.c85
-rw-r--r--innobase/thr/thr0loc.c2
-rw-r--r--innobase/trx/trx0purge.c6
-rw-r--r--innobase/trx/trx0rec.c6
-rw-r--r--innobase/trx/trx0trx.c4
-rw-r--r--innobase/trx/trx0undo.c37
-rw-r--r--innobase/ut/ut0mem.c39
-rw-r--r--innobase/ut/ut0ut.c2
-rw-r--r--libmysql/manager.c38
-rw-r--r--libmysqld/Makefile.am3
-rw-r--r--mysql-test/mysql-test-run.sh140
-rw-r--r--mysql-test/r/innodb.result6
-rw-r--r--mysql-test/r/rpl_failsafe.result22
-rw-r--r--mysql-test/t/rpl000018.test1
-rw-r--r--mysql-test/t/rpl_failsafe.test19
-rw-r--r--mysys/mf_sort.c2
-rw-r--r--scripts/make_binary_distribution.sh7
-rw-r--r--sql-bench/test-insert.sh14
-rw-r--r--sql/Makefile.am2
-rw-r--r--sql/ha_innobase.cc236
-rw-r--r--sql/ha_innobase.h2
-rw-r--r--sql/handler.cc7
-rw-r--r--sql/handler.h2
-rw-r--r--sql/mysqld.cc23
-rw-r--r--sql/repl_failsafe.cc43
-rw-r--r--sql/repl_failsafe.h16
-rw-r--r--sql/slave.cc11
-rw-r--r--sql/sql_db.cc3
-rw-r--r--sql/sql_parse.cc22
-rw-r--r--sql/sql_show.cc4
-rw-r--r--sql/sql_table.cc1
-rw-r--r--sql/structs.h1
-rw-r--r--sql/table.cc2
-rw-r--r--support-files/mysql.spec.sh3
-rw-r--r--tools/mysqlmanager.c44
119 files changed, 5672 insertions, 1360 deletions
diff --git a/.bzrignore b/.bzrignore
index 0e2ef25bf2e..6180f251d7c 100644
--- a/.bzrignore
+++ b/.bzrignore
@@ -255,6 +255,7 @@ libmysqld/opt_sum.cc
libmysqld/password.c
libmysqld/procedure.cc
libmysqld/records.cc
+libmysqld/repl_failsafe.cc
libmysqld/simple-test
libmysqld/slave.cc
libmysqld/sql_acl.cc
diff --git a/Build-tools/Do-compile b/Build-tools/Do-compile
index c03ca84c29d..76b94f314c8 100755
--- a/Build-tools/Do-compile
+++ b/Build-tools/Do-compile
@@ -153,7 +153,7 @@ if ($opt_stage <= 1)
{
$opt_config_options.= " --with-innodb"
}
- check_system("$opt_config_env ./configure --prefix=/usr/local/mysql \"--with-comment=Official MySQL$version_suffix binary\" --with-extra-charsets=complex \"--with-server-suffix=$version_suffix\" $opt_config_options","Thank you for choosing MySQL");
+ check_system("$opt_config_env ./configure --prefix=/usr/local/mysql \"--with-comment=Official MySQL$version_suffix binary\" --with-extra-charsets=complex \"--with-server-suffix=$version_suffix\" --enable-thread-safe-client $opt_config_options","Thank you for choosing MySQL");
if (-d "$pwd/$host/include-mysql")
{
safe_system("cp -r $pwd/$host/include-mysql/* $pwd/$host/$ver/include");
@@ -215,7 +215,7 @@ if ($opt_stage <= 5 && !$opt_no_test && !$opt_no_mysqltest)
{
system("mkdir $bench_tmpdir") if (! -d $bench_tmpdir);
safe_cd("${test_dir}/mysql-test");
- check_system("./mysql-test-run --tmpdir=$bench_tmpdir --master_port=$mysql_tcp_port --slave_port=$slave_port --manager-port=$manager_port --sleep=10", "tests were successful");
+ check_system("./mysql-test-run --tmpdir=$bench_tmpdir --master_port=$mysql_tcp_port --slave_port=$slave_port --manager-port=$manager_port --no-manager --sleep=10", "tests were successful");
}
# Start the server if we are going to run any of the benchmarks
@@ -317,7 +317,7 @@ exit 0;
sub usage
{
print <<EOF;
-$0 version 1.3
+$0 version 1.4
$0 takes the following options:
@@ -549,7 +549,8 @@ sub kill_all
chop($cand);
($pid_user, $pid) = split(' ', $cand);
next if $pid == $$;
- next process if (! ($cand =~ $pattern) || $pid_user ne $user)
+ next process if (! ($cand =~ $pattern) || $pid_user ne $user);
+ print LOG "Killing $_\n";
&killpid($pid);
}
}
diff --git a/Docs/internals.texi b/Docs/internals.texi
index 2195b42d9a0..f255a7ab6a6 100644
--- a/Docs/internals.texi
+++ b/Docs/internals.texi
@@ -481,6 +481,27 @@ Functions i mysys: (For flags se my_sys.h)
void end_key_cache _A((void));
- End key-cacheing.
+@node DBUG,,,
+@chapter The DBUG tags to use:
+
+Here are some of the tags we now use:
+(We should probably add a couple of new ones)
+
+"enter" Arguments to the function.
+"exit" Results from the function.
+"info" is something that may be interesting.
+"warning" is when something doesn't go the usual route or may be wrong.
+"error" when something went wrong.
+"loop" write in a loop, that is probably only useful when debugging
+ the loop. These should normally be deleted when one is
+ satisfied with the code and it has been in real use for a while.
+
+Some specific to mysqld, because we want to watch these carefully:
+
+"trans" Starting/stopping transactions.
+"quit" 'info' when mysqld is preparing to die.
+"query" Print query
+
@node protocol,,,
@chapter MySQL client/server protocol
diff --git a/Docs/manual.texi b/Docs/manual.texi
index 28219eabcaf..fb7fe288082 100644
--- a/Docs/manual.texi
+++ b/Docs/manual.texi
@@ -2433,7 +2433,7 @@ mysql> SHOW STATUS;
If a bug or problem occurs while running @strong{mysqld}, try to provide an
input script that will reproduce the anomaly. This script should include any
necessary source files. The more closely the script can reproduce your
-situation, the better. If you can make a repeatable test case, you should
+situation, the better. If you can make a reproducible test case, you should
post this to @email{bugs@@lists.mysql.com} for a high priority treatment!
If you can't provide a script, you should at least include the output
@@ -3532,12 +3532,18 @@ an application when you delete records from a table that has a foreign key.
In practice this is as quick (in some cases quicker) and much more portable
than using foreign keys.
+In MySQL 4.0 you can use multi-table delete to delete rows from many
+tables with one command. @xref{DELETE}.
+
In the near future we will extend the @code{FOREIGN KEY} implementation so
that at least the information will be saved in the table specification file
and may be retrieved by @code{mysqldump} and ODBC. At a later stage we will
implement the foreign key constraints for application that can't easily be
coded to avoid them.
+In MySQL 3.23.44 and later, InnoDB tables support checking of foreign
+key constraints. @xref{InnoDB}.
+
@menu
* Broken Foreign KEY:: Reasons NOT to use foreign keys constraints
@end menu
@@ -4033,8 +4039,13 @@ If the date is totally wrong, MySQL will store the special
0000-00-00 date value in the column.
@item
-If you set an @code{enum} to an unsupported value, it will be set to
+If you set an @code{ENUM} column to an unsupported value, it will be set to
the error value 'empty string', with numeric value 0.
+
+@item
+If you set a @code{SET} column to an unsupported value, the value will
+be ignored. @xref{Bugs}.
+
@end itemize
@item
@@ -4775,7 +4786,7 @@ Included in the MySQL distribution are two different testing suites,
@file{mysql-test-run} and
@uref{http://www.mysql.com/information/crash-me.php,crash-me}, as well
as a benchmark suite. The test system is actively updated with code to
-test each new feature and almost all repeatable bugs that have come to
+test each new feature and almost all reproducible bugs that have come to
our attention. We test MySQL with these on a lot of platforms before
every release. These tests are more sophisticated than anything we have
seen from PostgreSQL, and they ensures that the MySQL is kept to a high
@@ -4935,6 +4946,18 @@ Standard usage in PostgreSQL is closer to ANSI SQL in some cases.
One can speed up PostgreSQL by coding things as stored procedures.
@item
+For geographical data, R-trees make PostgreSQL better than MySQL.
+
+@item
+The PostgreSQL optimizer can do some optimization that the current MySQL
+optimizer can't do. Most notable is doing joins when you don't have the
+proper keys in place and doing a join where you are using different keys
+combined with OR. The MySQL benchmark suite at
+@uref{http://www.mysql.com/information/benchmarks.html} shows you what
+kind of constructs you should watch out for when using different
+databases.
+
+@item
PostgreSQL has a bigger team of developers that contribute to the server.
@end itemize
@@ -29146,6 +29169,9 @@ specified at table creation time. For example, if a column is specified as
@code{SET("a","b","c","d")}, then @code{"a,d"}, @code{"d,a"}, and
@code{"d,a,a,d,d"} will all appear as @code{"a,d"} when retrieved.
+If you set a @code{SET} column to an unsupported value, the value will
+be ignored.
+
@code{SET} values are sorted numerically. @code{NULL} values sort before
non-@code{NULL} @code{SET} values.
@@ -33790,8 +33816,10 @@ column in a table, the default value is the current date and time.
@xref{Date and time types}.
@item
-For string types other than @code{ENUM}, the default value is the empty string.
-For @code{ENUM}, the default is the first enumeration value.
+For string types other than @code{ENUM}, the default value is the empty
+string. For @code{ENUM}, the default is the first enumeration value (if
+you haven't explicitly specified another default value with the
+@code{DEFAULT} directive).
@end itemize
Default values must be constants. This means, for example, that you cannot
diff --git a/client/mysqldump.c b/client/mysqldump.c
index 894286f8896..64e7ae0fd82 100644
--- a/client/mysqldump.c
+++ b/client/mysqldump.c
@@ -1150,9 +1150,9 @@ static void dumpTable(uint numFields, char *table)
}
if (opt_lock)
fputs("UNLOCK TABLES;\n", md_result_file);
- mysql_free_result(res);
if (opt_autocommit)
fprintf(md_result_file, "commit;\n");
+ mysql_free_result(res);
}
} /* dumpTable */
diff --git a/client/mysqlmanagerc.c b/client/mysqlmanagerc.c
index 66b77e237e2..4e34c85d55d 100644
--- a/client/mysqlmanagerc.c
+++ b/client/mysqlmanagerc.c
@@ -17,27 +17,15 @@
#define MANAGER_CLIENT_VERSION "1.0"
#include <my_global.h>
-#include <my_sys.h>
-#include <m_string.h>
#include <mysql.h>
#include <mysql_version.h>
-#include <m_ctype.h>
-#ifdef OS2
-#include <config-os2.h>
-#else
-#include <my_config.h>
-#endif
-#include <my_dir.h>
-#include <hash.h>
#include <mysqld_error.h>
-#include <stdio.h>
-#include <stdlib.h>
+#include <my_sys.h>
+#include <m_string.h>
#include <getopt.h>
#include <stdarg.h>
#include <sys/stat.h>
#include <unistd.h>
-#include <errno.h>
-#include <violite.h>
#ifndef MYSQL_MANAGER_PORT
#define MYSQL_MANAGER_PORT 23546
diff --git a/client/mysqltest.c b/client/mysqltest.c
index 1374a276231..1bf3ab75aed 100644
--- a/client/mysqltest.c
+++ b/client/mysqltest.c
@@ -92,7 +92,7 @@ static char *db = 0, *pass=0;
const char* user = 0, *host = 0, *unix_sock = 0, *opt_basedir="./";
static int port = 0, opt_big_test=0, opt_compress=0;
static uint start_lineno, *lineno;
-const char* manager_user="root",*manager_host="localhost";
+const char* manager_user="root",*manager_host=0;
char *manager_pass=0;
int manager_port=MYSQL_MANAGER_PORT;
int manager_wait_timeout=3;
@@ -181,7 +181,7 @@ Q_PING, Q_EVAL,
Q_RPL_PROBE, Q_ENABLE_RPL_PARSE,
Q_DISABLE_RPL_PARSE, Q_EVAL_RESULT,
Q_ENABLE_QUERY_LOG, Q_DISABLE_QUERY_LOG,
-Q_SERVER_START, Q_SERVER_STOP,
+Q_SERVER_START, Q_SERVER_STOP,Q_REQUIRE_MANAGER,
Q_UNKNOWN, /* Unknown command. */
Q_COMMENT, /* Comments, ignored. */
Q_COMMENT_WITH_COMMAND
@@ -215,6 +215,7 @@ const char *command_names[] = {
"disable_rpl_parse", "eval_result",
"enable_query_log", "disable_query_log",
"server_start", "server_stop",
+ "require_manager",
0
};
@@ -640,6 +641,13 @@ int open_file(const char* name)
return 0;
}
+int do_require_manager(struct st_query* __attribute__((unused)) q)
+{
+ if (!manager)
+ abort_not_supported_test();
+ return 0;
+}
+
#ifndef EMBEDDED_LIBRARY
int do_server_start(struct st_query* q)
{
@@ -655,6 +663,10 @@ int do_server_op(struct st_query* q,const char* op)
{
char* p=q->first_argument;
char com_buf[256],*com_p;
+ if (!manager)
+ {
+ die("Manager is not initialized, manager commands are not possible");
+ }
com_p=strmov(com_buf,op);
com_p=strmov(com_p,"_exec ");
if (!*p)
@@ -1926,7 +1938,9 @@ int run_query(MYSQL* mysql, struct st_query* q, int flags)
ds= &ds_res;
if ((flags & QUERY_SEND) && mysql_send_query(mysql, query, query_len))
- die("At line %u: unable to send query '%s'", start_lineno, query);
+ die("At line %u: unable to send query '%s'(mysql_errno=%d,errno=%d)",
+ start_lineno, query,
+ mysql_errno(mysql), errno);
if ((flags & QUERY_SEND) && !disable_query_log)
{
dynstr_append_mem(ds,query,query_len);
@@ -2195,8 +2209,9 @@ int main(int argc, char** argv)
if (cur_file == file_stack)
*++cur_file = stdin;
*lineno=1;
-#ifndef EMBEDDED_LIBRARY
- init_manager();
+#ifndef EMBEDDED_LIBRARY
+ if (manager_host)
+ init_manager();
#endif
if (!( mysql_init(&cur_con->mysql)))
die("Failed in mysql_init()");
@@ -2231,6 +2246,7 @@ int main(int argc, char** argv)
case Q_DISABLE_QUERY_LOG: disable_query_log=1; break;
case Q_SOURCE: do_source(q); break;
case Q_SLEEP: do_sleep(q); break;
+ case Q_REQUIRE_MANAGER: do_require_manager(q); break;
#ifndef EMBEDDED_LIBRARY
case Q_SERVER_START: do_server_start(q); break;
case Q_SERVER_STOP: do_server_stop(q); break;
diff --git a/innobase/btr/btr0btr.c b/innobase/btr/btr0btr.c
index 6da323867fb..e4e957ea7b6 100644
--- a/innobase/btr/btr0btr.c
+++ b/innobase/btr/btr0btr.c
@@ -2347,6 +2347,8 @@ btr_validate_level(
mtr_start(&mtr);
+ mtr_x_lock(dict_tree_get_lock(tree), &mtr);
+
page = btr_root_get(tree, &mtr);
space = buf_frame_get_space_id(page);
diff --git a/innobase/btr/btr0cur.c b/innobase/btr/btr0cur.c
index 7783f618d6d..a64ed8b6fe1 100644
--- a/innobase/btr/btr0cur.c
+++ b/innobase/btr/btr0cur.c
@@ -256,7 +256,8 @@ btr_cur_search_to_nth_level(
#ifdef UNIV_SEARCH_PERF_STAT
info->n_searches++;
#endif
- if (latch_mode <= BTR_MODIFY_LEAF && info->last_hash_succ
+ if (btr_search_latch.writer != RW_LOCK_NOT_LOCKED
+ && latch_mode <= BTR_MODIFY_LEAF && info->last_hash_succ
&& !estimate
&& btr_search_guess_on_hash(index, info, tuple, mode,
latch_mode, cursor,
@@ -344,9 +345,7 @@ btr_cur_search_to_nth_level(
retry_page_get:
page = buf_page_get_gen(space, page_no, rw_latch, guess,
buf_mode,
-#ifdef UNIV_SYNC_DEBUG
IB__FILE__, __LINE__,
-#endif
mtr);
if (page == NULL) {
@@ -380,7 +379,7 @@ retry_page_get:
}
#endif
ut_ad(0 == ut_dulint_cmp(tree->id,
- btr_page_get_index_id(page)));
+ btr_page_get_index_id(page)));
if (height == ULINT_UNDEFINED) {
/* We are in the root node */
@@ -515,9 +514,7 @@ btr_cur_open_at_index_side(
for (;;) {
page = buf_page_get_gen(space, page_no, RW_NO_LATCH, NULL,
BUF_GET,
-#ifdef UNIV_SYNC_DEBUG
IB__FILE__, __LINE__,
-#endif
mtr);
ut_ad(0 == ut_dulint_cmp(tree->id,
btr_page_get_index_id(page)));
@@ -604,9 +601,7 @@ btr_cur_open_at_rnd_pos(
for (;;) {
page = buf_page_get_gen(space, page_no, RW_NO_LATCH, NULL,
BUF_GET,
-#ifdef UNIV_SYNC_DEBUG
IB__FILE__, __LINE__,
-#endif
mtr);
ut_ad(0 == ut_dulint_cmp(tree->id,
btr_page_get_index_id(page)));
@@ -1223,6 +1218,57 @@ btr_cur_parse_update_in_place(
}
/*****************************************************************
+Updates a secondary index record when the update causes no size
+changes in its fields. The only case when this function is currently
+called is when characters in a char field change to others which
+are treated as equal in the collation order. */
+
+ulint
+btr_cur_update_sec_rec_in_place(
+/*============================*/
+ /* out: DB_SUCCESS or error number */
+ btr_cur_t* cursor, /* in: cursor on the record to update;
+ cursor stays valid and positioned on the
+ same record */
+ upd_t* update, /* in: update vector */
+ que_thr_t* thr, /* in: query thread */
+ mtr_t* mtr) /* in: mtr */
+{
+ dict_index_t* index = cursor->index;
+ dict_index_t* clust_index;
+ ulint err;
+ rec_t* rec;
+ dulint roll_ptr = ut_dulint_zero;
+ trx_t* trx = thr_get_trx(thr);
+
+ /* Only secondary index records are updated using this function */
+ ut_ad(0 == (index->type & DICT_CLUSTERED));
+
+ rec = btr_cur_get_rec(cursor);
+
+ err = lock_sec_rec_modify_check_and_lock(0, rec, index, thr);
+
+ if (err != DB_SUCCESS) {
+
+ return(err);
+ }
+
+ /* Remove possible hash index pointer to this record */
+ btr_search_update_hash_on_delete(cursor);
+
+ row_upd_rec_in_place(rec, update);
+
+ clust_index = dict_table_get_first_index(index->table);
+
+ /* Note that roll_ptr is really just a dummy value since
+ a secondary index record does not contain any sys columns */
+
+ btr_cur_update_in_place_log(BTR_KEEP_SYS_FLAG, rec, clust_index,
+ update, trx, roll_ptr, mtr);
+ return(DB_SUCCESS);
+}
+
+/*****************************************************************
Updates a record when the update causes no size changes in its fields. */
ulint
@@ -1248,7 +1294,7 @@ btr_cur_update_in_place(
ibool was_delete_marked;
/* Only clustered index records are updated using this function */
- ut_ad((cursor->index)->type & DICT_CLUSTERED);
+ ut_ad(cursor->index->type & DICT_CLUSTERED);
rec = btr_cur_get_rec(cursor);
index = cursor->index;
@@ -2477,27 +2523,33 @@ btr_estimate_n_rows_in_range(
}
/***********************************************************************
-Estimates the number of different key values in a given index. */
+Estimates the number of different key values in a given index, for
+each n-column prefix of the index where n <= dict_index_get_n_unique(index).
+The estimates are stored in the array index->stat_n_diff_key_vals. */
-ulint
+void
btr_estimate_number_of_different_key_vals(
/*======================================*/
- /* out: estimated number of key values */
dict_index_t* index) /* in: index */
{
btr_cur_t cursor;
page_t* page;
rec_t* rec;
- ulint total_n_recs = 0;
- ulint n_diff_in_page;
- ulint n_diff = 0;
+ ulint n_cols;
ulint matched_fields;
ulint matched_bytes;
+ ulint* n_diff;
+ ulint not_empty_flag = 0;
ulint i;
+ ulint j;
mtr_t mtr;
- if (index->type & DICT_UNIQUE) {
- return(index->table->stat_n_rows);
+ n_cols = dict_index_get_n_unique(index);
+
+ n_diff = mem_alloc((n_cols + 1) * sizeof(ib_longlong));
+
+ for (j = 0; j <= n_cols; j++) {
+ n_diff[j] = 0;
}
/* We sample some pages in the index to get an estimate */
@@ -2507,17 +2559,19 @@ btr_estimate_number_of_different_key_vals(
btr_cur_open_at_rnd_pos(index, BTR_SEARCH_LEAF, &cursor, &mtr);
- /* Count the number of different key values minus one on this
- index page: we subtract one because otherwise our algorithm
- would give a wrong estimate for an index where there is
- just one key value */
+ /* Count the number of different key values minus one
+ for each prefix of the key on this index page: we subtract
+ one because otherwise our algorithm would give a wrong
+ estimate for an index where there is just one key value */
page = btr_cur_get_page(&cursor);
rec = page_get_infimum_rec(page);
rec = page_rec_get_next(rec);
- n_diff_in_page = 0;
+ if (rec != page_get_supremum_rec(page)) {
+ not_empty_flag = 1;
+ }
while (rec != page_get_supremum_rec(page)
&& page_rec_get_next(rec)
@@ -2528,30 +2582,30 @@ btr_estimate_number_of_different_key_vals(
cmp_rec_rec_with_match(rec, page_rec_get_next(rec),
index, &matched_fields,
&matched_bytes);
- if (matched_fields <
- dict_index_get_n_ordering_defined_by_user(
- index)) {
- n_diff_in_page++;
- }
+ for (j = matched_fields + 1; j <= n_cols; j++) {
+ n_diff[j]++;
+ }
+
rec = page_rec_get_next(rec);
}
-
- n_diff += n_diff_in_page;
-
- total_n_recs += page_get_n_recs(page);
mtr_commit(&mtr);
}
- if (n_diff == 0) {
- /* We play safe and assume that there are just two different
- key values in the index */
-
- return(2);
+ /* If we saw k borders between different key values on
+ BTR_KEY_VAL_ESTIMATE_N_PAGES leaf pages, we can estimate how many
+ there will be in index->stat_n_leaf_pages */
+
+ for (j = 0; j <= n_cols; j++) {
+ index->stat_n_diff_key_vals[j] =
+ (n_diff[j] * index->stat_n_leaf_pages
+ + BTR_KEY_VAL_ESTIMATE_N_PAGES - 1
+ + not_empty_flag)
+ / BTR_KEY_VAL_ESTIMATE_N_PAGES;
}
-
- return(index->table->stat_n_rows / (total_n_recs / n_diff));
+
+ mem_free(n_diff);
}
/*================== EXTERNAL STORAGE OF BIG FIELDS ===================*/
diff --git a/innobase/btr/btr0pcur.c b/innobase/btr/btr0pcur.c
index 5e625553929..8ca3d41f7f9 100644
--- a/innobase/btr/btr0pcur.c
+++ b/innobase/btr/btr0pcur.c
@@ -62,8 +62,10 @@ btr_pcur_free_for_mysql(
/******************************************************************
The position of the cursor is stored by taking an initial segment of the
record the cursor is positioned on, before, or after, and copying it to the
-cursor data structure. NOTE that the page where the cursor is positioned
-must not be empty! */
+cursor data structure, or just setting a flag if the cursor is before the
+first in an EMPTY tree, or after the last in an EMPTY tree. NOTE that the
+page where the cursor is positioned must not be empty if the index tree is
+not totally empty! */
void
btr_pcur_store_position(
@@ -93,9 +95,21 @@ btr_pcur_store_position(
ut_a(cursor->latch_mode != BTR_NO_LATCHES);
if (page_get_n_recs(page) == 0) {
+ /* It must be an empty index tree */
- /* Cannot store position! */
- btr_pcur_close(cursor);
+ ut_a(btr_page_get_next(page, mtr) == FIL_NULL
+ && btr_page_get_prev(page, mtr) == FIL_NULL);
+
+ if (rec == page_get_supremum_rec(page)) {
+
+ cursor->rel_pos = BTR_PCUR_AFTER_LAST_IN_TREE;
+ cursor->old_stored = BTR_PCUR_OLD_STORED;
+
+ return;
+ }
+
+ cursor->rel_pos = BTR_PCUR_BEFORE_FIRST_IN_TREE;
+ cursor->old_stored = BTR_PCUR_OLD_STORED;
return;
}
@@ -140,13 +154,15 @@ btr_pcur_copy_stored_position(
ut_memcpy((byte*)pcur_receive, (byte*)pcur_donate, sizeof(btr_pcur_t));
- pcur_receive->old_rec_buf = mem_alloc(pcur_donate->buf_size);
+ if (pcur_donate->old_rec_buf) {
+
+ pcur_receive->old_rec_buf = mem_alloc(pcur_donate->buf_size);
- ut_memcpy(pcur_receive->old_rec_buf, pcur_donate->old_rec_buf,
+ ut_memcpy(pcur_receive->old_rec_buf, pcur_donate->old_rec_buf,
pcur_donate->buf_size);
- pcur_receive->old_rec = pcur_receive->old_rec_buf
+ pcur_receive->old_rec = pcur_receive->old_rec_buf
+ (pcur_donate->old_rec - pcur_donate->old_rec_buf);
-
+ }
}
/******************************************************************
@@ -158,7 +174,9 @@ to the last record LESS OR EQUAL to the stored record;
the last record LESS than the user record which was the successor of the page
infimum;
(3) cursor was positioned on the page supremum: restores to the first record
-GREATER than the user record which was the predecessor of the supremum. */
+GREATER than the user record which was the predecessor of the supremum.
+(4) cursor was positioned before the first or after the last in an empty tree:
+restores to before first or after the last in the tree. */
ibool
btr_pcur_restore_position(
@@ -177,17 +195,33 @@ btr_pcur_restore_position(
dtuple_t* tuple;
ulint mode;
ulint old_mode;
+ ibool from_left;
mem_heap_t* heap;
- ut_a((cursor->pos_state == BTR_PCUR_WAS_POSITIONED)
- || (cursor->pos_state == BTR_PCUR_IS_POSITIONED));
+ ut_a(cursor->pos_state == BTR_PCUR_WAS_POSITIONED
+ || cursor->pos_state == BTR_PCUR_IS_POSITIONED);
ut_a(cursor->old_stored == BTR_PCUR_OLD_STORED);
+
+ if (cursor->rel_pos == BTR_PCUR_AFTER_LAST_IN_TREE
+ || cursor->rel_pos == BTR_PCUR_BEFORE_FIRST_IN_TREE) {
+
+ if (cursor->rel_pos == BTR_PCUR_BEFORE_FIRST_IN_TREE) {
+ from_left = TRUE;
+ } else {
+ from_left = FALSE;
+ }
+
+ btr_cur_open_at_index_side(from_left,
+ btr_pcur_get_btr_cur(cursor)->index, latch_mode,
+ btr_pcur_get_btr_cur(cursor), mtr);
+ return(FALSE);
+ }
+
ut_a(cursor->old_rec);
page = btr_cur_get_page(btr_pcur_get_btr_cur(cursor));
- if ((latch_mode == BTR_SEARCH_LEAF)
- || (latch_mode == BTR_MODIFY_LEAF)) {
+ if (latch_mode == BTR_SEARCH_LEAF || latch_mode == BTR_MODIFY_LEAF) {
/* Try optimistic restoration */
if (buf_page_optimistic_get(latch_mode, page,
@@ -242,16 +276,15 @@ btr_pcur_restore_position(
/* Restore the old search mode */
cursor->search_mode = old_mode;
- if ((cursor->rel_pos == BTR_PCUR_ON)
- && btr_pcur_is_on_user_rec(cursor, mtr)
- && (0 == cmp_dtuple_rec(tuple, btr_pcur_get_rec(cursor)))) {
+ if (cursor->rel_pos == BTR_PCUR_ON
+ && btr_pcur_is_on_user_rec(cursor, mtr)
+ && 0 == cmp_dtuple_rec(tuple, btr_pcur_get_rec(cursor))) {
/* We have to store the NEW value for the modify clock, since
the cursor can now be on a different page! */
cursor->modify_clock = buf_frame_get_modify_clock(
- buf_frame_align(
- btr_pcur_get_rec(cursor)));
+ buf_frame_align(btr_pcur_get_rec(cursor)));
mem_heap_free(heap);
return(TRUE);
@@ -366,6 +399,7 @@ btr_pcur_move_backward_from_page(
latch_mode2 = BTR_MODIFY_PREV;
} else {
+ latch_mode2 = 0; /* To eliminate compiler warning */
ut_error;
}
diff --git a/innobase/btr/btr0sea.c b/innobase/btr/btr0sea.c
index 616f8911aba..31ef8ce573b 100644
--- a/innobase/btr/btr0sea.c
+++ b/innobase/btr/btr0sea.c
@@ -680,9 +680,7 @@ btr_search_guess_on_hash(
success = buf_page_get_known_nowait(latch_mode, page,
BUF_MAKE_YOUNG,
-#ifdef UNIV_SYNC_DEBUG
IB__FILE__, __LINE__,
-#endif
mtr);
rw_lock_s_unlock(&btr_search_latch);
diff --git a/innobase/buf/buf0buf.c b/innobase/buf/buf0buf.c
index acf56ac4ddf..7d9cbf24948 100644
--- a/innobase/buf/buf0buf.c
+++ b/innobase/buf/buf0buf.c
@@ -34,6 +34,8 @@ Created 11/5/1995 Heikki Tuuri
#include "ibuf0ibuf.h"
#include "dict0dict.h"
#include "log0recv.h"
+#include "trx0undo.h"
+#include "srv0srv.h"
/*
IMPLEMENTATION OF THE BUFFER POOL
@@ -240,6 +242,11 @@ buf_page_is_corrupted(
checksum = buf_calc_page_checksum(read_buf);
+ /* Note that InnoDB initializes empty pages to zero, and
+ early versions of InnoDB did not store page checksum to
+ the 4 most significant bytes of the page lsn field at the
+ end of a page: */
+
if ((mach_read_from_4(read_buf + FIL_PAGE_LSN + 4)
!= mach_read_from_4(read_buf + UNIV_PAGE_SIZE
- FIL_PAGE_END_LSN + 4))
@@ -257,6 +264,71 @@ buf_page_is_corrupted(
}
/************************************************************************
+Prints a page to stderr. */
+
+void
+buf_page_print(
+/*===========*/
+ byte* read_buf) /* in: a database page */
+{
+ dict_index_t* index;
+ ulint checksum;
+ char* buf;
+
+ buf = mem_alloc(4 * UNIV_PAGE_SIZE);
+
+ ut_sprintf_buf(buf, read_buf, UNIV_PAGE_SIZE);
+
+ fprintf(stderr,
+ "InnoDB: Page dump in ascii and hex (%lu bytes):\n%s",
+ UNIV_PAGE_SIZE, buf);
+ fprintf(stderr, "InnoDB: End of page dump\n");
+
+ mem_free(buf);
+
+ checksum = buf_calc_page_checksum(read_buf);
+
+ fprintf(stderr, "InnoDB: Page checksum %lu stored checksum %lu\n",
+ checksum, mach_read_from_4(read_buf
+ + UNIV_PAGE_SIZE
+ - FIL_PAGE_END_LSN));
+ fprintf(stderr,
+ "InnoDB: Page lsn %lu %lu, low 4 bytes of lsn at page end %lu\n",
+ mach_read_from_4(read_buf + FIL_PAGE_LSN),
+ mach_read_from_4(read_buf + FIL_PAGE_LSN + 4),
+ mach_read_from_4(read_buf + UNIV_PAGE_SIZE
+ - FIL_PAGE_END_LSN + 4));
+ if (mach_read_from_2(read_buf + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_TYPE)
+ == TRX_UNDO_INSERT) {
+ fprintf(stderr,
+ "InnoDB: Page may be an insert undo log page\n");
+ } else if (mach_read_from_2(read_buf + TRX_UNDO_PAGE_HDR
+ + TRX_UNDO_PAGE_TYPE)
+ == TRX_UNDO_UPDATE) {
+ fprintf(stderr,
+ "InnoDB: Page may be an update undo log page\n");
+ }
+
+ if (fil_page_get_type(read_buf) == FIL_PAGE_INDEX) {
+ fprintf(stderr,
+ "InnoDB: Page may be an index page ");
+
+ fprintf(stderr,
+ "where index id is %lu %lu\n",
+ ut_dulint_get_high(btr_page_get_index_id(read_buf)),
+ ut_dulint_get_low(btr_page_get_index_id(read_buf)));
+
+ index = dict_index_find_on_id_low(
+ btr_page_get_index_id(read_buf));
+ if (index) {
+ fprintf(stderr, "InnoDB: and table %s index %s\n",
+ index->table_name,
+ index->name);
+ }
+ }
+}
+
+/************************************************************************
Initializes a buffer control block when the buf_pool is created. */
static
void
@@ -334,6 +406,8 @@ buf_pool_create(
frame = ut_align(buf_pool->frame_mem, UNIV_PAGE_SIZE);
buf_pool->frame_zero = frame;
+ buf_pool->high_end = frame + UNIV_PAGE_SIZE * curr_size;
+
/* Init block structs and assign frames for them */
for (i = 0; i < max_size; i++) {
@@ -345,6 +419,9 @@ buf_pool_create(
buf_pool->page_hash = hash_create(2 * max_size);
buf_pool->n_pend_reads = 0;
+
+ buf_pool->last_printout_time = time(NULL);
+
buf_pool->n_pages_read = 0;
buf_pool->n_pages_written = 0;
buf_pool->n_pages_created = 0;
@@ -352,6 +429,8 @@ buf_pool_create(
buf_pool->n_page_gets = 0;
buf_pool->n_page_gets_old = 0;
buf_pool->n_pages_read_old = 0;
+ buf_pool->n_pages_written_old = 0;
+ buf_pool->n_pages_created_old = 0;
/* 2. Initialize flushing fields
---------------------------- */
@@ -379,6 +458,10 @@ buf_pool_create(
for (i = 0; i < curr_size; i++) {
block = buf_pool_get_nth_block(buf_pool, i);
+
+ /* Wipe contents of page to eliminate a Purify warning */
+ memset(block->frame, '\0', UNIV_PAGE_SIZE);
+
UT_LIST_ADD_FIRST(free, buf_pool->free, block);
}
@@ -650,10 +733,8 @@ buf_page_get_gen(
buf_frame_t* guess, /* in: guessed frame or NULL */
ulint mode, /* in: BUF_GET, BUF_GET_IF_IN_POOL,
BUF_GET_NO_LATCH, BUF_GET_NOWAIT */
-#ifdef UNIV_SYNC_DEBUG
char* file, /* in: file name */
ulint line, /* in: line where called */
-#endif
mtr_t* mtr) /* in: mini-transaction */
{
buf_block_t* block;
@@ -759,19 +840,13 @@ loop:
if (mode == BUF_GET_NOWAIT) {
if (rw_latch == RW_S_LATCH) {
- success = rw_lock_s_lock_func_nowait(&(block->lock)
- #ifdef UNIV_SYNC_DEBUG
- ,file, line
- #endif
- );
+ success = rw_lock_s_lock_func_nowait(&(block->lock),
+ file, line);
fix_type = MTR_MEMO_PAGE_S_FIX;
} else {
ut_ad(rw_latch == RW_X_LATCH);
- success = rw_lock_x_lock_func_nowait(&(block->lock)
- #ifdef UNIV_SYNC_DEBUG
- ,file, line
- #endif
- );
+ success = rw_lock_x_lock_func_nowait(&(block->lock),
+ file, line);
fix_type = MTR_MEMO_PAGE_X_FIX;
}
@@ -796,18 +871,12 @@ loop:
fix_type = MTR_MEMO_BUF_FIX;
} else if (rw_latch == RW_S_LATCH) {
- rw_lock_s_lock_func(&(block->lock)
- #ifdef UNIV_SYNC_DEBUG
- ,0, file, line
- #endif
- );
+ rw_lock_s_lock_func(&(block->lock), 0, file, line);
+
fix_type = MTR_MEMO_PAGE_S_FIX;
} else {
- rw_lock_x_lock_func(&(block->lock), 0
- #ifdef UNIV_SYNC_DEBUG
- , file, line
- #endif
- );
+ rw_lock_x_lock_func(&(block->lock), 0, file, line);
+
fix_type = MTR_MEMO_PAGE_X_FIX;
}
@@ -838,10 +907,8 @@ buf_page_optimistic_get_func(
buf_frame_t* guess, /* in: guessed frame */
dulint modify_clock,/* in: modify clock value if mode is
..._GUESS_ON_CLOCK */
-#ifdef UNIV_SYNC_DEBUG
char* file, /* in: file name */
ulint line, /* in: line where called */
-#endif
mtr_t* mtr) /* in: mini-transaction */
{
buf_block_t* block;
@@ -883,18 +950,12 @@ buf_page_optimistic_get_func(
ut_ad(!ibuf_inside() || ibuf_page(block->space, block->offset));
if (rw_latch == RW_S_LATCH) {
- success = rw_lock_s_lock_func_nowait(&(block->lock)
- #ifdef UNIV_SYNC_DEBUG
- , file, line
- #endif
- );
+ success = rw_lock_s_lock_func_nowait(&(block->lock),
+ file, line);
fix_type = MTR_MEMO_PAGE_S_FIX;
} else {
- success = rw_lock_x_lock_func_nowait(&(block->lock)
- #ifdef UNIV_SYNC_DEBUG
- , file, line
- #endif
- );
+ success = rw_lock_x_lock_func_nowait(&(block->lock),
+ file, line);
fix_type = MTR_MEMO_PAGE_X_FIX;
}
@@ -971,10 +1032,8 @@ buf_page_get_known_nowait(
ulint rw_latch,/* in: RW_S_LATCH, RW_X_LATCH */
buf_frame_t* guess, /* in: the known page frame */
ulint mode, /* in: BUF_MAKE_YOUNG or BUF_KEEP_OLD */
-#ifdef UNIV_SYNC_DEBUG
char* file, /* in: file name */
ulint line, /* in: line where called */
-#endif
mtr_t* mtr) /* in: mini-transaction */
{
buf_block_t* block;
@@ -1017,18 +1076,12 @@ buf_page_get_known_nowait(
ut_ad(!ibuf_inside() || (mode == BUF_KEEP_OLD));
if (rw_latch == RW_S_LATCH) {
- success = rw_lock_s_lock_func_nowait(&(block->lock)
- #ifdef UNIV_SYNC_DEBUG
- , file, line
- #endif
- );
+ success = rw_lock_s_lock_func_nowait(&(block->lock),
+ file, line);
fix_type = MTR_MEMO_PAGE_S_FIX;
} else {
- success = rw_lock_x_lock_func_nowait(&(block->lock)
- #ifdef UNIV_SYNC_DEBUG
- , file, line
- #endif
- );
+ success = rw_lock_x_lock_func_nowait(&(block->lock),
+ file, line);
fix_type = MTR_MEMO_PAGE_X_FIX;
}
@@ -1318,9 +1371,26 @@ buf_page_io_complete(
fprintf(stderr,
"InnoDB: Database page corruption or a failed\n"
"InnoDB: file read of page %lu.\n", block->offset);
+
fprintf(stderr,
"InnoDB: You may have to recover from a backup.\n");
- exit(1);
+
+ buf_page_print(block->frame);
+
+ fprintf(stderr,
+ "InnoDB: Database page corruption or a failed\n"
+ "InnoDB: file read of page %lu.\n", block->offset);
+ fprintf(stderr,
+ "InnoDB: You may have to recover from a backup.\n");
+ fprintf(stderr,
+ "InnoDB: It is also possible that your operating\n"
+ "InnoDB: system has corrupted its own file cache\n"
+ "InnoDB: and rebooting your computer removes the\n"
+ "InnoDB: error.\n");
+
+ if (srv_force_recovery < SRV_FORCE_IGNORE_CORRUPT) {
+ exit(1);
+ }
}
if (recv_recovery_is_on()) {
@@ -1623,12 +1693,27 @@ buf_print(void)
}
/*************************************************************************
+Returns the number of pending buf pool ios. */
+
+ulint
+buf_get_n_pending_ios(void)
+/*=======================*/
+			/* out: pending reads plus the pending flush
+			counts of all three flush types */
+{
+	ulint	n_pending;
+
+	/* The counters are read here without reserving the buffer
+	pool mutex */
+
+	n_pending = buf_pool->n_pend_reads;
+
+	n_pending += buf_pool->n_flush[BUF_FLUSH_LRU];
+	n_pending += buf_pool->n_flush[BUF_FLUSH_LIST];
+	n_pending += buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE];
+
+	return(n_pending);
+}
+
+/*************************************************************************
Prints info of the buffer i/o. */
void
buf_print_io(void)
/*==============*/
{
+ time_t current_time;
+ double time_elapsed;
ulint size;
ut_ad(buf_pool);
@@ -1637,11 +1722,11 @@ buf_print_io(void)
mutex_enter(&(buf_pool->mutex));
- printf("LRU list length %lu \n", UT_LIST_GET_LEN(buf_pool->LRU));
- printf("Free list length %lu \n", UT_LIST_GET_LEN(buf_pool->free));
+ printf("Free list length %lu \n", UT_LIST_GET_LEN(buf_pool->free));
+ printf("LRU list length %lu \n", UT_LIST_GET_LEN(buf_pool->LRU));
printf("Flush list length %lu \n",
UT_LIST_GET_LEN(buf_pool->flush_list));
- printf("Buffer pool size in pages %lu\n", size);
+ printf("Buffer pool size %lu\n", size);
printf("Pending reads %lu \n", buf_pool->n_pend_reads);
@@ -1650,9 +1735,21 @@ buf_print_io(void)
buf_pool->n_flush[BUF_FLUSH_LIST],
buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE]);
+ current_time = time(NULL);
+ time_elapsed = difftime(current_time, buf_pool->last_printout_time);
+
+ buf_pool->last_printout_time = current_time;
+
printf("Pages read %lu, created %lu, written %lu\n",
buf_pool->n_pages_read, buf_pool->n_pages_created,
buf_pool->n_pages_written);
+ printf("%.2f reads/s, %.2f creates/s, %.2f writes/s\n",
+ (buf_pool->n_pages_read - buf_pool->n_pages_read_old)
+ / time_elapsed,
+ (buf_pool->n_pages_created - buf_pool->n_pages_created_old)
+ / time_elapsed,
+ (buf_pool->n_pages_written - buf_pool->n_pages_written_old)
+ / time_elapsed);
if (buf_pool->n_page_gets > buf_pool->n_page_gets_old) {
printf("Buffer pool hit rate %lu / 1000\n",
@@ -1660,10 +1757,14 @@ buf_print_io(void)
- ((1000 *
(buf_pool->n_pages_read - buf_pool->n_pages_read_old))
/ (buf_pool->n_page_gets - buf_pool->n_page_gets_old)));
+ } else {
+ printf("No buffer pool activity since the last printout\n");
}
buf_pool->n_page_gets_old = buf_pool->n_page_gets;
buf_pool->n_pages_read_old = buf_pool->n_pages_read;
+ buf_pool->n_pages_created_old = buf_pool->n_pages_created;
+ buf_pool->n_pages_written_old = buf_pool->n_pages_written;
mutex_exit(&(buf_pool->mutex));
}
diff --git a/innobase/buf/buf0lru.c b/innobase/buf/buf0lru.c
index 142beaaaa15..eb63fa99f4a 100644
--- a/innobase/buf/buf0lru.c
+++ b/innobase/buf/buf0lru.c
@@ -551,6 +551,10 @@ buf_LRU_block_free_non_file_page(
block->state = BUF_BLOCK_NOT_USED;
+#ifdef UNIV_DEBUG
+ /* Wipe contents of page to reveal possible stale pointers to it */
+ memset(block->frame, '\0', UNIV_PAGE_SIZE);
+#endif
UT_LIST_ADD_FIRST(free, buf_pool->free, block);
}
diff --git a/innobase/configure.in b/innobase/configure.in
index 48bb9504219..b606ecfffc0 100644
--- a/innobase/configure.in
+++ b/innobase/configure.in
@@ -38,7 +38,7 @@ AC_CHECK_HEADERS(aio.h sched.h)
AC_CHECK_SIZEOF(int, 4)
AC_CHECK_FUNCS(sched_yield)
AC_CHECK_FUNCS(fdatasync)
-AC_CHECK_FUNCS(localtime_r)
+#AC_CHECK_FUNCS(localtime_r) # Already checked by MySQL
#AC_C_INLINE Already checked in MySQL
AC_C_BIGENDIAN
diff --git a/innobase/data/data0data.c b/innobase/data/data0data.c
index aecc56ec022..2254dcb6ae6 100644
--- a/innobase/data/data0data.c
+++ b/innobase/data/data0data.c
@@ -14,6 +14,7 @@ Created 5/30/1994 Heikki Tuuri
#include "ut0rnd.h"
#include "rem0rec.h"
+#include "rem0cmp.h"
#include "page0page.h"
#include "dict0dict.h"
#include "btr0cur.h"
@@ -63,6 +64,53 @@ dtuple_get_nth_field_noninline(
return(dtuple_get_nth_field(tuple, n));
}
+/****************************************************************
+Returns TRUE if lengths of two dtuples are equal and respective data fields
+in them are equal when compared with collation in char fields (not as binary
+strings). */
+
+ibool
+dtuple_datas_are_ordering_equal(
+/*============================*/
+				/* out: TRUE if length and fields are equal
+				when compared with cmp_dfield_dfield:
+				NOTE: in character type fields some letters
+				are identified with others! (collation) */
+	dtuple_t*	tuple1,	/* in: tuple 1 */
+	dtuple_t*	tuple2)	/* in: tuple 2 */
+{
+	dfield_t*	field1;
+	dfield_t*	field2;
+	ulint		n_fields;
+	ulint		i;
+
+	ut_ad(tuple1 && tuple2);
+
+	/* The magic numbers must be COMPARED: an assignment here would
+	overwrite the field and make the assertion always pass */
+	ut_ad(tuple1->magic_n == DATA_TUPLE_MAGIC_N);
+	ut_ad(tuple2->magic_n == DATA_TUPLE_MAGIC_N);
+	ut_ad(dtuple_check_typed(tuple1));
+	ut_ad(dtuple_check_typed(tuple2));
+
+	n_fields = dtuple_get_n_fields(tuple1);
+
+	if (n_fields != dtuple_get_n_fields(tuple2)) {
+
+		/* Tuples of different length are never equal */
+
+		return(FALSE);
+	}
+
+	for (i = 0; i < n_fields; i++) {
+
+		field1 = dtuple_get_nth_field(tuple1, i);
+		field2 = dtuple_get_nth_field(tuple2, i);
+
+		if (0 != cmp_dfield_dfield(field1, field2)) {
+
+			/* First differing field decides */
+
+			return(FALSE);
+		}
+	}
+
+	return(TRUE);
+}
+
/*************************************************************************
Creates a dtuple for use in MySQL. */
@@ -408,7 +456,7 @@ dtuple_convert_big_rec(
ulint size;
ulint n_fields;
ulint longest;
- ulint longest_i;
+ ulint longest_i = ULINT_MAX;
ibool is_externally_stored;
ulint i;
ulint j;
diff --git a/innobase/data/data0type.c b/innobase/data/data0type.c
index 82c00a83fb2..5d0ddf3e887 100644
--- a/innobase/data/data0type.c
+++ b/innobase/data/data0type.c
@@ -28,7 +28,6 @@ dtype_validate(
ut_a((type->mtype >= DATA_VARCHAR) && (type->mtype <= DATA_MYSQL));
if (type->mtype == DATA_SYS) {
- ut_a(type->prtype >= DATA_ROW_ID);
ut_a(type->prtype <= DATA_MIX_ID);
}
@@ -45,11 +44,10 @@ dtype_print(
{
ulint mtype;
ulint prtype;
-
+ ulint len;
+
ut_a(type);
- printf("DATA TYPE: ");
-
mtype = type->mtype;
prtype = type->prtype;
if (mtype == DATA_VARCHAR) {
@@ -65,8 +63,10 @@ dtype_print(
} else if (mtype == DATA_SYS) {
printf("DATA_SYS");
} else {
- printf("unknown type %lu", mtype);
+ printf("type %lu", mtype);
}
+
+ len = type->len;
if ((type->mtype == DATA_SYS)
|| (type->mtype == DATA_VARCHAR)
@@ -74,8 +74,13 @@ dtype_print(
printf(" ");
if (prtype == DATA_ROW_ID) {
printf("DATA_ROW_ID");
+ len = DATA_ROW_ID_LEN;
} else if (prtype == DATA_ROLL_PTR) {
printf("DATA_ROLL_PTR");
+ len = DATA_ROLL_PTR_LEN;
+ } else if (prtype == DATA_TRX_ID) {
+ printf("DATA_TRX_ID");
+ len = DATA_TRX_ID_LEN;
} else if (prtype == DATA_MIX_ID) {
printf("DATA_MIX_ID");
} else if (prtype == DATA_ENGLISH) {
@@ -83,9 +88,9 @@ dtype_print(
} else if (prtype == DATA_FINNISH) {
printf("DATA_FINNISH");
} else {
- printf("unknown prtype %lu", mtype);
+ printf("prtype %lu", mtype);
}
}
- printf("; len %lu prec %lu\n", type->len, type->prec);
+ printf(" len %lu prec %lu", len, type->prec);
}
diff --git a/innobase/dict/dict0crea.c b/innobase/dict/dict0crea.c
index 478364fba8a..9d79983c9e5 100644
--- a/innobase/dict/dict0crea.c
+++ b/innobase/dict/dict0crea.c
@@ -17,9 +17,13 @@ Created 1/8/1996 Heikki Tuuri
#include "page0page.h"
#include "mach0data.h"
#include "dict0boot.h"
+#include "dict0dict.h"
#include "que0que.h"
#include "row0ins.h"
+#include "row0mysql.h"
#include "pars0pars.h"
+#include "trx0roll.h"
+#include "usr0sess.h"
/*********************************************************************
Based on a table object, this function builds the entry to be inserted
@@ -1019,3 +1023,228 @@ function_exit:
return(thr);
}
+
+/********************************************************************
+Creates the foreign key constraints system tables inside InnoDB
+at database creation or database start if they are not found or are
+not of the right form. The dictionary mutex is reserved on entry and
+released just before each return. If only one of the two tables exists,
+or the index counts are not the expected ones, the existing tables are
+dropped and both are created anew. */
+
+ulint
+dict_create_or_check_foreign_constraint_tables(void)
+/*================================================*/
+ /* out: DB_SUCCESS or error code;
+ DB_MUST_GET_MORE_FILE_SPACE if the creation
+ failed with DB_OUT_OF_FILE_SPACE */
+{
+ dict_table_t* table1;
+ dict_table_t* table2;
+ que_thr_t* thr;
+ que_t* graph;
+ ulint error;
+ trx_t* trx;
+ char* str;
+
+ mutex_enter(&(dict_sys->mutex));
+
+ table1 = dict_table_get_low("SYS_FOREIGN");
+ table2 = dict_table_get_low("SYS_FOREIGN_COLS");
+
+ /* The expected index counts follow from the CREATE statements
+ below: SYS_FOREIGN gets ID_IND, FOR_IND and REF_IND, and
+ SYS_FOREIGN_COLS gets only ID_IND */
+
+ if (table1 && table2
+ && UT_LIST_GET_LEN(table1->indexes) == 3
+ && UT_LIST_GET_LEN(table2->indexes) == 1) {
+
+ /* Foreign constraint system tables have already been
+ created, and they are ok */
+
+ mutex_exit(&(dict_sys->mutex));
+
+ return(DB_SUCCESS);
+ }
+
+ trx = trx_allocate_for_mysql();
+
+ trx->op_info = "creating foreign key sys tables";
+
+ /* Drop whatever is left of an earlier, incomplete creation */
+
+ if (table1) {
+ fprintf(stderr,
+ "InnoDB: dropping incompletely created SYS_FOREIGN table\n");
+ row_drop_table_for_mysql("SYS_FOREIGN", trx, TRUE);
+ }
+
+ if (table2) {
+ fprintf(stderr,
+ "InnoDB: dropping incompletely created SYS_FOREIGN_COLS table\n");
+ row_drop_table_for_mysql("SYS_FOREIGN_COLS", trx, TRUE);
+ }
+
+ fprintf(stderr,
+ "InnoDB: creating foreign key constraint system tables\n");
+
+ /* NOTE: in dict_load_foreigns we use the fact that
+ there are 2 secondary indexes on SYS_FOREIGN, and they
+ are defined just like below */
+
+ str =
+ "PROCEDURE CREATE_FOREIGN_SYS_TABLES_PROC () IS\n"
+ "BEGIN\n"
+ "CREATE TABLE\n"
+ "SYS_FOREIGN(ID CHAR, FOR_NAME CHAR, REF_NAME CHAR, N_COLS INT);\n"
+ "CREATE UNIQUE CLUSTERED INDEX ID_IND ON SYS_FOREIGN (ID);\n"
+ "CREATE INDEX FOR_IND ON SYS_FOREIGN (FOR_NAME);\n"
+ "CREATE INDEX REF_IND ON SYS_FOREIGN (REF_NAME);\n"
+ "CREATE TABLE\n"
+ "SYS_FOREIGN_COLS(ID CHAR, POS INT, FOR_COL_NAME CHAR, REF_COL_NAME CHAR);\n"
+ "CREATE UNIQUE CLUSTERED INDEX ID_IND ON SYS_FOREIGN_COLS (ID, POS);\n"
+ "COMMIT WORK;\n"
+ "END;\n";
+
+ /* Parse the procedure into a query graph and run it in trx */
+
+ graph = pars_sql(str);
+
+ ut_a(graph);
+
+ graph->trx = trx;
+ trx->graph = NULL;
+
+ graph->fork_type = QUE_FORK_MYSQL_INTERFACE;
+
+ /* NOTE(review): the assignment to thr happens inside the assertion
+ macro; this relies on ut_a always evaluating its argument - confirm */
+
+ ut_a(thr = que_fork_start_command(graph, SESS_COMM_EXECUTE, 0));
+
+ que_run_threads(thr);
+
+ error = trx->error_state;
+
+ if (error != DB_SUCCESS) {
+ /* Running out of file space is the only failure which is
+ handled; any other error code fails the assertion */
+
+ ut_a(error == DB_OUT_OF_FILE_SPACE);
+
+ fprintf(stderr, "InnoDB: creation failed\n");
+ fprintf(stderr, "InnoDB: tablespace is full\n");
+ fprintf(stderr,
+ "InnoDB: dropping incompletely created SYS_FOREIGN tables\n");
+
+ row_drop_table_for_mysql("SYS_FOREIGN", trx, TRUE);
+ row_drop_table_for_mysql("SYS_FOREIGN_COLS", trx, TRUE);
+
+ error = DB_MUST_GET_MORE_FILE_SPACE;
+ }
+
+ que_graph_free(graph);
+
+ trx->op_info = "";
+
+ trx_free_for_mysql(trx);
+
+ if (error == DB_SUCCESS) {
+ fprintf(stderr,
+ "InnoDB: foreign key constraint system tables created\n");
+ }
+
+ mutex_exit(&(dict_sys->mutex));
+
+ return(error);
+}
+
+/************************************************************************
+Adds foreign key definitions to data dictionary tables in the database.
+For each constraint in table->foreign_list a new id is allocated and stored
+to foreign->id, and an InnoDB stored procedure is built and run which
+inserts one row to SYS_FOREIGN and one row per column to SYS_FOREIGN_COLS,
+and then commits. The caller must own the dictionary mutex. */
+
+ulint
+dict_create_add_foreigns_to_dictionary(
+/*===================================*/
+				/* out: DB_SUCCESS; DB_ERROR if SYS_FOREIGN
+				is not found or a definition does not fit
+				in the statement buffer;
+				DB_MUST_GET_MORE_FILE_SPACE if the
+				tablespace is full */
+	dict_table_t*	table,	/* in: table */
+	trx_t*		trx)	/* in: transaction */
+{
+	dict_foreign_t*	foreign;
+	que_thr_t*	thr;
+	que_t*		graph;
+	dulint		id;
+	ulint		len;
+	ulint		error;
+	ulint		i;
+	ulint		stmt_overhead	= 200;	/* upper bound for the fixed
+						text and the numbers of one
+						INSERT, plus the closing
+						COMMIT WORK ... END */
+	char		buf2[50];
+	char		buf[10000];
+
+	ut_ad(mutex_own(&(dict_sys->mutex)));
+
+	if (NULL == dict_table_get_low("SYS_FOREIGN")) {
+		fprintf(stderr,
+     "InnoDB: table SYS_FOREIGN not found from internal data dictionary\n");
+		return(DB_ERROR);
+	}
+
+	for (foreign = UT_LIST_GET_FIRST(table->foreign_list);
+	     foreign != NULL;
+	     foreign = UT_LIST_GET_NEXT(foreign_list, foreign)) {
+
+		/* Build an InnoDB stored procedure which will insert the
+		necessary rows to SYS_FOREIGN and SYS_FOREIGN_COLS */
+
+		len = sprintf(buf,
+		"PROCEDURE ADD_FOREIGN_DEFS_PROC () IS\n"
+		"BEGIN\n");
+
+		/* Every append below is preceded by a check that the text
+		fits: buf is a fixed-size stack buffer, while the names and
+		the number of columns are not bounded here */
+
+		if (len + ut_strlen(table->name)
+		    + ut_strlen(foreign->referenced_table_name)
+		    + stmt_overhead >= sizeof(buf)) {
+
+			goto too_long;
+		}
+
+		/* We allocate the new id from the sequence of table id's */
+		id = dict_hdr_get_new_id(DICT_HDR_TABLE_ID);
+
+		sprintf(buf2, "%lu_%lu", ut_dulint_get_high(id),
+						ut_dulint_get_low(id));
+		foreign->id = mem_heap_alloc(foreign->heap,
+						ut_strlen(buf2) + 1);
+		ut_memcpy(foreign->id, buf2, ut_strlen(buf2) + 1);
+
+		/* NOTE(review): the names are placed between single quotes
+		without any escaping; a name containing a quote character
+		would change the procedure text - confirm that callers
+		exclude such names */
+
+		len += sprintf(buf + len,
+		"INSERT INTO SYS_FOREIGN VALUES('%s', '%s', '%s', %lu);\n",
+					foreign->id,
+					table->name,
+					foreign->referenced_table_name,
+					foreign->n_fields);
+
+		for (i = 0; i < foreign->n_fields; i++) {
+
+			if (len + ut_strlen(foreign->foreign_col_names[i])
+			    + ut_strlen(foreign->referenced_col_names[i])
+			    + stmt_overhead >= sizeof(buf)) {
+
+				goto too_long;
+			}
+
+			len += sprintf(buf + len,
+		"INSERT INTO SYS_FOREIGN_COLS VALUES('%s', %lu, '%s', '%s');\n",
+					foreign->id,
+					i,
+					foreign->foreign_col_names[i],
+					foreign->referenced_col_names[i]);
+		}
+
+		len += sprintf(buf + len,"COMMIT WORK;\nEND;\n");
+
+		graph = pars_sql(buf);
+
+		ut_a(graph);
+
+		graph->trx = trx;
+		trx->graph = NULL;
+
+		graph->fork_type = QUE_FORK_MYSQL_INTERFACE;
+
+		ut_a(thr = que_fork_start_command(graph,
+						SESS_COMM_EXECUTE, 0));
+
+		que_run_threads(thr);
+
+		error = trx->error_state;
+
+		que_graph_free(graph);
+
+		if (error != DB_SUCCESS) {
+			/* Only running out of file space is handled; any
+			other error code fails the assertion */
+
+			ut_a(error == DB_OUT_OF_FILE_SPACE);
+
+			fprintf(stderr,
+			"InnoDB: foreign constraint creation failed\n");
+			fprintf(stderr, "InnoDB: tablespace is full\n");
+
+			trx_general_rollback_for_mysql(trx, FALSE, NULL);
+
+			return(DB_MUST_GET_MORE_FILE_SPACE);
+		}
+	}
+
+	return(DB_SUCCESS);
+
+too_long:
+	/* Nothing of the current constraint has been executed yet, so
+	there is nothing to roll back for it */
+
+	fprintf(stderr,
+	"InnoDB: foreign key definition of table %s is too long\n",
+							table->name);
+	return(DB_ERROR);
+}
diff --git a/innobase/dict/dict0dict.c b/innobase/dict/dict0dict.c
index 10d93fc6ecf..e0a7fd327a5 100644
--- a/innobase/dict/dict0dict.c
+++ b/innobase/dict/dict0dict.c
@@ -17,6 +17,7 @@ Created 1/8/1996 Heikki Tuuri
#include "mach0data.h"
#include "dict0boot.h"
#include "dict0mem.h"
+#include "dict0crea.h"
#include "trx0undo.h"
#include "btr0btr.h"
#include "btr0cur.h"
@@ -24,10 +25,12 @@ Created 1/8/1996 Heikki Tuuri
#include "pars0pars.h"
#include "pars0sym.h"
#include "que0que.h"
-
+#include "rem0cmp.h"
dict_sys_t* dict_sys = NULL; /* the dictionary system */
+rw_lock_t dict_foreign_key_check_lock;
+
#define DICT_HEAP_SIZE 100 /* initial memory heap size when
creating a table or index object */
#define DICT_POOL_PER_PROCEDURE_HASH 512 /* buffer pool max size per stored
@@ -137,12 +140,12 @@ dict_tree_find_index_low(
dict_tree_t* tree, /* in: index tree */
rec_t* rec); /* in: record for which to find correct index */
/**************************************************************************
-Prints a table data. */
+Removes a foreign constraint struct from the dictionary cache. */
static
void
-dict_table_print_low(
-/*=================*/
- dict_table_t* table); /* in: table */
+dict_foreign_remove_from_cache(
+/*===========================*/
+ dict_foreign_t* foreign); /* in, own: foreign constraint */
/**************************************************************************
Prints a column data. */
static
@@ -164,6 +167,13 @@ void
dict_field_print_low(
/*=================*/
dict_field_t* field); /* in: field */
+/*************************************************************************
+Frees a foreign key struct. */
+static
+void
+dict_foreign_free(
+/*==============*/
+ dict_foreign_t* foreign); /* in, own: foreign key struct */
/************************************************************************
Reserves the dictionary system mutex for MySQL. */
@@ -353,7 +363,8 @@ dict_table_get_on_id(
{
dict_table_t* table;
- if (ut_dulint_cmp(table_id, DICT_FIELDS_ID) <= 0) {
+ if (ut_dulint_cmp(table_id, DICT_FIELDS_ID) <= 0
+ || trx->dict_operation) {
/* It is a system table which will always exist in the table
cache: we avoid acquiring the dictionary mutex, because
if we are doing a rollback to handle an error in TABLE
@@ -415,6 +426,10 @@ dict_init(void)
dict_sys->size = 0;
UT_LIST_INIT(dict_sys->table_LRU);
+
+ rw_lock_create(&dict_foreign_key_check_lock);
+ rw_lock_set_level(&dict_foreign_key_check_lock,
+ SYNC_FOREIGN_KEY_CHECK);
}
/**************************************************************************
@@ -535,6 +550,41 @@ dict_table_add_to_cache(
}
/**************************************************************************
+Looks for an index with the given id. NOTE that we do not reserve
+the dictionary mutex: this function is for emergency purposes like
+printing info of a corrupt database page! */
+
+dict_index_t*
+dict_index_find_on_id_low(
+/*======================*/
+			/* out: index or NULL if not found from cache */
+	dulint	id)	/* in: index id */
+{
+	dict_table_t*	cached_table;
+	dict_index_t*	candidate;
+
+	/* Walk every table in the cache LRU list and every index of
+	each table; the id is compared to the id of the index tree */
+
+	for (cached_table = UT_LIST_GET_FIRST(dict_sys->table_LRU);
+	     cached_table;
+	     cached_table = UT_LIST_GET_NEXT(table_LRU, cached_table)) {
+
+		for (candidate = dict_table_get_first_index(cached_table);
+		     candidate;
+		     candidate = dict_table_get_next_index(candidate)) {
+
+			if (ut_dulint_cmp(id, candidate->tree->id) == 0) {
+
+				return(candidate);
+			}
+		}
+	}
+
+	return(NULL);
+}
+
+/**************************************************************************
Renames a table object. */
ibool
@@ -544,10 +594,12 @@ dict_table_rename_in_cache(
dict_table_t* table, /* in: table */
char* new_name) /* in: new name */
{
- ulint fold;
- ulint old_size;
- char* name_buf;
- ulint i;
+ dict_foreign_t* foreign;
+ dict_index_t* index;
+ ulint fold;
+ ulint old_size;
+ char* name_buf;
+ ulint i;
ut_ad(table);
ut_ad(mutex_own(&(dict_sys->mutex)));
@@ -589,6 +641,55 @@ dict_table_rename_in_cache(
dict_sys->size += (mem_heap_get_size(table->heap) - old_size);
+ /* Update the table_name field in indexes */
+ index = dict_table_get_first_index(table);
+
+ while (index != NULL) {
+ index->table_name = table->name;
+
+ index = dict_table_get_next_index(index);
+ }
+
+ /* Update the table name fields in foreign constraints */
+
+ foreign = UT_LIST_GET_FIRST(table->foreign_list);
+
+ while (foreign != NULL) {
+ if (ut_strlen(foreign->foreign_table_name) <
+ ut_strlen(table->name)) {
+ /* Allocate a longer name buffer;
+ TODO: store buf len to save memory */
+ foreign->foreign_table_name = mem_heap_alloc(
+ foreign->heap,
+ ut_strlen(table->name) + 1);
+ }
+
+ ut_memcpy(foreign->foreign_table_name, table->name,
+ ut_strlen(table->name) + 1);
+ foreign->foreign_table_name[ut_strlen(table->name)] = '\0';
+
+ foreign = UT_LIST_GET_NEXT(foreign_list, foreign);
+ }
+
+ foreign = UT_LIST_GET_FIRST(table->referenced_list);
+
+ while (foreign != NULL) {
+ if (ut_strlen(foreign->referenced_table_name) <
+ ut_strlen(table->name)) {
+ /* Allocate a longer name buffer;
+ TODO: store buf len to save memory */
+ foreign->referenced_table_name = mem_heap_alloc(
+ foreign->heap,
+ ut_strlen(table->name) + 1);
+ }
+
+ ut_memcpy(foreign->referenced_table_name, table->name,
+ ut_strlen(table->name) + 1);
+ foreign->referenced_table_name[ut_strlen(table->name)] = '\0';
+
+ foreign = UT_LIST_GET_NEXT(referenced_list, foreign);
+ }
+
return(TRUE);
}
@@ -600,6 +701,7 @@ dict_table_remove_from_cache(
/*=========================*/
dict_table_t* table) /* in, own: table */
{
+ dict_foreign_t* foreign;
dict_index_t* index;
ulint size;
ulint i;
@@ -610,6 +712,29 @@ dict_table_remove_from_cache(
/* printf("Removing table %s from dictionary cache\n", table->name); */
+ /* Remove the foreign constraints from the cache */
+ foreign = UT_LIST_GET_LAST(table->foreign_list);
+
+ while (foreign != NULL) {
+ ut_a(0 == ut_strcmp(foreign->foreign_table_name, table->name));
+
+ dict_foreign_remove_from_cache(foreign);
+ foreign = UT_LIST_GET_LAST(table->foreign_list);
+ }
+
+ /* Reset table field in referencing constraints */
+
+ foreign = UT_LIST_GET_FIRST(table->referenced_list);
+
+ while (foreign != NULL) {
+ ut_a(0 == ut_strcmp(foreign->referenced_table_name,
+ table->name));
+ foreign->referenced_table = NULL;
+ foreign->referenced_index = NULL;
+
+ foreign = UT_LIST_GET_NEXT(referenced_list, foreign);
+ }
+
/* Remove the indexes from the cache */
index = UT_LIST_GET_LAST(table->indexes);
@@ -856,6 +981,21 @@ dict_index_add_to_cache(
new_index->tree = tree;
}
+ if (!(new_index->type & DICT_UNIVERSAL)) {
+
+ new_index->stat_n_diff_key_vals =
+ mem_heap_alloc(new_index->heap,
+ (1 + dict_index_get_n_unique(new_index))
+ * sizeof(ib_longlong));
+ /* Give some sensible values to stat_n_... in case we do
+ not calculate statistics quickly enough */
+
+ for (i = 0; i <= dict_index_get_n_unique(new_index); i++) {
+
+ new_index->stat_n_diff_key_vals[i] = 100;
+ }
+ }
+
/* Add the index to the list of indexes stored in the tree */
UT_LIST_ADD_LAST(tree_indexes, tree->tree_indexes, new_index);
@@ -1290,6 +1430,654 @@ dict_index_build_internal_non_clust(
return(new_index);
}
+/*====================== FOREIGN KEY PROCESSING ========================*/
+
+/*************************************************************************
+Frees a foreign key struct by freeing the memory heap stored in its heap
+field. The struct must not be used after this call. */
+static
+void
+dict_foreign_free(
+/*==============*/
+ dict_foreign_t* foreign) /* in, own: foreign key struct */
+{
+ /* NOTE(review): presumably the struct itself and its strings live in
+ this heap, so nothing else needs freeing - confirm at the allocation
+ site */
+ mem_heap_free(foreign->heap);
+}
+
+/**************************************************************************
+Unlinks a foreign constraint struct from the lists of the tables it is
+attached to in the dictionary cache, and frees the struct. */
+static
+void
+dict_foreign_remove_from_cache(
+/*===========================*/
+	dict_foreign_t*	foreign)	/* in, own: foreign constraint */
+{
+	dict_table_t*	ref_table;
+	dict_table_t*	for_table;
+
+	ut_ad(mutex_own(&(dict_sys->mutex)));
+	ut_a(foreign);
+
+	ref_table = foreign->referenced_table;
+
+	if (ref_table != NULL) {
+		/* Detach from the referenced table */
+		UT_LIST_REMOVE(referenced_list, ref_table->referenced_list,
+								foreign);
+	}
+
+	for_table = foreign->foreign_table;
+
+	if (for_table != NULL) {
+		/* Detach from the table which declares the constraint */
+		UT_LIST_REMOVE(foreign_list, for_table->foreign_list,
+								foreign);
+	}
+
+	dict_foreign_free(foreign);
+}
+
+/**************************************************************************
+Searches the foreign list and then the referenced list of a table for
+a foreign constraint with the given id. */
+static
+dict_foreign_t*
+dict_foreign_find(
+/*==============*/
+				/* out: foreign constraint, or NULL if
+				neither list contains the id */
+	dict_table_t*	table,	/* in: table object */
+	char*		id)	/* in: foreign constraint id */
+{
+	dict_foreign_t*	candidate;
+
+	ut_ad(mutex_own(&(dict_sys->mutex)));
+
+	/* Constraints declared in this table */
+
+	for (candidate = UT_LIST_GET_FIRST(table->foreign_list);
+	     candidate;
+	     candidate = UT_LIST_GET_NEXT(foreign_list, candidate)) {
+
+		if (0 == ut_strcmp(id, candidate->id)) {
+
+			return(candidate);
+		}
+	}
+
+	/* Constraints which refer to this table */
+
+	for (candidate = UT_LIST_GET_FIRST(table->referenced_list);
+	     candidate;
+	     candidate = UT_LIST_GET_NEXT(referenced_list, candidate)) {
+
+		if (0 == ut_strcmp(id, candidate->id)) {
+
+			return(candidate);
+		}
+	}
+
+	return(NULL);
+}
+
+/*************************************************************************
+Looks for an index of the table whose first n_cols fields are exactly the
+named columns, in the given order. If types_idx is given, the type of each
+of these fields must in addition be equal, in the sense of
+cmp_types_are_equal, to the type of the field in the same position
+of types_idx. */
+static
+dict_index_t*
+dict_foreign_find_index(
+/*====================*/
+				/* out: matching index, NULL if not found */
+	dict_table_t*	table,	/* in: table */
+	char**		columns,/* in: array of column names */
+	ulint		n_cols,	/* in: number of columns */
+	dict_index_t*	types_idx)/* in: NULL or an index to whose types the
+				column types must match */
+{
+	dict_index_t*	candidate;
+	char*		field_name;
+	ulint		pos;
+
+	for (candidate = dict_table_get_first_index(table);
+	     candidate != NULL;
+	     candidate = dict_table_get_next_index(candidate)) {
+
+		if (dict_index_get_n_fields(candidate) < n_cols) {
+			/* Too few fields to cover the columns */
+
+			continue;
+		}
+
+		for (pos = 0; pos < n_cols; pos++) {
+			field_name = dict_index_get_nth_field(candidate, pos)
+								->col->name;
+
+			if (0 != ut_strcmp(columns[pos], field_name)) {
+				/* Wrong column in this position */
+
+				break;
+			}
+
+			if (types_idx != NULL
+			    && !cmp_types_are_equal(
+				dict_index_get_nth_type(candidate, pos),
+				dict_index_get_nth_type(types_idx, pos))) {
+
+				break;
+			}
+		}
+
+		if (pos == n_cols) {
+			/* All the columns matched */
+
+			return(candidate);
+		}
+	}
+
+	return(NULL);
+}
+
+/**************************************************************************
+Adds a foreign key constraint object to the dictionary cache. May free
+the object if there already is an object with the same identifier in.
+At least one of the foreign table and the referenced table must already
+be in the dictionary cache! If a required index is not found, an object
+which was not in the cache before this call is freed, and it is first
+unlinked from any list this call added it to. */
+
+ulint
+dict_foreign_add_to_cache(
+/*======================*/
+					/* out: DB_SUCCESS, or
+					DB_CANNOT_ADD_CONSTRAINT if a
+					matching index is not found */
+	dict_foreign_t*	foreign)	/* in, own: foreign key constraint */
+{
+	dict_table_t*	for_table;
+	dict_table_t*	ref_table;
+	dict_foreign_t*	for_in_cache			= NULL;
+	dict_index_t*	index;
+	ibool		added_to_referenced_list	= FALSE;
+
+	ut_ad(mutex_own(&(dict_sys->mutex)));
+
+	for_table = dict_table_check_if_in_cache_low(
+					foreign->foreign_table_name);
+
+	ref_table = dict_table_check_if_in_cache_low(
+					foreign->referenced_table_name);
+	ut_a(for_table || ref_table);
+
+	/* Look for an object with the same id already in the cache */
+
+	if (for_table) {
+		for_in_cache = dict_foreign_find(for_table, foreign->id);
+	}
+
+	if (!for_in_cache && ref_table) {
+		for_in_cache = dict_foreign_find(ref_table, foreign->id);
+	}
+
+	if (for_in_cache) {
+		/* Free the foreign object: we go on with the cached one */
+		mem_heap_free(foreign->heap);
+	} else {
+		for_in_cache = foreign;
+	}
+
+	if (for_in_cache->referenced_table == NULL && ref_table) {
+		index = dict_foreign_find_index(ref_table,
+					for_in_cache->referenced_col_names,
+					for_in_cache->n_fields,
+					for_in_cache->foreign_index);
+
+		if (index == NULL) {
+			if (for_in_cache == foreign) {
+				mem_heap_free(foreign->heap);
+			}
+
+			return(DB_CANNOT_ADD_CONSTRAINT);
+		}
+
+		for_in_cache->referenced_table = ref_table;
+		for_in_cache->referenced_index = index;
+		UT_LIST_ADD_LAST(referenced_list,
+					ref_table->referenced_list,
+					for_in_cache);
+		added_to_referenced_list = TRUE;
+	}
+
+	if (for_in_cache->foreign_table == NULL && for_table) {
+		index = dict_foreign_find_index(for_table,
+					for_in_cache->foreign_col_names,
+					for_in_cache->n_fields,
+					for_in_cache->referenced_index);
+
+		if (index == NULL) {
+			if (for_in_cache == foreign) {
+				if (added_to_referenced_list) {
+					/* The object was linked to the
+					referenced table above: unlink it
+					before freeing, or the list would
+					contain a pointer to freed memory */
+
+					UT_LIST_REMOVE(referenced_list,
+						ref_table->referenced_list,
+						for_in_cache);
+				}
+
+				mem_heap_free(foreign->heap);
+			}
+
+			return(DB_CANNOT_ADD_CONSTRAINT);
+		}
+
+		for_in_cache->foreign_table = for_table;
+		for_in_cache->foreign_index = index;
+		UT_LIST_ADD_LAST(foreign_list,
+					for_table->foreign_list,
+					for_in_cache);
+	}
+
+	return(DB_SUCCESS);
+}
+
+/*************************************************************************
+Scans from pointer onwards. Stops if is at the start of a copy of
+'string' where characters are compared without case sensitivity. Stops
+also at '\0'. */
+static
+char*
+dict_scan_to(
+/*=========*/
+			/* out: pointer to the first case-insensitive
+			occurrence of string, or to the terminating
+			'\0' if there is none */
+	char*	ptr,	/* in: scan from */
+	char*	string)	/* in: look for this */
+{
+	ulint	len	= ut_strlen(string);	/* computed only once */
+	ulint	i;
+
+	for (; *ptr != '\0'; ptr++) {
+
+		/* The comparison cannot run past the end of the scanned
+		text: a '\0' in ptr never equals string[i] when i < len */
+
+		for (i = 0; i < len; i++) {
+			/* toupper is defined only for values of unsigned
+			char (and EOF): convert explicitly, plain char may
+			be negative */
+
+			if (toupper((unsigned char) ptr[i])
+			    != toupper((unsigned char) string[i])) {
+
+				break;
+			}
+		}
+
+		if (i == len) {
+
+			return(ptr);
+		}
+	}
+
+	return(ptr);
+}
+
+/*************************************************************************
+Accepts a specified string. Comparisons are case-insensitive. Leading
+whitespace is skipped before the comparison. Nothing is accepted at the
+end of the scanned text. */
+static
+char*
+dict_accept(
+/*========*/
+			/* out: if string was accepted, the pointer
+			is moved after that, else ptr is returned */
+	char*	ptr,	/* in: scan from this */
+	char*	string,	/* in: accept only this string as the next
+			non-whitespace string */
+	ibool*	success)/* out: TRUE if accepted */
+{
+	char*	old_ptr	= ptr;
+	ulint	len;
+	ulint	i;
+
+	*success = FALSE;
+
+	/* The ctype functions are defined only for values of unsigned
+	char (and EOF): convert explicitly, plain char may be negative */
+
+	while (isspace((unsigned char) *ptr)) {
+		ptr++;
+	}
+
+	if (*ptr == '\0') {
+
+		return(old_ptr);
+	}
+
+	/* Compare only at the current position: there is no need to
+	search the rest of the text for a later occurrence */
+
+	len = ut_strlen(string);
+
+	for (i = 0; i < len; i++) {
+		if (toupper((unsigned char) ptr[i])
+		    != toupper((unsigned char) string[i])) {
+
+			return(old_ptr);
+		}
+	}
+
+	*success = TRUE;
+
+	return(ptr + len);
+}
+
+/*************************************************************************
+Tries to scan a column name. Leading whitespace is skipped; the name ends
+at whitespace, ',', ')' or the end of the text. The name is accepted only
+if the table has a column of exactly that name. */
+static
+char*
+dict_scan_col(
+/*==========*/
+				/* out: scanned to */
+	char*		ptr,	/* in: scanned to */
+	ibool*		success,/* out: TRUE if success */
+	dict_table_t*	table,	/* in: table in which the column is */
+	dict_col_t**	column,	/* out: pointer to column if success;
+				not written otherwise */
+	char**		column_name)/* out: pointer to column->name if
+				success; not written otherwise */
+{
+	dict_col_t*	col;
+	char*		old_ptr;
+	ulint		name_len;
+	ulint		i;
+
+	*success = FALSE;
+
+	/* The ctype functions are defined only for values of unsigned
+	char (and EOF): convert explicitly, plain char may be negative */
+
+	while (isspace((unsigned char) *ptr)) {
+		ptr++;
+	}
+
+	if (*ptr == '\0') {
+
+		return(ptr);
+	}
+
+	old_ptr = ptr;
+
+	/* The scan must stop also at the terminating '\0', or a name at
+	the very end of the text would take it past the buffer */
+
+	while (*ptr != '\0' && !isspace((unsigned char) *ptr)
+	       && *ptr != ',' && *ptr != ')') {
+		ptr++;
+	}
+
+	name_len = (ulint) (ptr - old_ptr);
+
+	for (i = 0; i < dict_table_get_n_cols(table); i++) {
+
+		col = dict_table_get_nth_col(table, i);
+
+		if (ut_strlen(col->name) == name_len
+		    && 0 == ut_memcmp(col->name, old_ptr, name_len)) {
+
+			/* Found */
+
+			*success = TRUE;
+			*column = col;
+			*column_name = col->name;
+
+			break;
+		}
+	}
+
+	return(ptr);
+}
+
+/*************************************************************************
+Scans the referenced table name from an SQL string. The name ends at
+whitespace, '(' or the end of the text. A name of the form db.table is
+looked up as db/table; a name with no dot gets the database prefix (the
+part up to and including '/') of the foreign key table name. */
+static
+char*
+dict_scan_table_name(
+/*=================*/
+				/* out: scanned to; the start of the name
+				is returned if the name cannot be
+				composed */
+	char*		ptr,	/* in: scanned to */
+	dict_table_t**	table,	/* out: table object or NULL if error */
+	char*		name)	/* in: foreign key table name */
+{
+	char*	dot_ptr		= NULL;
+	char*	old_ptr;
+	ulint	id_len;		/* length of the scanned name */
+	ulint	db_len;		/* length of the database prefix */
+	char	second_table_name[10000];
+
+	*table = NULL;
+
+	/* The ctype functions are defined only for values of unsigned
+	char (and EOF): convert explicitly, plain char may be negative */
+
+	while (isspace((unsigned char) *ptr)) {
+		ptr++;
+	}
+
+	if (*ptr == '\0') {
+
+		return(ptr);
+	}
+
+	old_ptr = ptr;
+
+	/* The scan must stop also at the terminating '\0'. If there are
+	several dots, the last one is remembered. */
+
+	while (*ptr != '\0' && !isspace((unsigned char) *ptr)
+	       && *ptr != '(') {
+		if (*ptr == '.') {
+			dot_ptr = ptr;
+		}
+
+		ptr++;
+	}
+
+	id_len = (ulint) (ptr - old_ptr);
+
+	if (id_len > 9000) {
+		return(old_ptr);
+	}
+
+	if (dot_ptr == NULL) {
+		/* Copy the database name from 'name' to the start */
+
+		db_len = 0;
+
+		while (name[db_len] != '\0' && name[db_len] != '/') {
+			db_len++;
+		}
+
+		if (name[db_len] != '/') {
+			/* No database part in 'name': there is no prefix
+			to take, and we must not read past its end */
+
+			return(old_ptr);
+		}
+
+		db_len++;	/* include the '/' */
+
+		if (db_len + id_len >= sizeof(second_table_name)) {
+			/* The composed name would not fit the buffer */
+
+			return(old_ptr);
+		}
+
+		ut_memcpy(second_table_name, name, db_len);
+		ut_memcpy(second_table_name + db_len, old_ptr, id_len);
+		second_table_name[db_len + id_len] = '\0';
+	} else {
+		ut_memcpy(second_table_name, old_ptr, id_len);
+		second_table_name[dot_ptr - old_ptr] = '/';
+		second_table_name[id_len] = '\0';
+	}
+
+	*table = dict_table_get_low(second_table_name);
+
+	return(ptr);
+}
+
+/*************************************************************************
+Computes the bracket nesting depth of the text between string and ptr: each
+'(' adds one to the result and each ')' subtracts one. The character at ptr
+itself is not examined. */
+static
+int
+dict_bracket_count(
+/*===============*/
+            /* out: bracket count */
+    char*   string, /* in: start of string */
+    char*   ptr)    /* in: end of string */
+{
+    char*   pos;
+    int     depth = 0;
+
+    for (pos = string; pos != ptr; pos++) {
+        switch (*pos) {
+        case '(':
+            depth++;
+            break;
+        case ')':
+            depth--;
+            break;
+        default:
+            break;
+        }
+    }
+
+    return(depth);
+}
+
+/*************************************************************************
+Scans a table create SQL string and adds to the data dictionary the foreign
+key constraints declared in the string. This function should be called after
+the indexes for a table have been created. Each foreign key constraint must
+be accompanied with indexes in both participating tables. The indexes are
+allowed to contain more fields than mentioned in the constraint. A column
+list that does not fit in the local column buffers is rejected with
+DB_CANNOT_ADD_CONSTRAINT. */
+
+ulint
+dict_create_foreign_constraints(
+/*============================*/
+                /* out: error code or DB_SUCCESS */
+    trx_t*  trx,        /* in: transaction */
+    char*   sql_string, /* in: table create statement where
+                foreign keys are declared like:
+                FOREIGN KEY (a, b) REFERENCES table2(c, d),
+                table2 can be written also with the database
+                name before it: test.table2; the default
+                database is the database of parameter name */
+    char*   name)       /* in: table full name in the normalized form
+                database_name/table_name */
+{
+    dict_table_t*   table;
+    dict_table_t*   referenced_table;
+    dict_index_t*   index;
+    dict_foreign_t* foreign;
+    char*           ptr = sql_string;
+    ibool           success;
+    ulint           error;
+    ulint           i;
+    ulint           max_cols;
+    dict_col_t*     columns[1000];
+    char*           column_names[1000];
+
+    ut_ad(mutex_own(&(dict_sys->mutex)));
+
+    /* Number of columns that fit in columns[] and column_names[] */
+    max_cols = sizeof(columns) / sizeof(columns[0]);
+
+    table = dict_table_get_low(name);
+
+    if (table == NULL) {
+        return(DB_ERROR);
+    }
+loop:
+    ptr = dict_scan_to(ptr, "FOREIGN");
+
+    if (*ptr == '\0' || dict_bracket_count(sql_string, ptr) != 1) {
+
+        /* The following call adds the foreign key constraints
+        to the data dictionary system tables on disk */
+
+        error = dict_create_add_foreigns_to_dictionary(table, trx);
+
+        return(error);
+    }
+
+    ptr = dict_accept(ptr, "FOREIGN", &success);
+
+    if (!isspace((unsigned char)*ptr)) {
+        return(DB_CANNOT_ADD_CONSTRAINT);
+    }
+
+    ptr = dict_accept(ptr, "KEY", &success);
+
+    if (!success) {
+        goto loop;
+    }
+
+    ptr = dict_accept(ptr, "(", &success);
+
+    if (!success) {
+        goto loop;
+    }
+
+    i = 0;
+
+    /* Scan the columns in the first list */
+col_loop1:
+    if (i >= max_cols) {
+        /* No room left in columns[] and column_names[] */
+
+        return(DB_CANNOT_ADD_CONSTRAINT);
+    }
+
+    ptr = dict_scan_col(ptr, &success, table, columns + i,
+                        column_names + i);
+    if (!success) {
+        return(DB_CANNOT_ADD_CONSTRAINT);
+    }
+
+    i++;
+
+    ptr = dict_accept(ptr, ",", &success);
+
+    if (success) {
+        goto col_loop1;
+    }
+
+    ptr = dict_accept(ptr, ")", &success);
+
+    if (!success) {
+        return(DB_CANNOT_ADD_CONSTRAINT);
+    }
+
+    /* Try to find an index which contains the columns
+    as the first fields and in the right order */
+
+    index = dict_foreign_find_index(table, column_names, i, NULL);
+
+    if (!index) {
+        return(DB_CANNOT_ADD_CONSTRAINT);
+    }
+
+    ptr = dict_accept(ptr, "REFERENCES", &success);
+
+    if (!success || !isspace((unsigned char)*ptr)) {
+        return(DB_CANNOT_ADD_CONSTRAINT);
+    }
+
+    /* Let us create a constraint struct */
+
+    foreign = dict_mem_foreign_create();
+
+    foreign->foreign_table = table;
+    foreign->foreign_table_name = table->name;
+    foreign->foreign_index = index;
+    foreign->n_fields = i;
+    foreign->foreign_col_names = mem_heap_alloc(foreign->heap,
+                        i * sizeof(void*));
+    for (i = 0; i < foreign->n_fields; i++) {
+        foreign->foreign_col_names[i] = mem_heap_alloc(foreign->heap,
+                    1 + ut_strlen(columns[i]->name));
+        ut_memcpy(foreign->foreign_col_names[i], columns[i]->name,
+                    1 + ut_strlen(columns[i]->name));
+    }
+
+    ptr = dict_scan_table_name(ptr, &referenced_table, name);
+
+    if (!referenced_table) {
+        dict_foreign_free(foreign);
+
+        return(DB_CANNOT_ADD_CONSTRAINT);
+    }
+
+    ptr = dict_accept(ptr, "(", &success);
+
+    if (!success) {
+        dict_foreign_free(foreign);
+        return(DB_CANNOT_ADD_CONSTRAINT);
+    }
+
+    /* Scan the columns in the second list; columns[] and column_names[]
+    are reused, the names of the first list were copied above */
+    i = 0;
+
+col_loop2:
+    if (i >= max_cols) {
+        /* No room left in columns[] and column_names[] */
+
+        dict_foreign_free(foreign);
+        return(DB_CANNOT_ADD_CONSTRAINT);
+    }
+
+    ptr = dict_scan_col(ptr, &success, referenced_table, columns + i,
+                        column_names + i);
+    i++;
+
+    if (!success) {
+        dict_foreign_free(foreign);
+        return(DB_CANNOT_ADD_CONSTRAINT);
+    }
+
+    ptr = dict_accept(ptr, ",", &success);
+
+    if (success) {
+        goto col_loop2;
+    }
+
+    ptr = dict_accept(ptr, ")", &success);
+
+    if (!success || foreign->n_fields != i) {
+        dict_foreign_free(foreign);
+
+        return(DB_CANNOT_ADD_CONSTRAINT);
+    }
+
+    /* Try to find an index which contains the columns as the first fields
+    and in the right order, and the types are the same as in
+    foreign->foreign_index */
+
+    index = dict_foreign_find_index(referenced_table, column_names, i,
+                        foreign->foreign_index);
+
+    if (!index) {
+        dict_foreign_free(foreign);
+        return(DB_CANNOT_ADD_CONSTRAINT);
+    }
+
+    foreign->referenced_index = index;
+    foreign->referenced_table = referenced_table;
+
+    foreign->referenced_table_name = mem_heap_alloc(foreign->heap,
+                    1 + ut_strlen(referenced_table->name));
+
+    ut_memcpy(foreign->referenced_table_name, referenced_table->name,
+                    1 + ut_strlen(referenced_table->name));
+
+    foreign->referenced_col_names = mem_heap_alloc(foreign->heap,
+                        i * sizeof(void*));
+    for (i = 0; i < foreign->n_fields; i++) {
+        foreign->referenced_col_names[i]
+                = mem_heap_alloc(foreign->heap,
+                    1 + ut_strlen(columns[i]->name));
+        ut_memcpy(
+            foreign->referenced_col_names[i], columns[i]->name,
+                    1 + ut_strlen(columns[i]->name));
+    }
+
+    /* We found an ok constraint definition: add to the lists */
+
+    UT_LIST_ADD_LAST(foreign_list, table->foreign_list, foreign);
+    UT_LIST_ADD_LAST(referenced_list, referenced_table->referenced_list,
+                                foreign);
+    goto loop;
+}
+
+/*==================== END OF FOREIGN KEY PROCESSING ====================*/
+
/**************************************************************************
Adds a stored procedure object to the dictionary cache. */
@@ -1733,77 +2521,127 @@ dict_tree_build_data_tuple(
}
/*************************************************************************
-Calculates new estimates for table and index statistics. The statistics
-are used in query optimization. */
+Calculates the minimum record length in an index. */
-void
-dict_update_statistics(
-/*===================*/
- dict_table_t* table) /* in: table */
+ulint
+dict_index_calc_min_rec_len(
+/*========================*/
+ dict_index_t* index) /* in: index */
{
- mem_heap_t* heap;
- dict_index_t* index;
- dtuple_t* start;
- dtuple_t* end;
- ulint n_rows;
- ulint n_vals;
- ulint size;
- ulint sum_of_index_sizes = 0;
-
- /* Estimate the number of records in the clustered index */
- index = dict_table_get_first_index(table);
-
- heap = mem_heap_create(500);
-
- start = dtuple_create(heap, 0);
- end = dtuple_create(heap, 0);
+ ulint sum = 0;
+ ulint i;
- n_rows = btr_estimate_n_rows_in_range(index, start, PAGE_CUR_G,
- end, PAGE_CUR_L);
- mem_heap_free(heap);
+ for (i = 0; i < dict_index_get_n_fields(index); i++) {
+ sum += dtype_get_fixed_size(dict_index_get_nth_type(index, i));
+ }
- if (n_rows > 0) {
- /* For small tables our estimate function tends to give
- values 1 too big */
- n_rows--;
+ if (sum > 127) {
+ sum += 2 * dict_index_get_n_fields(index);
+ } else {
+ sum += dict_index_get_n_fields(index);
}
- mutex_enter(&(dict_sys->mutex));
+ sum += REC_N_EXTRA_BYTES;
- table->stat_last_estimate_counter = table->stat_modif_counter;
- table->stat_n_rows = n_rows;
+ return(sum);
+}
- mutex_exit(&(dict_sys->mutex));
+/*************************************************************************
+Calculates new estimates for table and index statistics. The statistics
+are used in query optimization. */
+
+void
+dict_update_statistics_low(
+/*=======================*/
+ dict_table_t* table, /* in: table */
+ ibool has_dict_mutex) /* in: TRUE if the caller has the
+ dictionary mutex */
+{
+ dict_index_t* index;
+ ulint size;
+ ulint sum_of_index_sizes = 0;
/* Find out the sizes of the indexes and how many different values
for the key they approximately have */
-
+
+ index = dict_table_get_first_index(table);
+
while (index) {
- n_vals = btr_estimate_number_of_different_key_vals(index);
size = btr_get_size(index, BTR_TOTAL_SIZE);
+ index->stat_index_size = size;
+
sum_of_index_sizes += size;
- mutex_enter(&(dict_sys->mutex));
+ size = btr_get_size(index, BTR_N_LEAF_PAGES);
- index->stat_n_diff_key_vals = n_vals;
- index->stat_index_size = size;
+ if (size == 0) {
+ /* The root node of the tree is a leaf */
+ size = 1;
+ }
- mutex_exit(&(dict_sys->mutex));
+ index->stat_n_leaf_pages = size;
+
+ btr_estimate_number_of_different_key_vals(index);
index = dict_table_get_next_index(index);
}
index = dict_table_get_first_index(table);
+ table->stat_n_rows = index->stat_n_diff_key_vals[
+ dict_index_get_n_unique(index)];
+
table->stat_clustered_index_size = index->stat_index_size;
table->stat_sum_of_other_index_sizes = sum_of_index_sizes
- - index->stat_index_size;
+ - index->stat_index_size;
table->stat_last_estimate_counter = table->stat_modif_counter;
}
+/*************************************************************************
+Recomputes the table and index statistics that are used in query
+optimization. This is dict_update_statistics_low() called with
+has_dict_mutex set to FALSE. */
+
+void
+dict_update_statistics(
+/*===================*/
+    dict_table_t*   table)  /* in: table */
+{
+    ibool   has_dict_mutex = FALSE;
+
+    dict_update_statistics_low(table, has_dict_mutex);
+}
+
+/**************************************************************************
+Prints to the standard output the id of a foreign key constraint, the name
+and the constrained columns of the foreign table, and the name and the
+referenced columns of the referenced table. */
+static
+void
+dict_foreign_print_low(
+/*===================*/
+    dict_foreign_t* foreign)    /* in: foreign key constraint */
+{
+    ulint   col_no;
+
+    ut_ad(mutex_own(&(dict_sys->mutex)));
+
+    /* First line: the foreign key side */
+
+    printf(" FOREIGN KEY CONSTRAINT %s: %s (", foreign->id,
+                    foreign->foreign_table_name);
+    col_no = 0;
+
+    while (col_no < foreign->n_fields) {
+        printf(" %s", foreign->foreign_col_names[col_no]);
+        col_no++;
+    }
+
+    printf(" )\n");
+
+    /* Second line: the referenced side */
+
+    printf(" REFERENCES %s (", foreign->referenced_table_name);
+
+    col_no = 0;
+
+    while (col_no < foreign->n_fields) {
+        printf(" %s", foreign->referenced_col_names[col_no]);
+        col_no++;
+    }
+
+    printf(" )\n");
+}
+
/**************************************************************************
Prints a table data. */
@@ -1839,31 +2677,57 @@ dict_table_print_by_name(
/**************************************************************************
Prints a table data. */
-static
+
void
dict_table_print_low(
/*=================*/
dict_table_t* table) /* in: table */
{
- ulint i;
dict_index_t* index;
+ dict_foreign_t* foreign;
+ ulint i;
ut_ad(mutex_own(&(dict_sys->mutex)));
+ dict_update_statistics_low(table, TRUE);
+
printf("--------------------------------------\n");
- printf("TABLE INFO: name %s, columns %lu, indexes %lu\n", table->name,
- table->n_cols, UT_LIST_GET_LEN(table->indexes));
- for (i = 0; i < table->n_cols; i++) {
- printf(" ");
+ printf(
+ "TABLE: name %s, id %lu %lu, columns %lu, indexes %lu, appr.rows %lu\n",
+ table->name,
+ ut_dulint_get_high(table->id),
+ ut_dulint_get_low(table->id),
+ table->n_cols, UT_LIST_GET_LEN(table->indexes),
+ (ulint)table->stat_n_rows);
+ printf(" COLUMNS: ");
+
+ for (i = 0; i < table->n_cols - 1; i++) {
dict_col_print_low(dict_table_get_nth_col(table, i));
+ printf("; ");
}
+ printf("\n");
+
index = UT_LIST_GET_FIRST(table->indexes);
while (index != NULL) {
dict_index_print_low(index);
index = UT_LIST_GET_NEXT(indexes, index);
}
+
+ foreign = UT_LIST_GET_FIRST(table->foreign_list);
+
+ while (foreign != NULL) {
+ dict_foreign_print_low(foreign);
+ foreign = UT_LIST_GET_NEXT(foreign_list, foreign);
+ }
+
+ foreign = UT_LIST_GET_FIRST(table->referenced_list);
+
+ while (foreign != NULL) {
+ dict_foreign_print_low(foreign);
+ foreign = UT_LIST_GET_NEXT(referenced_list, foreign);
+ }
}
/**************************************************************************
@@ -1879,7 +2743,7 @@ dict_col_print_low(
ut_ad(mutex_own(&(dict_sys->mutex)));
type = dict_col_get_type(col);
- printf("COLUMN: name %s; ", col->name);
+ printf("%s: ", col->name);
dtype_print(type);
}
@@ -1892,28 +2756,47 @@ dict_index_print_low(
/*=================*/
dict_index_t* index) /* in: index */
{
- ulint i;
dict_tree_t* tree;
+ ib_longlong n_vals;
+ ulint i;
ut_ad(mutex_own(&(dict_sys->mutex)));
tree = index->tree;
-
+
+ if (index->n_user_defined_cols > 0) {
+ n_vals = index->stat_n_diff_key_vals[
+ index->n_user_defined_cols];
+ } else {
+ n_vals = index->stat_n_diff_key_vals[1];
+ }
+
+
printf(
- "INDEX INFO: name %s, table name %s, fields %lu, type %lu\n",
- index->name, index->table_name, index->n_fields,
- index->type);
- printf(" root node: space %lu, page number %lu\n",
- tree->space, tree->page);
+ " INDEX: name %s, table name %s, id %lu %lu, fields %lu/%lu, type %lu\n",
+ index->name, index->table_name,
+ ut_dulint_get_high(tree->id),
+ ut_dulint_get_low(tree->id),
+ index->n_user_defined_cols,
+ index->n_fields, index->type);
+ printf(
+ " root page %lu, appr.key vals %lu, leaf pages %lu, size pages %lu\n",
+ tree->page,
+ (ulint)n_vals,
+ index->stat_n_leaf_pages,
+ index->stat_index_size);
+ printf(" FIELDS: ");
+
for (i = 0; i < index->n_fields; i++) {
- printf(" ");
dict_field_print_low(dict_index_get_nth_field(index, i));
}
- btr_print_size(tree);
+ printf("\n");
+
+/* btr_print_size(tree); */
- btr_print_tree(tree, 7);
+/* btr_print_tree(tree, 7); */
}
/**************************************************************************
@@ -1926,6 +2809,5 @@ dict_field_print_low(
{
ut_ad(mutex_own(&(dict_sys->mutex)));
- printf("FIELD: column name %s, order criterion %lu\n", field->name,
- field->order);
+ printf(" %s", field->name);
}
diff --git a/innobase/dict/dict0load.c b/innobase/dict/dict0load.c
index be16988086a..dcdc9ee01cd 100644
--- a/innobase/dict/dict0load.c
+++ b/innobase/dict/dict0load.c
@@ -48,8 +48,171 @@ dict_load_fields(
/************************************************************************
+Finds the first table name in the given database. */
+
+char*
+dict_get_first_table_name_in_db(
+/*============================*/
+ /* out, own: table name, NULL if does not exist;
+ the caller must free the memory in the string! */
+ char* name) /* in: database name which ends to '/' */
+{
+ dict_table_t* sys_tables;
+ btr_pcur_t pcur;
+ dict_index_t* sys_index;
+ dtuple_t* tuple;
+ mem_heap_t* heap;
+ dfield_t* dfield;
+ rec_t* rec;
+ byte* field;
+ ulint len;
+ char* table_name;
+ mtr_t mtr;
+
+ ut_ad(mutex_own(&(dict_sys->mutex)));
+
+ heap = mem_heap_create(1000);
+
+ mtr_start(&mtr);
+
+ sys_tables = dict_table_get_low("SYS_TABLES");
+ sys_index = UT_LIST_GET_FIRST(sys_tables->indexes);
+
+ tuple = dtuple_create(heap, 1);
+ dfield = dtuple_get_nth_field(tuple, 0);
+
+ dfield_set_data(dfield, name, ut_strlen(name));
+ dict_index_copy_types(tuple, sys_index, 1);
+
+ btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE,
+ BTR_SEARCH_LEAF, &pcur, &mtr);
+loop:
+ rec = btr_pcur_get_rec(&pcur);
+
+ if (!btr_pcur_is_on_user_rec(&pcur, &mtr)) {
+ /* Not found */
+
+ btr_pcur_close(&pcur);
+ mtr_commit(&mtr);
+ mem_heap_free(heap);
+
+ return(NULL);
+ }
+
+ field = rec_get_nth_field(rec, 0, &len);
+
+ if (len < strlen(name)
+ || ut_memcmp(name, field, strlen(name)) != 0) {
+ /* Not found */
+
+ btr_pcur_close(&pcur);
+ mtr_commit(&mtr);
+ mem_heap_free(heap);
+
+ return(NULL);
+ }
+
+ if (!rec_get_deleted_flag(rec)) {
+
+ /* We found one */
+
+ table_name = mem_alloc(len + 1);
+ ut_memcpy(table_name, field, len);
+ table_name[len] = '\0';
+
+ btr_pcur_close(&pcur);
+ mtr_commit(&mtr);
+ mem_heap_free(heap);
+
+ return(table_name);
+ }
+
+ btr_pcur_move_to_next_user_rec(&pcur, &mtr);
+
+ goto loop;
+}
+
+/************************************************************************
+Prints to the standard output information on all tables found in the data
+dictionary system table. Reserves the dictionary mutex for the duration of
+the scan. A record whose name field does not fit in the local name buffer is
+reported to stderr and skipped. */
+
+void
+dict_print(void)
+/*============*/
+{
+    dict_table_t*   sys_tables;
+    dict_index_t*   sys_index;
+    dict_table_t*   table;
+    btr_pcur_t      pcur;
+    rec_t*          rec;
+    byte*           field;
+    ulint           len;
+    char            table_name[10000];
+    mtr_t           mtr;
+
+    mutex_enter(&(dict_sys->mutex));
+
+    mtr_start(&mtr);
+
+    sys_tables = dict_table_get_low("SYS_TABLES");
+    sys_index = UT_LIST_GET_FIRST(sys_tables->indexes);
+
+    btr_pcur_open_at_index_side(TRUE, sys_index, BTR_SEARCH_LEAF, &pcur,
+                                TRUE, &mtr);
+loop:
+    btr_pcur_move_to_next_user_rec(&pcur, &mtr);
+
+    rec = btr_pcur_get_rec(&pcur);
+
+    if (!btr_pcur_is_on_user_rec(&pcur, &mtr)) {
+        /* end of index */
+
+        btr_pcur_close(&pcur);
+        mtr_commit(&mtr);
+
+        mutex_exit(&(dict_sys->mutex));
+
+        return;
+    }
+
+    field = rec_get_nth_field(rec, 0, &len);
+
+    if (!rec_get_deleted_flag(rec)) {
+
+        /* We found one */
+
+        if (len >= sizeof(table_name)) {
+            /* The name and its terminating null character would
+            not fit in table_name: skip the record instead of
+            writing past the end of the buffer */
+
+            fprintf(stderr,
+            "InnoDB: Error: too long table name in SYS_TABLES\n");
+
+            goto loop;
+        }
+
+        ut_memcpy(table_name, field, len);
+        table_name[len] = '\0';
+
+        /* The cursor position is stored and the mini-transaction
+        committed before the table is loaded and printed; the
+        position is restored in a new mini-transaction afterwards */
+
+        btr_pcur_store_position(&pcur, &mtr);
+
+        mtr_commit(&mtr);
+
+        table = dict_table_get_low(table_name);
+
+        if (table == NULL) {
+            fprintf(stderr, "InnoDB: Failed to load table %s\n",
+                                table_name);
+        } else {
+            dict_update_statistics_low(table, TRUE);
+
+            dict_table_print_low(table);
+        }
+
+        mtr_start(&mtr);
+
+        btr_pcur_restore_position(BTR_SEARCH_LEAF, &pcur, &mtr);
+    }
+
+    goto loop;
+}
+
+/************************************************************************
Loads a table definition and also all its index definitions, and also
-the cluster definition if the table is a member in a cluster. */
+the cluster definition if the table is a member in a cluster. Also loads
+all foreign key constraints where the foreign key is in the table or where
+a foreign key references columns in this table. Adds all these to the data
+dictionary cache. */
dict_table_t*
dict_load_table(
@@ -59,7 +222,6 @@ dict_load_table(
{
dict_table_t* table;
dict_table_t* sys_tables;
- mtr_t mtr;
btr_pcur_t pcur;
dict_index_t* sys_index;
dtuple_t* tuple;
@@ -71,6 +233,7 @@ dict_load_table(
char* buf;
ulint space;
ulint n_cols;
+ mtr_t mtr;
ut_ad(mutex_own(&(dict_sys->mutex)));
@@ -178,6 +341,106 @@ dict_load_table(
dict_load_indexes(table, heap);
+ ut_a(DB_SUCCESS == dict_load_foreigns(table->name));
+
+ mem_heap_free(heap);
+
+ return(table);
+}
+
+/***************************************************************************
+Loads a table object based on the table id. */
+
+dict_table_t*
+dict_load_table_on_id(
+/*==================*/
+ /* out: table; NULL if table does not exist */
+ dulint table_id) /* in: table id */
+{
+ byte id_buf[8];
+ btr_pcur_t pcur;
+ mem_heap_t* heap;
+ dtuple_t* tuple;
+ dfield_t* dfield;
+ dict_index_t* sys_table_ids;
+ dict_table_t* sys_tables;
+ rec_t* rec;
+ byte* field;
+ ulint len;
+ dict_table_t* table;
+ char* name;
+ mtr_t mtr;
+
+ ut_ad(mutex_own(&(dict_sys->mutex)));
+
+ /* NOTE that the operation of this function is protected by
+ the dictionary mutex, and therefore no deadlocks can occur
+ with other dictionary operations. */
+
+ mtr_start(&mtr);
+ /*---------------------------------------------------*/
+ /* Get the secondary index based on ID for table SYS_TABLES */
+ sys_tables = dict_sys->sys_tables;
+ sys_table_ids = dict_table_get_next_index(
+ dict_table_get_first_index(sys_tables));
+ heap = mem_heap_create(256);
+
+ tuple = dtuple_create(heap, 1);
+ dfield = dtuple_get_nth_field(tuple, 0);
+
+ /* Write the table id in byte format to id_buf */
+ mach_write_to_8(id_buf, table_id);
+
+ dfield_set_data(dfield, id_buf, 8);
+ dict_index_copy_types(tuple, sys_table_ids, 1);
+
+ btr_pcur_open_on_user_rec(sys_table_ids, tuple, PAGE_CUR_GE,
+ BTR_SEARCH_LEAF, &pcur, &mtr);
+ rec = btr_pcur_get_rec(&pcur);
+
+ if (!btr_pcur_is_on_user_rec(&pcur, &mtr)
+ || rec_get_deleted_flag(rec)) {
+ /* Not found */
+
+ btr_pcur_close(&pcur);
+ mtr_commit(&mtr);
+ mem_heap_free(heap);
+
+ return(NULL);
+ }
+
+ /*---------------------------------------------------*/
+ /* Now we have the record in the secondary index containing the
+ table ID and NAME */
+
+ rec = btr_pcur_get_rec(&pcur);
+ field = rec_get_nth_field(rec, 0, &len);
+ ut_ad(len == 8);
+
+ /* Check if the table id in record is the one searched for */
+ if (ut_dulint_cmp(table_id, mach_read_from_8(field)) != 0) {
+
+ btr_pcur_close(&pcur);
+ mtr_commit(&mtr);
+ mem_heap_free(heap);
+
+ return(NULL);
+ }
+
+ /* Now we get the table name from the record */
+ field = rec_get_nth_field(rec, 1, &len);
+
+ name = mem_heap_alloc(heap, len + 1);
+ ut_memcpy(name, field, len);
+ name[len] = '\0';
+
+ /* Load the table definition to memory */
+ table = dict_load_table(name);
+
+ ut_a(table);
+
+ btr_pcur_close(&pcur);
+ mtr_commit(&mtr);
mem_heap_free(heap);
return(table);
@@ -305,7 +568,8 @@ dict_load_columns(
}
/************************************************************************
-Loads definitions for table indexes. */
+Loads definitions for table indexes. Adds them to the data dictionary cache.
+*/
static
void
dict_load_indexes(
@@ -446,7 +710,6 @@ dict_load_fields(
{
dict_table_t* sys_fields;
dict_index_t* sys_index;
- mtr_t mtr;
btr_pcur_t pcur;
dtuple_t* tuple;
dfield_t* dfield;
@@ -456,6 +719,7 @@ dict_load_fields(
ulint len;
byte* buf;
ulint i;
+ mtr_t mtr;
ut_ad(mutex_own(&(dict_sys->mutex)));
@@ -512,100 +776,328 @@ dict_load_fields(
mtr_commit(&mtr);
}
+/************************************************************************
+Loads foreign key constraint col names (also for the referenced table).
+Reads foreign->n_fields consecutive records of SYS_FOREIGN_COLS, starting
+from the first record whose first field is >= id, and stores null-terminated
+copies of the column names, allocated from foreign->heap, to
+foreign->foreign_col_names and foreign->referenced_col_names. A record that
+is missing, is delete-marked, or does not carry the expected id and position
+fails an ut_a() assertion. */
+static
+void
+dict_load_foreign_cols(
+/*===================*/
+ char* id, /* in: foreign constraint id as a null-
+ terminated string */
+ dict_foreign_t* foreign)/* in: foreign constraint object; n_fields
+ must already be set, because it sizes the
+ name arrays and bounds the scan */
+{
+ dict_table_t* sys_foreign_cols;
+ dict_index_t* sys_index;
+ btr_pcur_t pcur;
+ dtuple_t* tuple;
+ dfield_t* dfield;
+ char* col_name;
+ rec_t* rec;
+ byte* field;
+ ulint len;
+ ulint i;
+ mtr_t mtr;
+
+ ut_ad(mutex_own(&(dict_sys->mutex)));
+
+ foreign->foreign_col_names = mem_heap_alloc(foreign->heap,
+ foreign->n_fields * sizeof(void*));
+
+ foreign->referenced_col_names = mem_heap_alloc(foreign->heap,
+ foreign->n_fields * sizeof(void*));
+ mtr_start(&mtr);
+
+ sys_foreign_cols = dict_table_get_low("SYS_FOREIGN_COLS");
+ sys_index = UT_LIST_GET_FIRST(sys_foreign_cols->indexes);
+
+ tuple = dtuple_create(foreign->heap, 1); /* the search tuple is also allocated from foreign->heap */
+ dfield = dtuple_get_nth_field(tuple, 0);
+
+ dfield_set_data(dfield, id, ut_strlen(id));
+ dict_index_copy_types(tuple, sys_index, 1);
+
+ btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE,
+ BTR_SEARCH_LEAF, &pcur, &mtr);
+ for (i = 0; i < foreign->n_fields; i++) {
+
+ rec = btr_pcur_get_rec(&pcur);
+
+ ut_a(btr_pcur_is_on_user_rec(&pcur, &mtr));
+ ut_a(!rec_get_deleted_flag(rec));
+
+ field = rec_get_nth_field(rec, 0, &len); /* field 0 must equal id */
+ ut_a(len == ut_strlen(id));
+ ut_a(ut_memcmp(id, field, len) == 0);
+
+ field = rec_get_nth_field(rec, 1, &len); /* field 1 must be the 4-byte position i */
+ ut_a(len == 4);
+ ut_a(i == mach_read_from_4(field));
+
+ field = rec_get_nth_field(rec, 4, &len); /* field 4 is copied to foreign_col_names[i] */
+
+ col_name = mem_heap_alloc(foreign->heap, len + 1);
+ ut_memcpy(col_name, field, len);
+ col_name[len] = '\0';
+
+ foreign->foreign_col_names[i] = col_name;
+
+ field = rec_get_nth_field(rec, 5, &len); /* field 5 is copied to referenced_col_names[i] */
+
+ col_name = mem_heap_alloc(foreign->heap, len + 1);
+ ut_memcpy(col_name, field, len);
+ col_name[len] = '\0';
+
+ foreign->referenced_col_names[i] = col_name;
+
+ btr_pcur_move_to_next_user_rec(&pcur, &mtr);
+ }
+
+ btr_pcur_close(&pcur);
+ mtr_commit(&mtr);
+}
+
/***************************************************************************
-Loads a table object based on the table id. */
+Loads a foreign key constraint to the dictionary cache. */
+static
+ulint
+dict_load_foreign(
+/*==============*/
+ /* out: DB_SUCCESS or error code */
+ char* id) /* in: foreign constraint id as a null-terminated
+ string */
+{
+ dict_foreign_t* foreign;
+ dict_table_t* sys_foreign;
+ btr_pcur_t pcur;
+ dict_index_t* sys_index;
+ dtuple_t* tuple;
+ mem_heap_t* heap2;
+ dfield_t* dfield;
+ rec_t* rec;
+ byte* field;
+ ulint len;
+ ulint err;
+ mtr_t mtr;
+
+ ut_ad(mutex_own(&(dict_sys->mutex)));
-dict_table_t*
-dict_load_table_on_id(
-/*==================*/
- /* out: table; NULL if table does not exist */
- dulint table_id) /* in: table id */
+ heap2 = mem_heap_create(1000);
+
+ mtr_start(&mtr);
+
+ sys_foreign = dict_table_get_low("SYS_FOREIGN");
+ sys_index = UT_LIST_GET_FIRST(sys_foreign->indexes);
+
+ tuple = dtuple_create(heap2, 1);
+ dfield = dtuple_get_nth_field(tuple, 0);
+
+ dfield_set_data(dfield, id, ut_strlen(id));
+ dict_index_copy_types(tuple, sys_index, 1);
+
+ btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE,
+ BTR_SEARCH_LEAF, &pcur, &mtr);
+ rec = btr_pcur_get_rec(&pcur);
+
+ if (!btr_pcur_is_on_user_rec(&pcur, &mtr)
+ || rec_get_deleted_flag(rec)) {
+ /* Not found */
+
+ fprintf(stderr,
+ "InnoDB: Error A: cannot load foreign constraint %s\n", id);
+
+ btr_pcur_close(&pcur);
+ mtr_commit(&mtr);
+ mem_heap_free(heap2);
+
+ return(DB_ERROR);
+ }
+
+ field = rec_get_nth_field(rec, 0, &len);
+
+ /* Check if the id in record is the searched one */
+ if (len != ut_strlen(id) || ut_memcmp(id, field, len) != 0) {
+
+ fprintf(stderr,
+ "InnoDB: Error B: cannot load foreign constraint %s\n", id);
+
+ btr_pcur_close(&pcur);
+ mtr_commit(&mtr);
+ mem_heap_free(heap2);
+
+ return(DB_ERROR);
+ }
+
+ /* Read the table names and the number of columns associated
+ with the constraint */
+
+ mem_heap_free(heap2);
+
+ foreign = dict_mem_foreign_create();
+
+ foreign->n_fields = mach_read_from_4(rec_get_nth_field(rec, 5, &len));
+
+ ut_a(len == 4);
+
+ foreign->id = mem_heap_alloc(foreign->heap, ut_strlen(id) + 1);
+
+ ut_memcpy(foreign->id, id, ut_strlen(id) + 1);
+
+ field = rec_get_nth_field(rec, 3, &len);
+
+ foreign->foreign_table_name = mem_heap_alloc(foreign->heap, 1 + len);
+
+ ut_memcpy(foreign->foreign_table_name, field, len);
+ foreign->foreign_table_name[len] = '\0';
+
+ field = rec_get_nth_field(rec, 4, &len);
+
+ foreign->referenced_table_name = mem_heap_alloc(foreign->heap, 1 + len);
+
+ ut_memcpy(foreign->referenced_table_name, field, len);
+ foreign->referenced_table_name[len] = '\0';
+
+ btr_pcur_close(&pcur);
+ mtr_commit(&mtr);
+
+ dict_load_foreign_cols(id, foreign);
+
+ /* Note that there may already be a foreign constraint object in
+ the dictionary cache for this constraint: then the following
+ call only sets the pointers in it to point to the appropriate table
+ and index objects and frees the newly created object foreign. */
+
+ err = dict_foreign_add_to_cache(foreign);
+
+ return(err);
+}
+
+/***************************************************************************
+Loads foreign key constraints where the table is either the foreign key
+holder or where the table is referenced by a foreign key. Adds these
+constraints to the data dictionary. Note that we know that the dictionary
+cache already contains all constraints where the other relevant table is
+already in the dictionary cache. */
+
+ulint
+dict_load_foreigns(
+/*===============*/
+ /* out: DB_SUCCESS or error code */
+ char* table_name) /* in: table name */
{
- mtr_t mtr;
- byte id_buf[8];
btr_pcur_t pcur;
mem_heap_t* heap;
dtuple_t* tuple;
dfield_t* dfield;
- dict_index_t* sys_table_ids;
- dict_table_t* sys_tables;
+ dict_index_t* sec_index;
+ dict_table_t* sys_foreign;
rec_t* rec;
byte* field;
ulint len;
- dict_table_t* table;
- char* name;
+ char* id ;
+ ulint err;
+ mtr_t mtr;
ut_ad(mutex_own(&(dict_sys->mutex)));
- /* NOTE that the operation of this function is protected by
- the dictionary mutex, and therefore no deadlocks can occur
- with other dictionary operations. */
+ sys_foreign = dict_table_get_low("SYS_FOREIGN");
+
+ if (sys_foreign == NULL) {
+ /* No foreign keys defined yet in this database */
+
+ fprintf(stderr,
+ "InnoDB: Error: no foreign key system tables in the database\n");
+
+ return(DB_ERROR);
+ }
mtr_start(&mtr);
- /*---------------------------------------------------*/
- /* Get the secondary index based on ID for table SYS_TABLES */
- sys_tables = dict_sys->sys_tables;
- sys_table_ids = dict_table_get_next_index(
- dict_table_get_first_index(sys_tables));
+
+ /* Get the secondary index based on FOR_NAME from table
+ SYS_FOREIGN */
+
+ sec_index = dict_table_get_next_index(
+ dict_table_get_first_index(sys_foreign));
+start_load:
heap = mem_heap_create(256);
tuple = dtuple_create(heap, 1);
dfield = dtuple_get_nth_field(tuple, 0);
- /* Write the table id in byte format to id_buf */
- mach_write_to_8(id_buf, table_id);
-
- dfield_set_data(dfield, id_buf, 8);
- dict_index_copy_types(tuple, sys_table_ids, 1);
+ dfield_set_data(dfield, table_name, ut_strlen(table_name));
+ dict_index_copy_types(tuple, sec_index, 1);
- btr_pcur_open_on_user_rec(sys_table_ids, tuple, PAGE_CUR_GE,
+ btr_pcur_open_on_user_rec(sec_index, tuple, PAGE_CUR_GE,
BTR_SEARCH_LEAF, &pcur, &mtr);
+loop:
rec = btr_pcur_get_rec(&pcur);
- if (!btr_pcur_is_on_user_rec(&pcur, &mtr)
- || rec_get_deleted_flag(rec)) {
- /* Not found */
+ if (!btr_pcur_is_on_user_rec(&pcur, &mtr)) {
+ /* End of index */
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
- mem_heap_free(heap);
-
- return(NULL);
+ goto load_next_index;
}
- /*---------------------------------------------------*/
- /* Now we have the record in the secondary index containing the
- table ID and NAME */
+ /* Now we have the record in the secondary index containing a table
+ name and a foreign constraint ID */
rec = btr_pcur_get_rec(&pcur);
field = rec_get_nth_field(rec, 0, &len);
- ut_ad(len == 8);
- /* Check if the table id in record is the one searched for */
- if (ut_dulint_cmp(table_id, mach_read_from_8(field)) != 0) {
+ /* Check if the table name in record is the one searched for */
+ if (len != ut_strlen(table_name)
+ || 0 != ut_memcmp(field, table_name, len)) {
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
- mem_heap_free(heap);
-
- return(NULL);
+ goto load_next_index;
}
- /* Now we get the table name from the record */
+ if (rec_get_deleted_flag(rec)) {
+
+ goto next_rec;
+ }
+
+ /* Now we get a foreign key constraint id */
field = rec_get_nth_field(rec, 1, &len);
- name = mem_heap_alloc(heap, len + 1);
- ut_memcpy(name, field, len);
- name[len] = '\0';
+ id = mem_heap_alloc(heap, len + 1);
+ ut_memcpy(id, field, len);
+ id[len] = '\0';
- /* Load the table definition to memory */
- table = dict_load_table(name);
+ btr_pcur_store_position(&pcur, &mtr);
- ut_a(table);
+ mtr_commit(&mtr);
+
+ /* Load the foreign constraint definition to the dictionary cache */
+ err = dict_load_foreign(id);
+
+ if (err != DB_SUCCESS) {
+ btr_pcur_close(&pcur);
+ mem_heap_free(heap);
+
+ return(err);
+ }
+
+ mtr_start(&mtr);
+
+ btr_pcur_restore_position(BTR_SEARCH_LEAF, &pcur, &mtr);
+next_rec:
+ btr_pcur_move_to_next_user_rec(&pcur, &mtr);
+
+ goto loop;
+
+load_next_index:
btr_pcur_close(&pcur);
mtr_commit(&mtr);
mem_heap_free(heap);
+
+ sec_index = dict_table_get_next_index(sec_index);
- return(table);
+ if (sec_index != NULL) {
+
+ mtr_start(&mtr);
+
+ goto start_load;
+ }
+
+ return(DB_SUCCESS);
}
diff --git a/innobase/dict/dict0mem.c b/innobase/dict/dict0mem.c
index 6947db11aea..57926ab9d2f 100644
--- a/innobase/dict/dict0mem.c
+++ b/innobase/dict/dict0mem.c
@@ -18,6 +18,7 @@ Created 1/8/1996 Heikki Tuuri
#include "dict0dict.h"
#include "que0que.h"
#include "pars0pars.h"
+#include "lock0lock.h"
#define DICT_HEAP_SIZE 100 /* initial memory heap size when
creating a table or index object */
@@ -63,7 +64,12 @@ dict_mem_table_create(
table->cols = mem_heap_alloc(heap, (n_cols + DATA_N_SYS_COLS)
* sizeof(dict_col_t));
UT_LIST_INIT(table->indexes);
+
+ table->auto_inc_lock = mem_heap_alloc(heap, lock_get_size());
+
UT_LIST_INIT(table->locks);
+ UT_LIST_INIT(table->foreign_list);
+ UT_LIST_INIT(table->referenced_list);
table->does_not_fit_in_memory = FALSE;
@@ -199,6 +205,8 @@ dict_mem_index_create(
* sizeof(dict_field_t));
/* The '1 +' above prevents allocation
of an empty mem block */
+ index->stat_n_diff_key_vals = NULL;
+
index->cached = FALSE;
index->magic_n = DICT_INDEX_MAGIC_N;
@@ -206,6 +214,41 @@ dict_mem_index_create(
}
/**************************************************************************
+Creates and initializes a foreign constraint memory object. A new memory
+heap is created for the object, the struct itself is allocated from that
+heap, and a pointer to the heap is stored in the struct (foreign->heap).
+Every other pointer field is initialized to NULL and n_fields to 0.
+NOTE(review): the literal 100 passed to mem_heap_create has the same value
+as DICT_HEAP_SIZE defined at the top of this file; confirm whether the
+macro was meant to be used here. */
+
+dict_foreign_t*
+dict_mem_foreign_create(void)
+/*=========================*/
+ /* out, own: foreign constraint struct; it is
+ allocated from the heap stored in its own heap
+ field */
+{
+ dict_foreign_t* foreign;
+ mem_heap_t* heap;
+
+ heap = mem_heap_create(100);
+
+ foreign = mem_heap_alloc(heap, sizeof(dict_foreign_t));
+
+ foreign->heap = heap;
+
+ foreign->id = NULL;
+
+ foreign->foreign_table_name = NULL;
+ foreign->foreign_table = NULL;
+ foreign->foreign_col_names = NULL;
+
+ foreign->referenced_table_name = NULL;
+ foreign->referenced_table = NULL;
+ foreign->referenced_col_names = NULL;
+
+ foreign->n_fields = 0;
+
+ foreign->foreign_index = NULL;
+ foreign->referenced_index = NULL;
+
+ return(foreign);
+}
+
+/**************************************************************************
Adds a field definition to an index. NOTE: does not take a copy
of the column name if the field is a column. The memory occupied
by the column name may be released only after publishing the index. */
diff --git a/innobase/fil/fil0fil.c b/innobase/fil/fil0fil.c
index b386f224d11..62389c8394c 100644
--- a/innobase/fil/fil0fil.c
+++ b/innobase/fil/fil0fil.c
@@ -77,6 +77,9 @@ out of the LRU-list and keep a count of pending operations. When an operation
completes, we decrement the count and return the file node to the LRU-list if
the count drops to zero. */
+ulint fil_n_pending_log_flushes = 0;
+ulint fil_n_pending_tablespace_flushes = 0;
+
/* Null file address */
fil_addr_t fil_addr_null = {FIL_NULL, 0};
@@ -856,6 +859,15 @@ fil_node_prepare_for_io(
last_node = UT_LIST_GET_LAST(system->LRU);
+ if (last_node == NULL) {
+ fprintf(stderr,
+ "InnoDB: Error: cannot close any file to open another for i/o\n"
+ "InnoDB: Pending i/o's on %lu files exist\n",
+ system->n_open_pending);
+
+ ut_a(0);
+ }
+
fil_node_close(last_node, system);
}
@@ -973,7 +985,8 @@ fil_io(
ibool ret;
ulint is_log;
ulint wake_later;
-
+ ulint count;
+
is_log = type & OS_FILE_LOG;
type = type & ~OS_FILE_LOG;
@@ -996,7 +1009,7 @@ fil_io(
#endif
if (sync) {
mode = OS_AIO_SYNC;
- } else if ((type == OS_FILE_READ) && !is_log
+ } else if (type == OS_FILE_READ && !is_log
&& ibuf_page(space_id, block_offset)) {
mode = OS_AIO_IBUF;
} else if (is_log) {
@@ -1006,9 +1019,44 @@ fil_io(
}
system = fil_system;
+
+ count = 0;
loop:
+ count++;
+
+ /* NOTE that there is a possibility of a hang here:
+ if the read i/o-handler thread needs to complete
+ a read by reading from the insert buffer, it may need to
+ post another read. But if the maximum number of files
+ are already open, it cannot proceed from here! */
+
mutex_enter(&(system->mutex));
+ if (count < 500 && !is_log && !ibuf_inside()
+ && system->n_open_pending >= (3 * system->max_n_open) / 4) {
+
+ /* We are not doing an ibuf operation: leave a
+ safety margin of openable files for possible ibuf
+ merges needed in page read completion */
+
+ mutex_exit(&(system->mutex));
+
+ /* Wake the i/o-handler threads to make sure pending
+ i/o's are handled and eventually we can open the file */
+
+ os_aio_simulated_wake_handler_threads();
+
+ os_thread_sleep(100000);
+
+ if (count > 50) {
+ fprintf(stderr,
+ "InnoDB: Warning: waiting for file closes to proceed\n"
+ "InnoDB: round %lu\n", count);
+ }
+
+ goto loop;
+ }
+
if (system->n_open_pending == system->max_n_open) {
/* It is not sure we can open the file if it is closed: wait */
@@ -1018,11 +1066,19 @@ loop:
mutex_exit(&(system->mutex));
+ /* Wake the i/o-handler threads to make sure pending
+ i/o's are handled and eventually we can open the file */
+
+ os_aio_simulated_wake_handler_threads();
+
+ fprintf(stderr,
+ "InnoDB: Warning: max allowed number of files is open\n");
+
os_event_wait(event);
goto loop;
}
-
+
HASH_SEARCH(hash, system->spaces, space_id, space,
space->id == space_id);
ut_a(space);
@@ -1160,6 +1216,7 @@ fil_aio_wait(
#elif defined(POSIX_ASYNC_IO)
ret = os_aio_posix_handle(segment, &fil_node, &message);
#else
+ ret = 0; /* Eliminate compiler warning */
ut_a(0);
#endif
} else {
@@ -1220,6 +1277,12 @@ fil_flush(
node->is_modified = FALSE;
+ if (space->purpose == FIL_TABLESPACE) {
+ fil_n_pending_tablespace_flushes++;
+ } else {
+ fil_n_pending_log_flushes++;
+ }
+
mutex_exit(&(system->mutex));
/* Note that it is not certain, when we have
@@ -1233,6 +1296,12 @@ fil_flush(
os_file_flush(file);
mutex_enter(&(system->mutex));
+
+ if (space->purpose == FIL_TABLESPACE) {
+ fil_n_pending_tablespace_flushes--;
+ } else {
+ fil_n_pending_log_flushes--;
+ }
}
node = UT_LIST_GET_NEXT(chain, node);
@@ -1377,7 +1446,7 @@ fil_page_set_type(
ulint type) /* in: type */
{
ut_ad(page);
- ut_ad((type == FIL_PAGE_INDEX) || (type == FIL_PAGE_INDEX));
+ ut_ad((type == FIL_PAGE_INDEX) || (type == FIL_PAGE_UNDO_LOG));
mach_write_to_2(page + FIL_PAGE_TYPE, type);
}
diff --git a/innobase/ibuf/ibuf0ibuf.c b/innobase/ibuf/ibuf0ibuf.c
index fa1c630dc08..d289b176efa 100644
--- a/innobase/ibuf/ibuf0ibuf.c
+++ b/innobase/ibuf/ibuf0ibuf.c
@@ -1013,7 +1013,7 @@ ibuf_rec_get_volume(
ulint i;
ut_ad(ibuf_inside());
- ut_ad(rec_get_n_fields(rec) > 2);
+ ut_ad(rec_get_n_fields(ibuf_rec) > 2);
n_fields = rec_get_n_fields(ibuf_rec) - 2;
@@ -1624,13 +1624,14 @@ ibuf_get_merge_page_nos(
/*************************************************************************
Contracts insert buffer trees by reading pages to the buffer pool. */
-
+static
ulint
-ibuf_contract(
-/*==========*/
+ibuf_contract_ext(
+/*==============*/
/* out: a lower limit for the combined size in bytes
of entries which will be merged from ibuf trees to the
pages read, 0 if ibuf is empty */
+ ulint* n_pages,/* out: number of pages to which merged */
ibool sync) /* in: TRUE if the caller wants to wait for the
issued read with the highest tablespace address
to complete */
@@ -1644,6 +1645,8 @@ ibuf_contract(
ulint n_stored;
ulint sum_sizes;
mtr_t mtr;
+
+ *n_pages = 0;
loop:
ut_ad(!ibuf_inside());
@@ -1730,10 +1733,65 @@ loop:
buf_read_ibuf_merge_pages(sync, space, page_nos, n_stored);
+ *n_pages = n_stored;
+
return(sum_sizes + 1);
}
/*************************************************************************
+Contracts insert buffer trees by reading pages to the buffer pool. This is
+a wrapper which calls ibuf_contract_ext once and returns its result; the
+page count which ibuf_contract_ext reports through its out parameter is
+not used here. */
+
+ulint
+ibuf_contract(
+/*==========*/
+ /* out: a lower limit for the combined size in bytes
+ of entries which will be merged from ibuf trees to the
+ pages read, 0 if ibuf is empty */
+ ibool sync) /* in: TRUE if the caller wants to wait for the
+ issued read with the highest tablespace address
+ to complete */
+{
+ ulint n_pages;
+
+ return(ibuf_contract_ext(&n_pages, sync));
+}
+
+/*************************************************************************
+Contracts insert buffer trees by reading pages to the buffer pool. Calls
+ibuf_contract_ext repeatedly, adding up the byte estimates it returns and
+the page counts it reports, until the page counts sum to at least n_pages
+or until a call returns 0 (ibuf is empty).
+NOTE(review): ibuf_contract_ext returns sum_sizes + 1, so every round adds
+one extra to the total returned from here; confirm this is acceptable for
+a value documented as a lower limit.
+NOTE(review): the loop only makes progress towards n_pages when a round
+reports a nonzero page count; confirm that ibuf_contract_ext cannot keep
+returning nonzero with zero pages. */
+
+ulint
+ibuf_contract_for_n_pages(
+/*======================*/
+ /* out: a lower limit for the combined size in bytes
+ of entries which will be merged from ibuf trees to the
+ pages read, 0 if ibuf is empty */
+ ibool sync, /* in: TRUE if the caller wants to wait for the
+ issued read with the highest tablespace address
+ to complete */
+ ulint n_pages)/* in: try to read at least this many pages to
+ the buffer pool and merge the ibuf contents to
+ them */
+{
+ ulint sum_bytes = 0;
+ ulint sum_pages = 0;
+ ulint n_bytes;
+ ulint n_pag2;
+
+ while (sum_pages < n_pages) {
+ n_bytes = ibuf_contract_ext(&n_pag2, sync);
+
+ if (n_bytes == 0) {
+ return(sum_bytes);
+ }
+
+ sum_bytes += n_bytes;
+ sum_pages += n_pag2;
+ }
+
+ return(sum_bytes);
+}
+
+/*************************************************************************
Contract insert buffer trees after insert if they are too big. */
UNIV_INLINE
void
@@ -2252,8 +2310,6 @@ ibuf_insert_to_index_page(
if (low_match == dtuple_get_n_fields(entry)) {
rec = page_cur_get_rec(&page_cur);
-
- ut_ad(rec_get_deleted_flag(rec));
btr_cur_del_unmark_for_ibuf(rec, mtr);
} else {
@@ -2306,6 +2362,8 @@ ibuf_delete_rec(
should belong */
btr_pcur_t* pcur, /* in: pcur positioned on the record to
delete, having latch mode BTR_MODIFY_LEAF */
+ dtuple_t* search_tuple,
+ /* in: search tuple for entries of page_no */
mtr_t* mtr) /* in: mtr */
{
ibool success;
@@ -2336,12 +2394,33 @@ ibuf_delete_rec(
mtr_start(mtr);
- ut_a(btr_pcur_restore_position(BTR_MODIFY_TREE, pcur, mtr));
+ success = btr_pcur_restore_position(BTR_MODIFY_TREE, pcur, mtr);
+
+ if (!success) {
+ fprintf(stderr,
+ "InnoDB: ERROR: Send the output to heikki.tuuri@innodb.com\n");
+ fprintf(stderr, "InnoDB: ibuf cursor restoration fails!\n");
+ fprintf(stderr, "InnoDB: ibuf record inserted to page %lu\n",
+ page_no);
+ rec_print(btr_pcur_get_rec(pcur));
+ rec_print(pcur->old_rec);
+ dtuple_print(search_tuple);
+
+ rec_print(page_rec_get_next(btr_pcur_get_rec(pcur)));
+
+ mtr_commit(mtr);
+
+ fprintf(stderr, "InnoDB: Validating insert buffer tree:\n");
+ ut_a(btr_validate_tree(ibuf_data->index->tree));
+ fprintf(stderr, "InnoDB: Ibuf tree ok\n");
+ }
+
+ ut_a(success);
root = ibuf_tree_root_get(ibuf_data, space, mtr);
btr_cur_pessimistic_delete(&err, TRUE, btr_pcur_get_btr_cur(pcur),
- FALSE, mtr);
+ FALSE, mtr);
ut_a(err == DB_SUCCESS);
#ifdef UNIV_IBUF_DEBUG
@@ -2393,8 +2472,11 @@ ibuf_merge_or_delete_for_page(
dulint max_trx_id;
mtr_t mtr;
- /* TODO: get MySQL type info to use in ibuf_insert_to_index_page */
+ if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE) {
+ return;
+ }
+
#ifdef UNIV_LOG_DEBUG
if (space % 2 != 0) {
@@ -2451,16 +2533,13 @@ loop:
if (page) {
success = buf_page_get_known_nowait(RW_X_LATCH, page,
BUF_KEEP_OLD,
-#ifdef UNIV_SYNC_DEBUG
IB__FILE__, __LINE__,
-#endif
&mtr);
-
ut_a(success);
buf_page_dbg_add_level(page, SYNC_TREE_NODE);
}
-
+
/* Position pcur in the insert buffer at the first entry for this
index page */
btr_pcur_open_on_user_rec(ibuf_data->index, search_tuple, PAGE_CUR_GE,
@@ -2476,7 +2555,7 @@ loop:
ut_ad(btr_pcur_is_on_user_rec(&pcur, &mtr));
ibuf_rec = btr_pcur_get_rec(&pcur);
-
+
/* Check if the entry is for this index page */
if (ibuf_rec_get_page_no(ibuf_rec) != page_no) {
@@ -2508,13 +2587,13 @@ loop:
/ IBUF_PAGE_SIZE_PER_FREE_SPACE);
#endif
ibuf_insert_to_index_page(entry, page, &mtr);
-
- n_inserts++;
}
+
+ n_inserts++;
/* Delete the record from ibuf */
- closed = ibuf_delete_rec(space, page_no, &pcur, &mtr);
-
+ closed = ibuf_delete_rec(space, page_no, &pcur, search_tuple,
+ &mtr);
if (closed) {
/* Deletion was pessimistic and mtr was committed:
we start from the beginning again */
@@ -2524,6 +2603,7 @@ loop:
if (btr_pcur_is_after_last_on_page(&pcur, &mtr)) {
mtr_commit(&mtr);
+ btr_pcur_close(&pcur);
goto loop;
}
@@ -2619,8 +2699,6 @@ ibuf_print(void)
#endif
mutex_enter(&ibuf_mutex);
- printf("Ibuf size %lu max size %lu\n", ibuf->size, ibuf->max_size);
-
data = UT_LIST_GET_FIRST(ibuf->data_list);
while (data) {
diff --git a/innobase/include/btr0cur.h b/innobase/include/btr0cur.h
index f56a5662253..bce1f0685cc 100644
--- a/innobase/include/btr0cur.h
+++ b/innobase/include/btr0cur.h
@@ -188,6 +188,22 @@ btr_cur_pessimistic_insert(
que_thr_t* thr, /* in: query thread or NULL */
mtr_t* mtr); /* in: mtr */
/*****************************************************************
+Updates a secondary index record when the update causes no size
+changes in its fields. The only case when this function is currently
+called is that in a char field characters change to others which
+are identified in the collation order. */
+
+ulint
+btr_cur_update_sec_rec_in_place(
+/*============================*/
+ /* out: DB_SUCCESS or error number */
+ btr_cur_t* cursor, /* in: cursor on the record to update;
+ cursor stays valid and positioned on the
+ same record */
+ upd_t* update, /* in: update vector */
+ que_thr_t* thr, /* in: query thread */
+ mtr_t* mtr); /* in: mtr */
+/*****************************************************************
Updates a record when the update causes no size changes in its fields. */
ulint
@@ -411,12 +427,13 @@ btr_estimate_n_rows_in_range(
dtuple_t* tuple2, /* in: range end, may also be empty tuple */
ulint mode2); /* in: search mode for range end */
/***********************************************************************
-Estimates the number of different key values in a given index. */
+Estimates the number of different key values in a given index, for
+each n-column prefix of the index where n <= dict_index_get_n_unique(index).
+The estimates are stored in the array index->stat_n_diff_key_vals. */
-ulint
+void
btr_estimate_number_of_different_key_vals(
/*======================================*/
- /* out: estimated number of key values */
dict_index_t* index); /* in: index */
/***********************************************************************
Marks not updated extern fields as not-owned by this record. The ownership
diff --git a/innobase/include/btr0pcur.h b/innobase/include/btr0pcur.h
index 6465093e3c1..05b55e4491d 100644
--- a/innobase/include/btr0pcur.h
+++ b/innobase/include/btr0pcur.h
@@ -19,9 +19,15 @@ Created 2/23/1996 Heikki Tuuri
#include "btr0types.h"
/* Relative positions for a stored cursor position */
-#define BTR_PCUR_ON 1
-#define BTR_PCUR_BEFORE 2
-#define BTR_PCUR_AFTER 3
+#define BTR_PCUR_ON 1
+#define BTR_PCUR_BEFORE 2
+#define BTR_PCUR_AFTER 3
+/* Note that if the tree is not empty, btr_pcur_store_position does not
+use the following, but only uses the above three alternatives, where the
+position is stored relative to a specific record: this makes implementation
+of a scroll cursor easier */
+#define BTR_PCUR_BEFORE_FIRST_IN_TREE 4 /* in an empty tree */
+#define BTR_PCUR_AFTER_LAST_IN_TREE 5 /* in an empty tree */
/******************************************************************
Allocates memory for a persistent cursor object and initializes the cursor. */
@@ -170,34 +176,16 @@ btr_pcur_close(
/******************************************************************
The position of the cursor is stored by taking an initial segment of the
record the cursor is positioned on, before, or after, and copying it to the
-cursor data structure. NOTE that the page where the cursor is positioned
-must not be empty! */
+cursor data structure, or just setting a flag if the cursor is before the
+first in an EMPTY tree, or after the last in an EMPTY tree. NOTE that the
+page where the cursor is positioned must not be empty if the index tree is
+not totally empty! */
void
btr_pcur_store_position(
/*====================*/
- btr_pcur_t* cursor, /* in: persistent cursor */
- mtr_t* mtr); /* in: mtr */
-/******************************************************************
-If the latch mode of the cursor is BTR_LEAF_SEARCH or BTR_LEAF_MODIFY,
-releases the page latch and bufferfix reserved by the cursor.
-NOTE! In the case of BTR_LEAF_MODIFY, there should not exist changes
-made by the current mini-transaction to the data protected by the
-cursor latch, as then the latch must not be released until mtr_commit. */
-
-void
-btr_pcur_release_leaf(
-/*==================*/
btr_pcur_t* cursor, /* in: persistent cursor */
mtr_t* mtr); /* in: mtr */
-/*************************************************************
-Gets the rel_pos field for a cursor whose position has been stored. */
-UNIV_INLINE
-ulint
-btr_pcur_get_rel_pos(
-/*=================*/
- /* out: BTR_PCUR_ON, ... */
- btr_pcur_t* cursor);/* in: persistent cursor */
/******************************************************************
Restores the stored position of a persistent cursor bufferfixing the page and
obtaining the specified latches. If the cursor position was saved when the
@@ -207,7 +195,9 @@ to the last record LESS OR EQUAL to the stored record;
the last record LESS than the user record which was the successor of the page
infimum;
(3) cursor was positioned on the page supremum: restores to the first record
-GREATER than the user record which was the predecessor of the supremum. */
+GREATER than the user record which was the predecessor of the supremum.
+(4) cursor was positioned before the first or after the last in an empty tree:
+restores to before first or after the last in the tree. */
ibool
btr_pcur_restore_position(
@@ -220,6 +210,26 @@ btr_pcur_restore_position(
ulint latch_mode, /* in: BTR_SEARCH_LEAF, ... */
btr_pcur_t* cursor, /* in: detached persistent cursor */
mtr_t* mtr); /* in: mtr */
+/******************************************************************
+If the latch mode of the cursor is BTR_SEARCH_LEAF or BTR_MODIFY_LEAF,
+releases the page latch and bufferfix reserved by the cursor.
+NOTE! In the case of BTR_MODIFY_LEAF, there should not exist changes
+made by the current mini-transaction to the data protected by the
+cursor latch, as then the latch must not be released until mtr_commit. */
+
+void
+btr_pcur_release_leaf(
+/*==================*/
+ btr_pcur_t* cursor, /* in: persistent cursor */
+ mtr_t* mtr); /* in: mtr */
+/*************************************************************
+Gets the rel_pos field for a cursor whose position has been stored. */
+UNIV_INLINE
+ulint
+btr_pcur_get_rel_pos(
+/*=================*/
+ /* out: BTR_PCUR_ON, ... */
+ btr_pcur_t* cursor);/* in: persistent cursor */
/*************************************************************
Sets the mtr field for a pcur. */
UNIV_INLINE
@@ -458,7 +468,7 @@ struct btr_pcur_struct{
ulint search_mode; /* PAGE_CUR_G, ... */
/*-----------------------------*/
/* NOTE that the following fields may possess dynamically allocated
- memory, which should be freed if not needed anymore! */
+ memory which should be freed if not needed anymore! */
mtr_t* mtr; /* NULL, or this field may contain
a mini-transaction which holds the
diff --git a/innobase/include/btr0pcur.ic b/innobase/include/btr0pcur.ic
index 8e927689208..a60140e4aa9 100644
--- a/innobase/include/btr0pcur.ic
+++ b/innobase/include/btr0pcur.ic
@@ -19,8 +19,8 @@ btr_pcur_get_rel_pos(
ut_ad(cursor);
ut_ad(cursor->old_rec);
ut_ad(cursor->old_stored == BTR_PCUR_OLD_STORED);
- ut_ad((cursor->pos_state == BTR_PCUR_WAS_POSITIONED)
- || (cursor->pos_state == BTR_PCUR_IS_POSITIONED));
+ ut_ad(cursor->pos_state == BTR_PCUR_WAS_POSITIONED
+ || cursor->pos_state == BTR_PCUR_IS_POSITIONED);
return(cursor->rel_pos);
}
diff --git a/innobase/include/btr0sea.h b/innobase/include/btr0sea.h
index c319e16d740..fdf5cf375a3 100644
--- a/innobase/include/btr0sea.h
+++ b/innobase/include/btr0sea.h
@@ -262,6 +262,12 @@ index */
#define BTR_SEARCH_ON_HASH_LIMIT 3
+/* We do this many searches before trying to keep the search latch over calls
+from MySQL. If we notice someone waiting for the latch, we again set this
+much timeout. This is to reduce contention. */
+
+#define BTR_SEA_TIMEOUT 10000
+
#ifndef UNIV_NONINL
#include "btr0sea.ic"
#endif
diff --git a/innobase/include/buf0buf.h b/innobase/include/buf0buf.h
index 66071030402..5ddbf39335a 100644
--- a/innobase/include/buf0buf.h
+++ b/innobase/include/buf0buf.h
@@ -116,53 +116,30 @@ buf_frame_copy(
NOTE! The following macros should be used instead of buf_page_get_gen,
to improve debugging. Only values RW_S_LATCH and RW_X_LATCH are allowed
in LA! */
-#ifdef UNIV_SYNC_DEBUG
#define buf_page_get(SP, OF, LA, MTR) buf_page_get_gen(\
SP, OF, LA, NULL,\
BUF_GET, IB__FILE__, __LINE__, MTR)
-#else
-#define buf_page_get(SP, OF, LA, MTR) buf_page_get_gen(\
- SP, OF, LA, NULL,\
- BUF_GET, MTR)
-#endif
/******************************************************************
Use these macros to bufferfix a page with no latching. Remember not to
read the contents of the page unless you know it is safe. Do not modify
the contents of the page! We have separated this case, because it is
error-prone programming not to set a latch, and it should be used
with care. */
-#ifdef UNIV_SYNC_DEBUG
#define buf_page_get_with_no_latch(SP, OF, MTR) buf_page_get_gen(\
SP, OF, RW_NO_LATCH, NULL,\
BUF_GET_NO_LATCH, IB__FILE__, __LINE__, MTR)
-#else
-#define buf_page_get_with_no_latch(SP, OF, MTR) buf_page_get_gen(\
- SP, OF, RW_NO_LATCH, NULL,\
- BUF_GET_NO_LATCH, MTR)
-#endif
/******************************************************************
NOTE! The following macros should be used instead of buf_page_get_gen, to
improve debugging. Only values RW_S_LATCH and RW_X_LATCH are allowed as LA! */
-#ifdef UNIV_SYNC_DEBUG
#define buf_page_get_nowait(SP, OF, LA, MTR) buf_page_get_gen(\
SP, OF, LA, NULL,\
BUF_GET_NOWAIT, IB__FILE__, __LINE__, MTR)
-#else
-#define buf_page_get_nowait(SP, OF, LA, MTR) buf_page_get_gen(\
- SP, OF, LA, NULL,\
- BUF_GET_NOWAIT, MTR)
-#endif
/******************************************************************
NOTE! The following macros should be used instead of
buf_page_optimistic_get_func, to improve debugging. Only values RW_S_LATCH and
RW_X_LATCH are allowed as LA! */
-#ifdef UNIV_SYNC_DEBUG
#define buf_page_optimistic_get(LA, G, MC, MTR) buf_page_optimistic_get_func(\
LA, G, MC, IB__FILE__, __LINE__, MTR)
-#else
-#define buf_page_optimistic_get(LA, G, MC, MTR) buf_page_optimistic_get_func(\
- LA, G, MC, MTR)
-#endif
/************************************************************************
This is the general function used to get optimistic access to a database
page. */
@@ -175,10 +152,8 @@ buf_page_optimistic_get_func(
buf_frame_t* guess, /* in: guessed frame */
dulint modify_clock,/* in: modify clock value if mode is
..._GUESS_ON_CLOCK */
-#ifdef UNIV_SYNC_DEBUG
char* file, /* in: file name */
ulint line, /* in: line where called */
-#endif
mtr_t* mtr); /* in: mini-transaction */
/************************************************************************
Tries to get the page, but if file io is required, releases all latches
@@ -210,10 +185,8 @@ buf_page_get_known_nowait(
ulint rw_latch,/* in: RW_S_LATCH, RW_X_LATCH */
buf_frame_t* guess, /* in: the known page frame */
ulint mode, /* in: BUF_MAKE_YOUNG or BUF_KEEP_OLD */
-#ifdef UNIV_SYNC_DEBUG
char* file, /* in: file name */
ulint line, /* in: line where called */
-#endif
mtr_t* mtr); /* in: mini-transaction */
/************************************************************************
This is the general function used to get access to a database page. */
@@ -228,10 +201,8 @@ buf_page_get_gen(
buf_frame_t* guess, /* in: guessed frame or NULL */
ulint mode, /* in: BUF_GET, BUF_GET_IF_IN_POOL,
BUF_GET_NO_LATCH */
-#ifdef UNIV_SYNC_DEBUG
char* file, /* in: file name */
ulint line, /* in: line where called */
-#endif
mtr_t* mtr); /* in: mini-transaction */
/************************************************************************
Initializes a page to the buffer buf_pool. The page is usually not read
@@ -455,6 +426,13 @@ Validates the buffer pool data structure. */
ibool
buf_validate(void);
/*==============*/
+/************************************************************************
+Prints a page to stderr. */
+
+void
+buf_page_print(
+/*===========*/
+ byte* read_buf); /* in: a database page */
/*************************************************************************
Prints info of the buffer pool data structure. */
@@ -462,6 +440,12 @@ void
buf_print(void);
/*===========*/
/*************************************************************************
+Returns the number of pending buf pool ios. */
+
+ulint
+buf_get_n_pending_ios(void);
+/*=======================*/
+/*************************************************************************
Prints info of the buffer i/o. */
void
@@ -760,6 +744,8 @@ struct buf_pool_struct{
byte* frame_zero; /* pointer to the first buffer frame:
this may differ from frame_mem, because
this is aligned by the frame size */
+ byte* high_end; /* pointer to the end of the
+ buffer pool */
buf_block_t* blocks; /* array of buffer control blocks */
ulint max_size; /* number of control blocks ==
maximum pool size in pages */
@@ -767,6 +753,9 @@ struct buf_pool_struct{
hash_table_t* page_hash; /* hash table of the file pages */
ulint n_pend_reads; /* number of pending read operations */
+
+ time_t last_printout_time; /* when buf_print was last time
+ called */
ulint n_pages_read; /* number read operations */
ulint n_pages_written;/* number write operations */
ulint n_pages_created;/* number of pages created in the pool
@@ -782,6 +771,9 @@ struct buf_pool_struct{
hit rate */
ulint n_pages_read_old;/* n_pages_read when buf_print was
last time called */
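+ ulint n_pages_written_old;/* presumably n_pages_written when
+ buf_print was last time called, like
+ n_pages_read_old above -- confirm */
+ ulint n_pages_created_old;/* presumably n_pages_created when
+ buf_print was last time called -- confirm */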
/* 2. Page flushing algorithm fields */
UT_LIST_BASE_NODE_T(buf_block_t) flush_list;
diff --git a/innobase/include/buf0buf.ic b/innobase/include/buf0buf.ic
index 1ecc6f34a78..3d88d087e63 100644
--- a/innobase/include/buf0buf.ic
+++ b/innobase/include/buf0buf.ic
@@ -486,11 +486,7 @@ buf_block_buf_fix_inc_debug(
{
ibool ret;
- ret = rw_lock_s_lock_func_nowait(&(block->debug_latch)
-#ifdef UNIV_SYNC_DEBUG
- ,file, line
-#endif
- );
+ ret = rw_lock_s_lock_func_nowait(&(block->debug_latch), file, line);
ut_ad(ret == TRUE);
@@ -557,9 +553,7 @@ buf_page_get_release_on_io(
frame = buf_page_get_gen(space, offset, rw_latch, guess,
BUF_GET_IF_IN_POOL,
-#ifdef UNIV_SYNC_DEBUG
IB__FILE__, __LINE__,
-#endif
mtr);
if (frame != NULL) {
diff --git a/innobase/include/data0data.h b/innobase/include/data0data.h
index c314281d758..c19d7ea5552 100644
--- a/innobase/include/data0data.h
+++ b/innobase/include/data0data.h
@@ -116,8 +116,8 @@ dfield_copy(
Tests if data length and content is equal for two dfields. */
UNIV_INLINE
ibool
-dfield_datas_are_equal(
-/*===================*/
+dfield_datas_are_binary_equal(
+/*==========================*/
/* out: TRUE if equal */
dfield_t* field1, /* in: field */
dfield_t* field2);/* in: field */
@@ -125,8 +125,8 @@ dfield_datas_are_equal(
Tests if dfield data length and content is equal to the given. */
UNIV_INLINE
ibool
-dfield_data_is_equal(
-/*=================*/
+dfield_data_is_binary_equal(
+/*========================*/
/* out: TRUE if equal */
dfield_t* field, /* in: field */
ulint len, /* in: data length or UNIV_SQL_NULL */
@@ -230,14 +230,18 @@ dtuple_get_data_size(
dtuple_t* tuple); /* in: typed data tuple */
/****************************************************************
Returns TRUE if lengths of two dtuples are equal and respective data fields
-in them are equal. */
-UNIV_INLINE
+in them are equal when compared with collation in char fields (not as binary
+strings). */
+
ibool
-dtuple_datas_are_equal(
-/*===================*/
- /* out: TRUE if length and datas are equal */
+dtuple_datas_are_ordering_equal(
+/*============================*/
+ /* out: TRUE if length and fields are equal
+ when compared with cmp_data_data:
+ NOTE: in character type fields some letters
+ are identified with others! (collation) */
dtuple_t* tuple1, /* in: tuple 1 */
- dtuple_t* tuple2); /* in: tuple 2 */
+ dtuple_t* tuple2);/* in: tuple 2 */
/****************************************************************
Folds a prefix given as the number of fields of a tuple. */
UNIV_INLINE
@@ -447,7 +451,7 @@ struct dfield_struct{
struct dtuple_struct {
ulint info_bits; /* info bits of an index record:
- default is 0; this field is used
+ the default is 0; this field is used
if an index record is built from
a data tuple */
ulint n_fields; /* number of fields in dtuple */
diff --git a/innobase/include/data0data.ic b/innobase/include/data0data.ic
index b886ad6c69c..0750a3894d1 100644
--- a/innobase/include/data0data.ic
+++ b/innobase/include/data0data.ic
@@ -133,8 +133,8 @@ dfield_copy(
Tests if data length and content is equal for two dfields. */
UNIV_INLINE
ibool
-dfield_datas_are_equal(
-/*===================*/
+dfield_datas_are_binary_equal(
+/*==========================*/
/* out: TRUE if equal */
dfield_t* field1, /* in: field */
dfield_t* field2) /* in: field */
@@ -157,8 +157,8 @@ dfield_datas_are_equal(
Tests if dfield data length and content is equal to the given. */
UNIV_INLINE
ibool
-dfield_data_is_equal(
-/*=================*/
+dfield_data_is_binary_equal(
+/*========================*/
/* out: TRUE if equal */
dfield_t* field, /* in: field */
ulint len, /* in: data length or UNIV_SQL_NULL */
@@ -169,8 +169,7 @@ dfield_data_is_equal(
return(FALSE);
}
- if ((len != UNIV_SQL_NULL)
- && (0 != ut_memcmp(field->data, data, len))) {
+ if (len != UNIV_SQL_NULL && 0 != ut_memcmp(field->data, data, len)) {
return(FALSE);
}
@@ -342,65 +341,6 @@ dtuple_get_data_size(
return(sum);
}
-/****************************************************************
-Returns TRUE if lengths of two dtuples are equal and respective data fields
-in them are equal. */
-UNIV_INLINE
-ibool
-dtuple_datas_are_equal(
-/*===================*/
- /* out: TRUE if length and datas are equal */
- dtuple_t* tuple1, /* in: tuple 1 */
- dtuple_t* tuple2) /* in: tuple 2 */
-{
- dfield_t* field1;
- dfield_t* field2;
- ulint n_fields;
- byte* data1;
- byte* data2;
- ulint len1;
- ulint len2;
- ulint i;
-
- ut_ad(tuple1 && tuple2);
- ut_ad(tuple1->magic_n = DATA_TUPLE_MAGIC_N);
- ut_ad(tuple2->magic_n = DATA_TUPLE_MAGIC_N);
- ut_ad(dtuple_check_typed(tuple1));
- ut_ad(dtuple_check_typed(tuple2));
-
- n_fields = dtuple_get_n_fields(tuple1);
-
- if (n_fields != dtuple_get_n_fields(tuple2)) {
-
- return(FALSE);
- }
-
- for (i = 0; i < n_fields; i++) {
-
- field1 = dtuple_get_nth_field(tuple1, i);
- data1 = (byte*) dfield_get_data(field1);
- len1 = dfield_get_len(field1);
-
- field2 = dtuple_get_nth_field(tuple2, i);
- data2 = (byte*) dfield_get_data(field2);
- len2 = dfield_get_len(field2);
-
- if (len1 != len2) {
-
- return(FALSE);
- }
-
- if (len1 != UNIV_SQL_NULL) {
- if (ut_memcmp(data1, data2, len1) != 0) {
-
- return(FALSE);
- }
- }
- }
-
- return(TRUE);
-}
-
/***********************************************************************
Sets types of fields binary in a tuple. */
UNIV_INLINE
diff --git a/innobase/include/data0type.h b/innobase/include/data0type.h
index 4817f0ca839..b53a70a8909 100644
--- a/innobase/include/data0type.h
+++ b/innobase/include/data0type.h
@@ -124,17 +124,6 @@ dtype_get_pad_char(
/* out: padding character code, or
ULINT_UNDEFINED if no padding specified */
dtype_t* type); /* in: typeumn */
-/*************************************************************************
-Transforms the character code so that it is ordered appropriately
-for the language. */
-UNIV_INLINE
-ulint
-dtype_collate(
-/*==========*/
- /* out: padding character */
- dtype_t* type, /* in: type */
- ulint code); /* in: character code stored in database
- record */
/***************************************************************************
Returns the size of a fixed size data type, 0 if not a fixed size type. */
UNIV_INLINE
diff --git a/innobase/include/data0type.ic b/innobase/include/data0type.ic
index f6bdaf69662..4a62902eb1b 100644
--- a/innobase/include/data0type.ic
+++ b/innobase/include/data0type.ic
@@ -120,23 +120,6 @@ dtype_get_pad_char(
return(ULINT_UNDEFINED);
}
-/*************************************************************************
-Transforms the character code so that it is ordered appropriately for the
-language. */
-UNIV_INLINE
-ulint
-dtype_collate(
-/*==========*/
- /* out: collation order position */
- dtype_t* type, /* in: type */
- ulint code) /* in: character code stored in database
- record */
-{
- ut_ad((type->mtype == DATA_CHAR) || (type->mtype == DATA_VARCHAR));
-
- return(toupper(code));
-}
-
/**************************************************************************
Stores to a type the information which determines its alphabetical
ordering. */
@@ -198,6 +181,10 @@ dtype_get_fixed_size(
case DATA_SYS: if (type->prtype == DATA_ROW_ID) {
return(DATA_ROW_ID_LEN);
+ } else if (type->prtype == DATA_TRX_ID) {
+ return(DATA_TRX_ID_LEN);
+ } else if (type->prtype == DATA_ROLL_PTR) {
+ return(DATA_ROLL_PTR_LEN);
} else {
return(0);
}
diff --git a/innobase/include/db0err.h b/innobase/include/db0err.h
index 34513545faa..ddfbd5b7862 100644
--- a/innobase/include/db0err.h
+++ b/innobase/include/db0err.h
@@ -27,12 +27,21 @@ Created 5/24/1996 Heikki Tuuri
#define DB_CLUSTER_NOT_FOUND 30
#define DB_TABLE_NOT_FOUND 31
#define DB_MUST_GET_MORE_FILE_SPACE 32 /* the database has to be stopped
- and restrated with more file space */
+ and restarted with more file space */
#define DB_TABLE_IS_BEING_USED 33
#define DB_TOO_BIG_RECORD 34 /* a record in an index would become
bigger than 1/2 free space in a page
frame */
-
+#define DB_LOCK_WAIT_TIMEOUT 35 /* lock wait lasted too long */
+#define DB_NO_REFERENCED_ROW 36 /* referenced key value not found
+ for a foreign key in an insert or
+ update of a row */
+#define DB_ROW_IS_REFERENCED 37 /* cannot delete or update a row
+ because it contains a key value
+ which is referenced */
+#define DB_CANNOT_ADD_CONSTRAINT 38 /* adding a foreign key constraint
+ to a table failed */
+
/* The following are partial failure codes */
#define DB_FAIL 1000
#define DB_OVERFLOW 1001
diff --git a/innobase/include/dict0crea.h b/innobase/include/dict0crea.h
index 6bc31e1e722..ccdedff42c8 100644
--- a/innobase/include/dict0crea.h
+++ b/innobase/include/dict0crea.h
@@ -71,6 +71,24 @@ dict_drop_index_tree(
rec_t* rec, /* in: record in the clustered index of SYS_INDEXES
table */
mtr_t* mtr); /* in: mtr having the latch on the record page */
+/********************************************************************
+Creates the foreign key constraints system tables inside InnoDB
+at database creation or database start if they are not found or are
+not of the right form. */
+
+ulint
+dict_create_or_check_foreign_constraint_tables(void);
+/*================================================*/
+ /* out: DB_SUCCESS or error code */
+/************************************************************************
+Adds foreign key definitions to data dictionary tables in the database. */
+
+ulint
+dict_create_add_foreigns_to_dictionary(
+/*===================================*/
+ /* out: error code or DB_SUCCESS */
+ dict_table_t* table, /* in: table */
+ trx_t* trx); /* in: transaction */
/* Table create node structure */
diff --git a/innobase/include/dict0dict.h b/innobase/include/dict0dict.h
index cec1430c9e9..56b55b8a417 100644
--- a/innobase/include/dict0dict.h
+++ b/innobase/include/dict0dict.h
@@ -138,6 +138,38 @@ dict_table_rename_in_cache(
dict_table_t* table, /* in: table */
char* new_name); /* in: new name */
/**************************************************************************
+Adds a foreign key constraint object to the dictionary cache. May free
+the object if there already is an object with the same identifier in the cache.
+At least one of foreign table or referenced table must already be in
+the dictionary cache! */
+
+ulint
+dict_foreign_add_to_cache(
+/*======================*/
+ /* out: DB_SUCCESS or error code */
+ dict_foreign_t* foreign); /* in, own: foreign key constraint */
+/*************************************************************************
+Scans a table create SQL string and adds to the data dictionary
+the foreign key constraints declared in the string. This function
+should be called after the indexes for a table have been created.
+Each foreign key constraint must be accompanied with indexes in
+both participating tables. The indexes are allowed to contain more
+fields than mentioned in the constraint. */
+
+ulint
+dict_create_foreign_constraints(
+/*============================*/
+ /* out: error code or DB_SUCCESS */
+ trx_t* trx, /* in: transaction */
+ char* sql_string, /* in: table create statement where
+ foreign keys are declared like:
+ FOREIGN KEY (a, b) REFERENCES table2(c, d),
+ table2 can be written also with the database
+ name before it: test.table2; the default
+ database is the database of parameter name */
+ char* name); /* in: table full name in the normalized form
+ database_name/table_name */
+/**************************************************************************
Returns a table object and memoryfixes it. NOTE! This is a high-level
function to be used mainly from outside the 'dict' directory. Inside this
directory dict_table_get_low is usually the appropriate function. */
@@ -174,6 +206,14 @@ dict_table_release(
/*===============*/
dict_table_t* table); /* in: table to be released */
/**************************************************************************
+Checks if a table is in the dictionary cache. */
+UNIV_INLINE
+dict_table_t*
+dict_table_check_if_in_cache_low(
+/*==============================*/
+ /* out: table, NULL if not found */
+ char* table_name); /* in: table name */
+/**************************************************************************
Gets a table; loads it to the dictionary cache if necessary. A low-level
function. */
UNIV_INLINE
@@ -208,6 +248,13 @@ dict_table_print(
/*=============*/
dict_table_t* table); /* in: table */
/**************************************************************************
+Prints a table data. */
+
+void
+dict_table_print_low(
+/*=================*/
+ dict_table_t* table); /* in: table */
+/**************************************************************************
Prints a table data when we know the table name. */
void
@@ -319,6 +366,16 @@ dict_table_copy_types(
dtuple_t* tuple, /* in: data tuple */
dict_table_t* table); /* in: index */
/**************************************************************************
+Looks for an index with the given id. NOTE that we do not reserve
+the dictionary mutex: this function is for emergency purposes like
+printing info of a corrupt database page! */
+
+dict_index_t*
+dict_index_find_on_id_low(
+/*======================*/
+ /* out: index or NULL if not found from cache */
+ dulint id); /* in: index id */
+/**************************************************************************
Adds an index to dictionary cache. */
ibool
@@ -640,6 +697,23 @@ dict_tree_get_space_reserve(
reserved for updates */
dict_tree_t* tree); /* in: a tree */
/*************************************************************************
+Calculates the minimum record length in an index. */
+
+ulint
+dict_index_calc_min_rec_len(
+/*========================*/
+ dict_index_t* index); /* in: index */
+/*************************************************************************
+Calculates new estimates for table and index statistics. The statistics
+are used in query optimization. */
+
+void
+dict_update_statistics_low(
+/*=======================*/
+ dict_table_t* table, /* in: table */
+ ibool has_dict_mutex);/* in: TRUE if the caller has the
+ dictionary mutex */
+/*************************************************************************
Calculates new estimates for table and index statistics. The statistics
are used in query optimization. */
@@ -661,7 +735,8 @@ dict_mutex_exit_for_mysql(void);
/*===========================*/
-extern dict_sys_t* dict_sys; /* the dictionary system */
+extern dict_sys_t* dict_sys; /* the dictionary system */
+extern rw_lock_t dict_foreign_key_check_lock;
/* Dictionary system struct */
struct dict_sys_struct{
diff --git a/innobase/include/dict0dict.ic b/innobase/include/dict0dict.ic
index 9089ebe8edd..821465f96a8 100644
--- a/innobase/include/dict0dict.ic
+++ b/innobase/include/dict0dict.ic
@@ -532,12 +532,11 @@ dict_tree_get_space_reserve(
}
/**************************************************************************
-Gets a table; loads it to the dictionary cache if necessary. A low-level
-function. */
+Checks if a table is in the dictionary cache. */
UNIV_INLINE
dict_table_t*
-dict_table_get_low(
-/*===============*/
+dict_table_check_if_in_cache_low(
+/*==============================*/
/* out: table, NULL if not found */
char* table_name) /* in: table name */
{
@@ -552,6 +551,26 @@ dict_table_get_low(
HASH_SEARCH(name_hash, dict_sys->table_hash, table_fold, table,
ut_strcmp(table->name, table_name) == 0);
+ return(table);
+}
+
+/**************************************************************************
+Gets a table; loads it to the dictionary cache if necessary. A low-level
+function. */
+UNIV_INLINE
+dict_table_t*
+dict_table_get_low(
+/*===============*/
+ /* out: table, NULL if not found */
+ char* table_name) /* in: table name */
+{
+ dict_table_t* table;
+
+ ut_ad(table_name);
+ ut_ad(mutex_own(&(dict_sys->mutex)));
+
+ table = dict_table_check_if_in_cache_low(table_name);
+
if (table == NULL) {
table = dict_load_table(table_name);
}
@@ -603,6 +622,7 @@ dict_table_get_on_id_low(
dict_table_t* table;
ulint fold;
+ ut_ad(mutex_own(&(dict_sys->mutex)));
UT_NOT_USED(trx);
/* Look for the table name in the hash table */
diff --git a/innobase/include/dict0load.h b/innobase/include/dict0load.h
index d0298d8df37..b60996a8dab 100644
--- a/innobase/include/dict0load.h
+++ b/innobase/include/dict0load.h
@@ -15,8 +15,19 @@ Created 4/24/1996 Heikki Tuuri
#include "ut0byte.h"
/************************************************************************
+Finds the first table name in the given database. */
+
+char*
+dict_get_first_table_name_in_db(
+/*============================*/
+ /* out, own: table name, NULL if does not exist;
+ the caller must free the memory in the string! */
+ char* name); /* in: database name which ends in '/' */
+/************************************************************************
Loads a table definition and also all its index definitions, and also
-the cluster definition, if the table is a member in a cluster. */
+the cluster definition if the table is a member in a cluster. Also loads
+all foreign key constraints where the foreign key is in the table or where
+a foreign key references columns in this table. */
dict_table_t*
dict_load_table(
@@ -40,6 +51,25 @@ void
dict_load_sys_table(
/*================*/
dict_table_t* table); /* in: system table */
+/***************************************************************************
+Loads foreign key constraints where the table is either the foreign key
+holder or where the table is referenced by a foreign key. Adds these
+constraints to the data dictionary. Note that we know that the dictionary
+cache already contains all constraints where the other relevant table is
+already in the dictionary cache. */
+
+ulint
+dict_load_foreigns(
+/*===============*/
+ /* out: DB_SUCCESS or error code */
+ char* table_name); /* in: table name */
+/************************************************************************
+Prints to the standard output information on all tables found in the data
+dictionary system table. */
+
+void
+dict_print(void);
+/*============*/
#ifndef UNIV_NONINL
diff --git a/innobase/include/dict0mem.h b/innobase/include/dict0mem.h
index 74ecbc8bba2..bd24d1539ca 100644
--- a/innobase/include/dict0mem.h
+++ b/innobase/include/dict0mem.h
@@ -123,6 +123,13 @@ dict_mem_index_free(
/*================*/
dict_index_t* index); /* in: index */
/**************************************************************************
+Creates and initializes a foreign constraint memory object. */
+
+dict_foreign_t*
+dict_mem_foreign_create(void);
+/*=========================*/
+ /* out, own: foreign constraint struct */
+/**************************************************************************
Creates a procedure memory object. */
dict_proc_t*
@@ -221,15 +228,56 @@ struct dict_index_struct{
dictionary cache */
btr_search_t* search_info; /* info used in optimistic searches */
/*----------------------*/
- ulint stat_n_diff_key_vals;
+ ib_longlong* stat_n_diff_key_vals;
/* approximate number of different key values
- for this index; we periodically calculate
- new estimates */
+ for this index, for each n-column prefix
+ where n <= dict_get_n_unique(index); we
+ periodically calculate new estimates */
ulint stat_index_size;
/* approximate index size in database pages */
+ ulint stat_n_leaf_pages;
+ /* approximate number of leaf pages in the
+ index tree */
ulint magic_n;/* magic number */
};
+/* Data structure for a foreign key constraint; an example:
+FOREIGN KEY (A, B) REFERENCES TABLE2 (C, D) */
+
+struct dict_foreign_struct{
+ mem_heap_t* heap; /* this object is allocated from
+ this memory heap */
+ char* id; /* id of the constraint as a
+ null-terminated string */
+ char* foreign_table_name;/* foreign table name */
+ dict_table_t* foreign_table; /* table where the foreign key is */
+ char** foreign_col_names;/* names of the columns in the
+ foreign key */
+ char* referenced_table_name;/* referenced table name */
+ dict_table_t* referenced_table;/* table where the referenced key
+ is */
+ char** referenced_col_names;/* names of the referenced
+ columns in the referenced table */
+ ulint n_fields; /* number of indexes' first fields
+ for which the foreign key
+ constraint is defined: we allow the
+ indexes to contain more fields than
+ mentioned in the constraint, as long
+ as the first fields are as mentioned */
+ dict_index_t* foreign_index; /* foreign index; we require that
+ both tables contain explicitly defined
+ indexes for the constraint: InnoDB
+ does not generate new indexes
+ implicitly */
+ dict_index_t* referenced_index;/* referenced index */
+ UT_LIST_NODE_T(dict_foreign_t)
+ foreign_list; /* list node for foreign keys of the
+ table */
+ UT_LIST_NODE_T(dict_foreign_t)
+ referenced_list;/* list node for referenced keys of the
+ table */
+};
+
#define DICT_INDEX_MAGIC_N 76789786
/* Data structure for a database table */
@@ -247,6 +295,13 @@ struct dict_table_struct{
dict_col_t* cols; /* array of column descriptions */
UT_LIST_BASE_NODE_T(dict_index_t)
indexes; /* list of indexes of the table */
+ UT_LIST_BASE_NODE_T(dict_foreign_t)
+ foreign_list;/* list of foreign key constraints
+ in the table; these refer to columns
+ in other tables */
+ UT_LIST_BASE_NODE_T(dict_foreign_t)
+ referenced_list;/* list of foreign key constraints
+ which refer to this table */
UT_LIST_NODE_T(dict_table_t)
table_LRU; /* node of the LRU list of tables */
ulint mem_fix;/* count of how many times the table
@@ -254,6 +309,13 @@ struct dict_table_struct{
currently NOT used */
ibool cached; /* TRUE if the table object has been added
to the dictionary cache */
+ lock_t* auto_inc_lock;/* a buffer for an auto-inc lock
+ for this table: we allocate the memory here
+ so that individual transactions can get it
+ and release it without a need to allocate
+ space from the lock heap of the trx:
+ otherwise the lock heap would grow rapidly
+ if we do a large insert from a select */
UT_LIST_BASE_NODE_T(lock_t)
locks; /* list of locks on the table */
/*----------------------*/
@@ -278,7 +340,7 @@ struct dict_table_struct{
forget about value TRUE if it has to reload
the table definition from disk */
/*----------------------*/
- ulint stat_n_rows;
+ ib_longlong stat_n_rows;
/* approximate number of rows in the table;
we periodically calculate new estimates */
ulint stat_clustered_index_size;
diff --git a/innobase/include/dict0types.h b/innobase/include/dict0types.h
index fe1bad45063..498c6f46b7b 100644
--- a/innobase/include/dict0types.h
+++ b/innobase/include/dict0types.h
@@ -16,6 +16,7 @@ typedef struct dict_index_struct dict_index_t;
typedef struct dict_tree_struct dict_tree_t;
typedef struct dict_table_struct dict_table_t;
typedef struct dict_proc_struct dict_proc_t;
+typedef struct dict_foreign_struct dict_foreign_t;
/* A cluster object is a table object with the type field set to
DICT_CLUSTERED */
diff --git a/innobase/include/fil0fil.h b/innobase/include/fil0fil.h
index bfc322270fc..ca74ea4cb2c 100644
--- a/innobase/include/fil0fil.h
+++ b/innobase/include/fil0fil.h
@@ -76,6 +76,9 @@ extern fil_addr_t fil_addr_null;
#define FIL_TABLESPACE 501
#define FIL_LOG 502
+extern ulint fil_n_pending_log_flushes;
+extern ulint fil_n_pending_tablespace_flushes;
+
/***********************************************************************
Reserves a right to open a single file. The right must be released with
fil_release_right_to_open. */
diff --git a/innobase/include/ibuf0ibuf.h b/innobase/include/ibuf0ibuf.h
index f0b333192de..99fb1595f49 100644
--- a/innobase/include/ibuf0ibuf.h
+++ b/innobase/include/ibuf0ibuf.h
@@ -226,6 +226,21 @@ ibuf_contract(
issued read with the highest tablespace address
to complete */
/*************************************************************************
+Contracts insert buffer trees by reading pages to the buffer pool. */
+
+ulint
+ibuf_contract_for_n_pages(
+/*======================*/
+ /* out: a lower limit for the combined size in bytes
+ of entries which will be merged from ibuf trees to the
+ pages read, 0 if ibuf is empty */
+ ibool sync, /* in: TRUE if the caller wants to wait for the
+ issued read with the highest tablespace address
+ to complete */
+ ulint n_pages);/* in: try to read at least this many pages to
+ the buffer pool and merge the ibuf contents to
+ them */
+/*************************************************************************
Parses a redo log record of an ibuf bitmap page init. */
byte*
diff --git a/innobase/include/lock0lock.h b/innobase/include/lock0lock.h
index c492e7b8ef3..5a15b78b869 100644
--- a/innobase/include/lock0lock.h
+++ b/innobase/include/lock0lock.h
@@ -21,15 +21,13 @@ Created 5/7/1996 Heikki Tuuri
extern ibool lock_print_waits;
-/*****************************************************************
-Cancels a waiting record lock request and releases the waiting transaction
-that requested it. NOTE: does NOT check if waiting lock requests behind this
-one can now be granted! */
+/*************************************************************************
+Gets the size of a lock struct. */
-void
-lock_rec_cancel(
-/*============*/
- lock_t* lock); /* in: waiting record lock request */
+ulint
+lock_get_size(void);
+/*===============*/
+ /* out: size in bytes */
/*************************************************************************
Creates the lock system at database start. */
@@ -388,6 +386,14 @@ lock_is_on_table(
/* out: TRUE if there are lock(s) */
dict_table_t* table); /* in: database table in dictionary cache */
/*************************************************************************
+Releases an auto-inc lock a transaction possibly has on a table.
+Releases possible other transactions waiting for this lock. */
+
+void
+lock_table_unlock_auto_inc(
+/*=======================*/
+ trx_t* trx); /* in: transaction */
+/*************************************************************************
Releases transaction locks, and releases possible other transactions waiting
because of these locks. */
@@ -396,6 +402,14 @@ lock_release_off_kernel(
/*====================*/
trx_t* trx); /* in: transaction */
/*************************************************************************
+Cancels a waiting lock request and releases possible other transactions
+waiting behind it. */
+
+void
+lock_cancel_waiting_and_release(
+/*============================*/
+ lock_t* lock); /* in: waiting lock request */
+/*************************************************************************
Resets all locks, both table and record locks, on a table to be dropped.
No lock is allowed to be a wait lock. */
@@ -495,6 +509,8 @@ extern lock_sys_t* lock_sys;
#define LOCK_IX 3 /* intention exclusive */
#define LOCK_S 4 /* shared */
#define LOCK_X 5 /* exclusive */
+#define LOCK_AUTO_INC 6 /* locks the auto-inc counter of a table
+ in an exclusive mode */
#define LOCK_MODE_MASK 0xF /* mask used to extract mode from the
type_mode field in a lock */
#define LOCK_TABLE 16 /* these type values should be so high that */
diff --git a/innobase/include/log0log.h b/innobase/include/log0log.h
index 001f98cfc3c..adff9fae544 100644
--- a/innobase/include/log0log.h
+++ b/innobase/include/log0log.h
@@ -659,6 +659,11 @@ struct log_struct{
mutex! */
ulint n_log_ios; /* number of log i/os initiated thus
far */
+ ulint n_log_ios_old; /* number of log i/o's at the
+ previous printout */
+ time_t last_printout_time;/* when log_print was last time
+ called */
+
/* Fields involved in checkpoints */
ulint max_modified_age_async;
/* when this recommended value for lsn
diff --git a/innobase/include/mtr0mtr.h b/innobase/include/mtr0mtr.h
index dec8eeb1e15..0ef25b3d1ee 100644
--- a/innobase/include/mtr0mtr.h
+++ b/innobase/include/mtr0mtr.h
@@ -203,20 +203,12 @@ mtr_read_dulint(
mtr_t* mtr); /* in: mini-transaction handle */
/*************************************************************************
This macro locks an rw-lock in s-mode. */
-#ifdef UNIV_SYNC_DEBUG
#define mtr_s_lock(B, MTR) mtr_s_lock_func((B), IB__FILE__, __LINE__,\
(MTR))
-#else
-#define mtr_s_lock(B, MTR) mtr_s_lock_func((B), (MTR))
-#endif
/*************************************************************************
This macro locks an rw-lock in x-mode. */
-#ifdef UNIV_SYNC_DEBUG
#define mtr_x_lock(B, MTR) mtr_x_lock_func((B), IB__FILE__, __LINE__,\
(MTR))
-#else
-#define mtr_x_lock(B, MTR) mtr_x_lock_func((B), (MTR))
-#endif
/*************************************************************************
NOTE! Use the macro above!
Locks a lock in s-mode. */
@@ -225,10 +217,8 @@ void
mtr_s_lock_func(
/*============*/
rw_lock_t* lock, /* in: rw-lock */
-#ifdef UNIV_SYNC_DEBUG
char* file, /* in: file name */
ulint line, /* in: line number */
-#endif
mtr_t* mtr); /* in: mtr */
/*************************************************************************
NOTE! Use the macro above!
@@ -238,10 +228,8 @@ void
mtr_x_lock_func(
/*============*/
rw_lock_t* lock, /* in: rw-lock */
-#ifdef UNIV_SYNC_DEBUG
char* file, /* in: file name */
ulint line, /* in: line number */
-#endif
mtr_t* mtr); /* in: mtr */
/*******************************************************
diff --git a/innobase/include/mtr0mtr.ic b/innobase/include/mtr0mtr.ic
index 5718d872bcb..51112fc0d14 100644
--- a/innobase/include/mtr0mtr.ic
+++ b/innobase/include/mtr0mtr.ic
@@ -217,20 +217,14 @@ void
mtr_s_lock_func(
/*============*/
rw_lock_t* lock, /* in: rw-lock */
-#ifdef UNIV_SYNC_DEBUG
char* file, /* in: file name */
ulint line, /* in: line number */
-#endif
mtr_t* mtr) /* in: mtr */
{
ut_ad(mtr);
ut_ad(lock);
- rw_lock_s_lock_func(lock
- #ifdef UNIV_SYNC_DEBUG
- ,0, file, line
- #endif
- );
+ rw_lock_s_lock_func(lock, 0, file, line);
mtr_memo_push(mtr, lock, MTR_MEMO_S_LOCK);
}
@@ -242,20 +236,14 @@ void
mtr_x_lock_func(
/*============*/
rw_lock_t* lock, /* in: rw-lock */
-#ifdef UNIV_SYNC_DEBUG
char* file, /* in: file name */
ulint line, /* in: line number */
-#endif
mtr_t* mtr) /* in: mtr */
{
ut_ad(mtr);
ut_ad(lock);
- rw_lock_x_lock_func(lock, 0
- #ifdef UNIV_SYNC_DEBUG
- , file, line
- #endif
- );
+ rw_lock_x_lock_func(lock, 0, file, line);
mtr_memo_push(mtr, lock, MTR_MEMO_X_LOCK);
}
diff --git a/innobase/include/rem0cmp.h b/innobase/include/rem0cmp.h
index 10c428cb9ca..6f2a99fc8c2 100644
--- a/innobase/include/rem0cmp.h
+++ b/innobase/include/rem0cmp.h
@@ -16,6 +16,32 @@ Created 7/1/1994 Heikki Tuuri
#include "rem0rec.h"
/*****************************************************************
+Returns TRUE if two types are equal for comparison purposes. */
+
+ibool
+cmp_types_are_equal(
+/*================*/
+ /* out: TRUE if the types are considered
+ equal in comparisons */
+ dtype_t* type1, /* in: type 1 */
+ dtype_t* type2); /* in: type 2 */
+/*****************************************************************
+This function is used to compare two data fields for which we know the
+data type. */
+UNIV_INLINE
+int
+cmp_data_data(
+/*==========*/
+ /* out: 1, 0, -1, if data1 is greater, equal,
+ less than data2, respectively */
+ dtype_t* cur_type,/* in: data type of the fields */
+ byte* data1, /* in: data field (== a pointer to a memory
+ buffer) */
+ ulint len1, /* in: data field length or UNIV_SQL_NULL */
+ byte* data2, /* in: data field (== a pointer to a memory
+ buffer) */
+ ulint len2); /* in: data field length or UNIV_SQL_NULL */
+/*****************************************************************
This function is used to compare two dfields where at least the first
has its data type field set. */
UNIV_INLINE
diff --git a/innobase/include/row0ins.h b/innobase/include/row0ins.h
index 612b9e8d73a..cc3b9fa7e9a 100644
--- a/innobase/include/row0ins.h
+++ b/innobase/include/row0ins.h
@@ -16,6 +16,28 @@ Created 4/20/1996 Heikki Tuuri
#include "trx0types.h"
#include "row0types.h"
+/*******************************************************************
+Checks if foreign key constraint fails for an index entry. Sets shared locks
+which lock either the success or the failure of the constraint. NOTE that
+the caller must have a shared latch on dict_foreign_key_check_lock. */
+
+ulint
+row_ins_check_foreign_constraint(
+/*=============================*/
+ /* out: DB_SUCCESS, DB_LOCK_WAIT,
+ DB_NO_REFERENCED_ROW,
+ or DB_ROW_IS_REFERENCED */
+ ibool check_ref,/* in: TRUE if we want to check that
+ the referenced table is ok, FALSE if we
+ want to check the foreign key table */
+ dict_foreign_t* foreign,/* in: foreign constraint; NOTE that the
+ tables mentioned in it must be in the
+ dictionary cache if they exist at all */
+ dict_table_t* table, /* in: if check_ref is TRUE, then the foreign
+ table, else the referenced table */
+ dict_index_t* index, /* in: index in table */
+ dtuple_t* entry, /* in: index entry for index */
+ que_thr_t* thr); /* in: query thread */
/*************************************************************************
Creates an insert node struct. */
diff --git a/innobase/include/row0mysql.h b/innobase/include/row0mysql.h
index 31f9e15cddc..4e90c0ac590 100644
--- a/innobase/include/row0mysql.h
+++ b/innobase/include/row0mysql.h
@@ -133,6 +133,26 @@ row_update_prebuilt_trx(
handle */
trx_t* trx); /* in: transaction handle */
/*************************************************************************
+Unlocks an AUTO_INC type lock possibly reserved by trx. */
+
+void
+row_unlock_table_autoinc_for_mysql(
+/*===============================*/
+ trx_t* trx); /* in: transaction */
+/*************************************************************************
+Sets an AUTO_INC type lock on the table mentioned in prebuilt. The
+AUTO_INC lock gives exclusive access to the auto-inc counter of the
+table. The lock is reserved only for the duration of an SQL statement.
+It is not compatible with another AUTO_INC or exclusive lock on the
+table. */
+
+int
+row_lock_table_autoinc_for_mysql(
+/*=============================*/
+ /* out: error code or DB_SUCCESS */
+ row_prebuilt_t* prebuilt); /* in: prebuilt struct in the MySQL
+ table handle */
+/*************************************************************************
Does an insert for MySQL. */
int
@@ -211,6 +231,26 @@ row_create_index_for_mysql(
dict_index_t* index, /* in: index defintion */
trx_t* trx); /* in: transaction handle */
/*************************************************************************
+Scans a table create SQL string and adds to the data dictionary
+the foreign key constraints declared in the string. This function
+should be called after the indexes for a table have been created.
+Each foreign key constraint must be accompanied with indexes in
+both participating tables. The indexes are allowed to contain more
+fields than mentioned in the constraint. */
+
+int
+row_table_add_foreign_constraints(
+/*==============================*/
+ /* out: error code or DB_SUCCESS */
+ trx_t* trx, /* in: transaction */
+ char* sql_string, /* in: table create statement where
+ foreign keys are declared like:
+ FOREIGN KEY (a, b) REFERENCES table2(c, d),
+ table2 can be written also with the database
+ name before it: test.table2 */
+ char* name); /* in: table full name in the normalized form
+ database_name/table_name */
+/*************************************************************************
Drops a table for MySQL. If the name of the dropped table ends to
characters INNODB_MONITOR, then this also stops printing of monitor
output by the master thread. */
@@ -224,6 +264,15 @@ row_drop_table_for_mysql(
ibool has_dict_mutex);/* in: TRUE if the caller already owns the
dictionary system mutex */
/*************************************************************************
+Drops a database for MySQL. */
+
+int
+row_drop_database_for_mysql(
+/*========================*/
+ /* out: error code or DB_SUCCESS */
+ char* name, /* in: database name which ends in '/' */
+ trx_t* trx); /* in: transaction handle */
+/*************************************************************************
Renames a table for MySQL. */
int
diff --git a/innobase/include/row0upd.h b/innobase/include/row0upd.h
index 9bb73726b29..106d3866b25 100644
--- a/innobase/include/row0upd.h
+++ b/innobase/include/row0upd.h
@@ -47,8 +47,7 @@ upd_get_nth_field(
upd_t* update, /* in: update vector */
ulint n); /* in: field position in update vector */
/*************************************************************************
-Sets the clustered index field number to be updated by an update vector
-field. */
+Sets an index field number to be updated by an update vector field. */
UNIV_INLINE
void
upd_field_set_field_no(
@@ -56,7 +55,7 @@ upd_field_set_field_no(
upd_field_t* upd_field, /* in: update vector field */
ulint field_no, /* in: field number in a clustered
index */
- dict_index_t* index); /* in: clustered index */
+ dict_index_t* index); /* in: index */
/*************************************************************************
Writes into the redo log the values of trx id and roll ptr and enough info
to determine their positions within a clustered index record. */
@@ -136,13 +135,27 @@ row_upd_rec_in_place(
rec_t* rec, /* in/out: record where replaced */
upd_t* update);/* in: update vector */
/*******************************************************************
+Builds an update vector from those fields which in a secondary index entry
+differ from a record that has the equal ordering fields. NOTE: we compare
+the fields as binary strings! */
+
+upd_t*
+row_upd_build_sec_rec_difference_binary(
+/*====================================*/
+ /* out, own: update vector of differing
+ fields */
+ dict_index_t* index, /* in: index */
+ dtuple_t* entry, /* in: entry to insert */
+ rec_t* rec, /* in: secondary index record */
+ mem_heap_t* heap); /* in: memory heap from which allocated */
+/*******************************************************************
Builds an update vector from those fields, excluding the roll ptr and
trx id fields, which in an index entry differ from a record that has
-the equal ordering fields. */
+the equal ordering fields. NOTE: we compare the fields as binary strings! */
upd_t*
-row_upd_build_difference(
-/*=====================*/
+row_upd_build_difference_binary(
+/*============================*/
/* out, own: update vector of differing
fields, excluding roll ptr and trx id */
dict_index_t* index, /* in: clustered index */
@@ -175,13 +188,16 @@ row_upd_clust_index_replace_new_col_vals(
/***************************************************************
Checks if an update vector changes an ordering field of an index record.
This function is fast if the update vector is short or the number of ordering
-fields in the index is small. Otherwise, this can be quadratic. */
-
+fields in the index is small. Otherwise, this can be quadratic.
+NOTE: we compare the fields as binary strings! */
+
ibool
-row_upd_changes_ord_field(
-/*======================*/
+row_upd_changes_ord_field_binary(
+/*=============================*/
/* out: TRUE if update vector changes
- an ordering field in the index record */
+ an ordering field in the index record;
+ NOTE: the fields are compared as binary
+ strings */
dtuple_t* row, /* in: old value of row, or NULL if the
row and the data values in update are not
known when this function is called, e.g., at
@@ -191,11 +207,12 @@ row_upd_changes_ord_field(
/***************************************************************
Checks if an update vector changes an ordering field of an index record.
This function is fast if the update vector is short or the number of ordering
-fields in the index is small. Otherwise, this can be quadratic. */
+fields in the index is small. Otherwise, this can be quadratic.
+NOTE: we compare the fields as binary strings! */
ibool
-row_upd_changes_some_index_ord_field(
-/*=================================*/
+row_upd_changes_some_index_ord_field_binary(
+/*========================================*/
/* out: TRUE if update vector may change
an ordering field in an index record */
dict_table_t* table, /* in: table */
diff --git a/innobase/include/row0upd.ic b/innobase/include/row0upd.ic
index b785e52caa0..7015b2eda13 100644
--- a/innobase/include/row0upd.ic
+++ b/innobase/include/row0upd.ic
@@ -70,8 +70,7 @@ upd_get_nth_field(
}
/*************************************************************************
-Sets the clustered index field number to be updated by an update vector
-field. */
+Sets an index field number to be updated by an update vector field. */
UNIV_INLINE
void
upd_field_set_field_no(
@@ -79,12 +78,18 @@ upd_field_set_field_no(
upd_field_t* upd_field, /* in: update vector field */
ulint field_no, /* in: field number in a clustered
index */
- dict_index_t* index) /* in: clustered index */
-{
- ut_ad(index->type & DICT_CLUSTERED);
-
+ dict_index_t* index) /* in: index */
+{
upd_field->field_no = field_no;
+ if (field_no >= dict_index_get_n_fields(index)) {
+ fprintf(stderr,
+ "InnoDB: Error: trying to access field %lu in table %s\n"
+ "InnoDB: index %s, but index has only %lu fields\n",
+ field_no, index->table_name, index->name,
+ dict_index_get_n_fields(index));
+ }
+
dtype_copy(dfield_get_type(&(upd_field->new_val)),
dict_index_get_nth_type(index, field_no));
}
diff --git a/innobase/include/srv0srv.h b/innobase/include/srv0srv.h
index b77d7d5436a..ca89de4b9a8 100644
--- a/innobase/include/srv0srv.h
+++ b/innobase/include/srv0srv.h
@@ -16,6 +16,11 @@ Created 10/10/1995 Heikki Tuuri
#include "com0com.h"
#include "que0types.h"
+
+/* When this event is set the lock timeout and InnoDB monitor
+thread starts running */
+extern os_event_t srv_lock_timeout_thread_event;
+
/* Server parameters which are read from the initfile */
extern char* srv_data_home;
@@ -27,6 +32,8 @@ extern char** srv_data_file_names;
extern ulint* srv_data_file_sizes;
extern ulint* srv_data_file_is_raw_partition;
+extern ibool srv_created_new_raw;
+
#define SRV_NEW_RAW 1
#define SRV_OLD_RAW 2
@@ -39,6 +46,8 @@ extern ibool srv_log_archive_on;
extern ulint srv_log_buffer_size;
extern ibool srv_flush_log_at_trx_commit;
+extern byte srv_latin1_ordering[256];/* The sort order table of the latin1
+ character set */
extern ibool srv_use_native_aio;
extern ulint srv_pool_size;
@@ -54,6 +63,7 @@ extern ulint srv_lock_wait_timeout;
extern char* srv_unix_file_flush_method_str;
extern ulint srv_unix_file_flush_method;
+extern ulint srv_force_recovery;
extern ibool srv_use_doublewrite_buf;
@@ -71,6 +81,7 @@ extern ibool srv_print_innodb_monitor;
extern ibool srv_print_innodb_lock_monitor;
extern ibool srv_print_innodb_tablespace_monitor;
extern ibool srv_print_verbose_log;
+extern ibool srv_print_innodb_table_monitor;
extern ulint srv_n_spin_wait_rounds;
extern ulint srv_spin_wait_delay;
@@ -133,6 +144,25 @@ what these mean */
#define SRV_UNIX_LITTLESYNC 3
#define SRV_UNIX_NOSYNC 4
+/* Alternatives for srv_force_recovery. Non-zero values are intended
+to help the user get a damaged database up so that he can dump intact
+tables and rows with SELECT INTO OUTFILE. The database must not otherwise
+be used with these options! A bigger number below means that all precautions
+of lower numbers are included. */
+
+#define SRV_FORCE_IGNORE_CORRUPT 1 /* let the server run even if it
+ detects a corrupt page */
+#define SRV_FORCE_NO_BACKGROUND 2 /* prevent the main thread from
+ running: if a crash would occur
+ in purge, this prevents it */
+#define SRV_FORCE_NO_TRX_UNDO 3 /* do not run trx rollback after
+ recovery */
+#define SRV_FORCE_NO_IBUF_MERGE 4 /* prevent also ibuf operations:
+ if they would cause a crash, better
+ not do them */
+#define SRV_FORCE_NO_LOG_REDO 5 /* do not do the log roll-forward
+ in connection with recovery */
+
/*************************************************************************
Boots Innobase server. */
@@ -225,15 +255,30 @@ srv_release_mysql_thread_if_suspended(
que_thr_t* thr); /* in: query thread associated with the
MySQL OS thread */
/*************************************************************************
-A thread which wakes up threads whose lock wait may have lasted too long. */
+A thread which wakes up threads whose lock wait may have lasted too long.
+This also prints the info output by various InnoDB monitors. */
+
+#ifndef __WIN__
+void*
+#else
+ulint
+#endif
+srv_lock_timeout_and_monitor_thread(
+/*================================*/
+ /* out: a dummy parameter */
+ void* arg); /* in: a dummy parameter required by
+ os_thread_create */
+/*************************************************************************
+A thread which prints warnings about semaphore waits which have lasted
+too long. These can be used to track bugs which cause hangs. */
#ifndef __WIN__
void*
#else
ulint
#endif
-srv_lock_timeout_monitor_thread(
-/*============================*/
+srv_error_monitor_thread(
+/*=====================*/
/* out: a dummy parameter */
void* arg); /* in: a dummy parameter required by
os_thread_create */
diff --git a/innobase/include/sync0arr.h b/innobase/include/sync0arr.h
index 75d79f4c93f..f0134894997 100644
--- a/innobase/include/sync0arr.h
+++ b/innobase/include/sync0arr.h
@@ -51,13 +51,9 @@ sync_array_reserve_cell(
sync_array_t* arr, /* in: wait array */
void* object, /* in: pointer to the object to wait for */
ulint type, /* in: lock request type */
- #ifdef UNIV_SYNC_DEBUG
- char* file, /* in: in debug version file where
- requested */
- ulint line, /* in: in the debug version line where
- requested */
- #endif
- ulint* index); /* out: index of the reserved cell */
+ char* file, /* in: file where requested */
+ ulint line, /* in: line where requested */
+ ulint* index); /* out: index of the reserved cell */
/**********************************************************************
This function should be called when a thread starts to wait on
a wait array cell. In the debug version this function checks
@@ -90,6 +86,20 @@ sync_array_signal_object(
/*=====================*/
sync_array_t* arr, /* in: wait array */
void* object);/* in: wait object */
+/**************************************************************************
+If the wakeup algorithm does not work perfectly at semaphore releases,
+this function will do the waking (see the comment in mutex_exit). This
+function should be called about every 1 second in the server. */
+
+void
+sync_arr_wake_threads_if_sema_free(void);
+/*====================================*/
+/**************************************************************************
+Prints warnings of long semaphore waits to stderr. Currently > 120 sec. */
+
+void
+sync_array_print_long_waits(void);
+/*=============================*/
/************************************************************************
Validates the integrity of the wait array. Checks
that the number of reserved cells equals the count variable. */
diff --git a/innobase/include/sync0ipm.ic b/innobase/include/sync0ipm.ic
index 8487830e1dd..b8aa87ba6d6 100644
--- a/innobase/include/sync0ipm.ic
+++ b/innobase/include/sync0ipm.ic
@@ -92,7 +92,7 @@ loop:
loop_count++;
ut_ad(loop_count < 15);
- if (mutex_enter_nowait(mutex) == 0) {
+ if (mutex_enter_nowait(mutex, IB__FILE__, __LINE__) == 0) {
/* Succeeded! */
return(0);
@@ -105,7 +105,7 @@ loop:
/* Order is important here: FIRST reset event, then set waiters */
ip_mutex_set_waiters(ip_mutex, 1);
- if (mutex_enter_nowait(mutex) == 0) {
+ if (mutex_enter_nowait(mutex, IB__FILE__, __LINE__) == 0) {
/* Succeeded! */
return(0);
diff --git a/innobase/include/sync0rw.h b/innobase/include/sync0rw.h
index 4e055da3810..7ad38f5bc7f 100644
--- a/innobase/include/sync0rw.h
+++ b/innobase/include/sync0rw.h
@@ -46,9 +46,10 @@ extern ibool rw_lock_debug_waiters; /* This is set to TRUE, if
extern ulint rw_s_system_call_count;
extern ulint rw_s_spin_wait_count;
extern ulint rw_s_exit_count;
-
+extern ulint rw_s_os_wait_count;
extern ulint rw_x_system_call_count;
extern ulint rw_x_spin_wait_count;
+extern ulint rw_x_os_wait_count;
extern ulint rw_x_exit_count;
/**********************************************************************
@@ -92,32 +93,20 @@ rw_lock_validate(
NOTE! The following macros should be used in rw s-locking, not the
corresponding function. */
-#ifdef UNIV_SYNC_DEBUG
#define rw_lock_s_lock(M) rw_lock_s_lock_func(\
(M), 0, IB__FILE__, __LINE__)
-#else
-#define rw_lock_s_lock(M) rw_lock_s_lock_func(M)
-#endif
/******************************************************************
NOTE! The following macros should be used in rw s-locking, not the
corresponding function. */
-#ifdef UNIV_SYNC_DEBUG
#define rw_lock_s_lock_gen(M, P) rw_lock_s_lock_func(\
(M), (P), IB__FILE__, __LINE__)
-#else
-#define rw_lock_s_lock_gen(M, P) rw_lock_s_lock_func(M)
-#endif
/******************************************************************
NOTE! The following macros should be used in rw s-locking, not the
corresponding function. */
-#ifdef UNIV_SYNC_DEBUG
#define rw_lock_s_lock_nowait(M) rw_lock_s_lock_func_nowait(\
(M), IB__FILE__, __LINE__)
-#else
-#define rw_lock_s_lock_nowait(M) rw_lock_s_lock_func_nowait(M)
-#endif
/**********************************************************************
NOTE! Use the corresponding macro, not directly this function, except if
you supply the file name and line number. Lock an rw-lock in shared mode
@@ -129,14 +118,11 @@ UNIV_INLINE
void
rw_lock_s_lock_func(
/*================*/
- rw_lock_t* lock /* in: pointer to rw-lock */
- #ifdef UNIV_SYNC_DEBUG
- ,ulint pass, /* in: pass value; != 0, if the lock will
+ rw_lock_t* lock, /* in: pointer to rw-lock */
+ ulint pass, /* in: pass value; != 0, if the lock will
be passed to another thread to unlock */
- char* file_name, /* in: file name where lock requested */
- ulint line /* in: line where requested */
- #endif
-);
+ char* file_name,/* in: file name where lock requested */
+ ulint line); /* in: line where requested */
/**********************************************************************
NOTE! Use the corresponding macro, not directly this function, except if
you supply the file name and line number. Lock an rw-lock in shared mode
@@ -146,12 +132,9 @@ ibool
rw_lock_s_lock_func_nowait(
/*=======================*/
/* out: TRUE if success */
- rw_lock_t* lock /* in: pointer to rw-lock */
- #ifdef UNIV_SYNC_DEBUG
- ,char* file_name, /* in: file name where lock requested */
- ulint line /* in: line where requested */
- #endif
-);
+ rw_lock_t* lock, /* in: pointer to rw-lock */
+ char* file_name,/* in: file name where lock requested */
+ ulint line); /* in: line where requested */
/**********************************************************************
NOTE! Use the corresponding macro, not directly this function! Lock an
rw-lock in exclusive mode for the current thread if the lock can be
@@ -161,12 +144,9 @@ ibool
rw_lock_x_lock_func_nowait(
/*=======================*/
/* out: TRUE if success */
- rw_lock_t* lock /* in: pointer to rw-lock */
- #ifdef UNIV_SYNC_DEBUG
- ,char* file_name, /* in: file name where lock requested */
- ulint line /* in: line where requested */
- #endif
-);
+ rw_lock_t* lock, /* in: pointer to rw-lock */
+ char* file_name,/* in: file name where lock requested */
+ ulint line); /* in: line where requested */
/**********************************************************************
Releases a shared mode lock. */
UNIV_INLINE
@@ -199,32 +179,20 @@ Releases a shared mode lock. */
NOTE! The following macro should be used in rw x-locking, not the
corresponding function. */
-#ifdef UNIV_SYNC_DEBUG
#define rw_lock_x_lock(M) rw_lock_x_lock_func(\
(M), 0, IB__FILE__, __LINE__)
-#else
-#define rw_lock_x_lock(M) rw_lock_x_lock_func(M, 0)
-#endif
/******************************************************************
NOTE! The following macro should be used in rw x-locking, not the
corresponding function. */
-#ifdef UNIV_SYNC_DEBUG
#define rw_lock_x_lock_gen(M, P) rw_lock_x_lock_func(\
(M), (P), IB__FILE__, __LINE__)
-#else
-#define rw_lock_x_lock_gen(M, P) rw_lock_x_lock_func(M, P)
-#endif
/******************************************************************
NOTE! The following macros should be used in rw x-locking, not the
corresponding function. */
-#ifdef UNIV_SYNC_DEBUG
#define rw_lock_x_lock_nowait(M) rw_lock_x_lock_func_nowait(\
(M), IB__FILE__, __LINE__)
-#else
-#define rw_lock_x_lock_nowait(M) rw_lock_x_lock_func_nowait(M)
-#endif
/**********************************************************************
NOTE! Use the corresponding macro, not directly this function! Lock an
rw-lock in exclusive mode for the current thread. If the rw-lock is locked
@@ -239,13 +207,10 @@ void
rw_lock_x_lock_func(
/*================*/
rw_lock_t* lock, /* in: pointer to rw-lock */
- ulint pass /* in: pass value; != 0, if the lock will
+ ulint pass, /* in: pass value; != 0, if the lock will
be passed to another thread to unlock */
- #ifdef UNIV_SYNC_DEBUG
- ,char* file_name, /* in: file name where lock requested */
- ulint line /* in: line where requested */
- #endif
-);
+ char* file_name,/* in: file name where lock requested */
+ ulint line); /* in: line where requested */
/**********************************************************************
Releases an exclusive mode lock. */
UNIV_INLINE
@@ -283,10 +248,8 @@ void
rw_lock_s_lock_direct(
/*==================*/
rw_lock_t* lock /* in: pointer to rw-lock */
- #ifdef UNIV_SYNC_DEBUG
,char* file_name, /* in: file name where lock requested */
ulint line /* in: line where requested */
- #endif
);
/**********************************************************************
Low-level function which locks an rw-lock in x-mode when we know that it
@@ -297,10 +260,8 @@ void
rw_lock_x_lock_direct(
/*==================*/
rw_lock_t* lock /* in: pointer to rw-lock */
- #ifdef UNIV_SYNC_DEBUG
,char* file_name, /* in: file name where lock requested */
ulint line /* in: line where requested */
- #endif
);
/**********************************************************************
This function is used in the insert buffer to move the ownership of an
@@ -349,6 +310,23 @@ rw_lock_get_x_lock_count(
/*=====================*/
/* out: value of writer_count */
rw_lock_t* lock); /* in: rw-lock */
+/************************************************************************
+Accessor functions for rw lock. */
+UNIV_INLINE
+ulint
+rw_lock_get_waiters(
+/*================*/
+ rw_lock_t* lock);
+UNIV_INLINE
+ulint
+rw_lock_get_writer(
+/*===============*/
+ rw_lock_t* lock);
+UNIV_INLINE
+ulint
+rw_lock_get_reader_count(
+/*=====================*/
+ rw_lock_t* lock);
/**********************************************************************
Checks if the thread has locked the rw-lock in the specified mode, with
the pass value == 0. */
@@ -414,9 +392,6 @@ rw_lock_debug_print(
/*================*/
rw_lock_debug_t* info); /* in: debug struct */
-
-#define RW_CNAME_LEN 8
-
/* NOTE! The structure appears here only for the compiler to know its size.
Do not use its fields directly! The structure used in the spin lock
implementation of a read-write lock. Several threads may have a shared lock
@@ -447,7 +422,7 @@ struct rw_lock_struct {
ulint waiters; /* This ulint is set to 1 if there are
waiters (readers or writers) in the global
wait array, waiting for this rw_lock.
- Otherwise, = 0. */
+ Otherwise, == 0. */
ibool writer_is_wait_ex;
/* This is TRUE if the writer field is
RW_LOCK_WAIT_EX; this field is located far
@@ -463,9 +438,12 @@ struct rw_lock_struct {
info list of the lock */
ulint level; /* Debug version: level in the global latching
order; default SYNC_LEVEL_NONE */
- char cfile_name[RW_CNAME_LEN];
- /* File name where lock created */
+ char* cfile_name; /* File name where lock created */
ulint cline; /* Line where created */
+ char* last_s_file_name;/* File name where last time s-locked */
+ char* last_x_file_name;/* File name where last time x-locked */
+ ulint last_s_line; /* Line number where last time s-locked */
+ ulint last_x_line; /* Line number where last time x-locked */
ulint magic_n;
};
diff --git a/innobase/include/sync0rw.ic b/innobase/include/sync0rw.ic
index 11add13d2d0..2a02cfb6a53 100644
--- a/innobase/include/sync0rw.ic
+++ b/innobase/include/sync0rw.ic
@@ -15,14 +15,11 @@ waiting for the lock before suspending the thread. */
void
rw_lock_s_lock_spin(
/*================*/
- rw_lock_t* lock /* in: pointer to rw-lock */
- #ifdef UNIV_SYNC_DEBUG
- ,ulint pass, /* in: pass value; != 0, if the lock will
+ rw_lock_t* lock, /* in: pointer to rw-lock */
+ ulint pass, /* in: pass value; != 0, if the lock will
be passed to another thread to unlock */
- char* file_name, /* in: file name where lock requested */
- ulint line /* in: line where requested */
- #endif
-);
+ char* file_name,/* in: file name where lock requested */
+ ulint line); /* in: line where requested */
/**********************************************************************
Inserts the debug information for an rw-lock. */
@@ -128,14 +125,11 @@ ibool
rw_lock_s_lock_low(
/*===============*/
/* out: TRUE if success */
- rw_lock_t* lock /* in: pointer to rw-lock */
- #ifdef UNIV_SYNC_DEBUG
- ,ulint pass, /* in: pass value; != 0, if the lock will be
+ rw_lock_t* lock, /* in: pointer to rw-lock */
+ ulint pass, /* in: pass value; != 0, if the lock will be
passed to another thread to unlock */
char* file_name, /* in: file name where lock requested */
- ulint line /* in: line where requested */
- #endif
-)
+ ulint line) /* in: line where requested */
{
ut_ad(mutex_own(rw_lock_get_mutex(lock)));
@@ -150,6 +144,9 @@ rw_lock_s_lock_low(
line);
#endif
+ lock->last_s_file_name = file_name;
+ lock->last_s_line = line;
+
return(TRUE); /* locking succeeded */
}
@@ -164,12 +161,9 @@ UNIV_INLINE
void
rw_lock_s_lock_direct(
/*==================*/
- rw_lock_t* lock /* in: pointer to rw-lock */
- #ifdef UNIV_SYNC_DEBUG
- ,char* file_name, /* in: file name where lock requested */
- ulint line /* in: line where requested */
- #endif
-)
+ rw_lock_t* lock, /* in: pointer to rw-lock */
+ char* file_name,/* in: file name where lock requested */
+ ulint line) /* in: line where requested */
{
ut_ad(lock->writer == RW_LOCK_NOT_LOCKED);
ut_ad(rw_lock_get_reader_count(lock) == 0);
@@ -177,6 +171,9 @@ rw_lock_s_lock_direct(
/* Set the shared lock by incrementing the reader count */
lock->reader_count++;
+ lock->last_s_file_name = file_name;
+ lock->last_s_line = line;
+
#ifdef UNIV_SYNC_DEBUG
rw_lock_add_debug_info(lock, 0, RW_LOCK_SHARED, file_name, line);
#endif
@@ -190,12 +187,9 @@ UNIV_INLINE
void
rw_lock_x_lock_direct(
/*==================*/
- rw_lock_t* lock /* in: pointer to rw-lock */
- #ifdef UNIV_SYNC_DEBUG
- ,char* file_name, /* in: file name where lock requested */
- ulint line /* in: line where requested */
- #endif
-)
+ rw_lock_t* lock, /* in: pointer to rw-lock */
+ char* file_name, /* in: file name where lock requested */
+ ulint line) /* in: line where requested */
{
ut_ad(rw_lock_validate(lock));
ut_ad(rw_lock_get_reader_count(lock) == 0);
@@ -206,6 +200,9 @@ rw_lock_x_lock_direct(
lock->writer_count++;
lock->pass = 0;
+ lock->last_x_file_name = file_name;
+ lock->last_x_line = line;
+
#ifdef UNIV_SYNC_DEBUG
rw_lock_add_debug_info(lock, 0, RW_LOCK_EX, file_name, line);
#endif
@@ -221,14 +218,11 @@ UNIV_INLINE
void
rw_lock_s_lock_func(
/*================*/
- rw_lock_t* lock /* in: pointer to rw-lock */
- #ifdef UNIV_SYNC_DEBUG
- ,ulint pass, /* in: pass value; != 0, if the lock will
+ rw_lock_t* lock, /* in: pointer to rw-lock */
+ ulint pass, /* in: pass value; != 0, if the lock will
be passed to another thread to unlock */
char* file_name, /* in: file name where lock requested */
- ulint line /* in: line where requested */
- #endif
-)
+ ulint line) /* in: line where requested */
{
/* NOTE: As we do not know the thread ids for threads which have
s-locked a latch, and s-lockers will be served only after waiting
@@ -245,11 +239,7 @@ rw_lock_s_lock_func(
mutex_enter(rw_lock_get_mutex(lock));
- if (TRUE == rw_lock_s_lock_low(lock
- #ifdef UNIV_SYNC_DEBUG
- ,pass, file_name, line
- #endif
- )) {
+ if (TRUE == rw_lock_s_lock_low(lock, pass, file_name, line)) {
mutex_exit(rw_lock_get_mutex(lock));
return; /* Success */
@@ -257,11 +247,8 @@ rw_lock_s_lock_func(
/* Did not succeed, try spin wait */
mutex_exit(rw_lock_get_mutex(lock));
- rw_lock_s_lock_spin(lock
- #ifdef UNIV_SYNC_DEBUG
- ,pass, file_name, line
- #endif
- );
+ rw_lock_s_lock_spin(lock, pass, file_name, line);
+
return;
}
}
@@ -275,12 +262,9 @@ ibool
rw_lock_s_lock_func_nowait(
/*=======================*/
/* out: TRUE if success */
- rw_lock_t* lock /* in: pointer to rw-lock */
- #ifdef UNIV_SYNC_DEBUG
- ,char* file_name, /* in: file name where lock requested */
- ulint line /* in: line where requested */
- #endif
-)
+ rw_lock_t* lock, /* in: pointer to rw-lock */
+ char* file_name,/* in: file name where lock requested */
+ ulint line) /* in: line where requested */
{
ibool success = FALSE;
@@ -294,6 +278,9 @@ rw_lock_s_lock_func_nowait(
rw_lock_add_debug_info(lock, 0, RW_LOCK_SHARED, file_name,
line);
#endif
+
+ lock->last_s_file_name = file_name;
+ lock->last_s_line = line;
success = TRUE;
}
@@ -312,12 +299,9 @@ ibool
rw_lock_x_lock_func_nowait(
/*=======================*/
/* out: TRUE if success */
- rw_lock_t* lock /* in: pointer to rw-lock */
- #ifdef UNIV_SYNC_DEBUG
- ,char* file_name, /* in: file name where lock requested */
- ulint line /* in: line where requested */
- #endif
-)
+ rw_lock_t* lock, /* in: pointer to rw-lock */
+ char* file_name, /* in: file name where lock requested */
+ ulint line) /* in: line where requested */
{
ibool success = FALSE;
@@ -338,6 +322,9 @@ rw_lock_x_lock_func_nowait(
rw_lock_add_debug_info(lock, 0, RW_LOCK_EX, file_name, line);
#endif
+ lock->last_x_file_name = file_name;
+ lock->last_x_line = line;
+
success = TRUE;
}
diff --git a/innobase/include/sync0sync.h b/innobase/include/sync0sync.h
index cb86b2b815c..6c3bff66e27 100644
--- a/innobase/include/sync0sync.h
+++ b/innobase/include/sync0sync.h
@@ -64,24 +64,15 @@ mutex_free(
NOTE! The following macro should be used in mutex locking, not the
corresponding function. */
-#ifdef UNIV_SYNC_DEBUG
#define mutex_enter(M) mutex_enter_func((M), IB__FILE__, __LINE__)
-#else
-#define mutex_enter(M) mutex_enter_func(M)
-#endif
/******************************************************************
NOTE! The following macro should be used in mutex locking, not the
corresponding function. */
/* NOTE! currently same as mutex_enter! */
-#ifdef UNIV_SYNC_DEBUG
-#define mutex_enter_fast(M) mutex_enter_func((M), IB__FILE__, __LINE__)
-#else
-#define mutex_enter_fast(M) mutex_enter_func(M)
-#endif
-
-#define mutex_enter_fast_func mutex_enter_func;
+#define mutex_enter_fast(M) mutex_enter_func((M), IB__FILE__, __LINE__)
+#define mutex_enter_fast_func mutex_enter_func;
/**********************************************************************
NOTE! Use the corresponding macro in the header file, not this function
directly. Locks a mutex for the current thread. If the mutex is reserved
@@ -91,12 +82,9 @@ UNIV_INLINE
void
mutex_enter_func(
/*=============*/
- mutex_t* mutex /* in: pointer to mutex */
- #ifdef UNIV_SYNC_DEBUG
- ,char* file_name, /* in: file name where locked */
- ulint line /* in: line where locked */
- #endif
- );
+ mutex_t* mutex, /* in: pointer to mutex */
+ char* file_name, /* in: file name where locked */
+ ulint line); /* in: line where locked */
/************************************************************************
Tries to lock the mutex for the current thread. If the lock is not acquired
immediately, returns with return value 1. */
@@ -104,8 +92,11 @@ immediately, returns with return value 1. */
ulint
mutex_enter_nowait(
/*===============*/
- /* out: 0 if succeed, 1 if not */
- mutex_t* mutex); /* in: pointer to mutex */
+ /* out: 0 if succeed, 1 if not */
+ mutex_t* mutex, /* in: pointer to mutex */
+ char* file_name, /* in: file name where mutex
+ requested */
+ ulint line); /* in: line where requested */
/**********************************************************************
Unlocks a mutex owned by the current thread. */
UNIV_INLINE
@@ -373,6 +364,7 @@ Memory pool mutex */
#define SYNC_LEVEL_NONE 2000 /* default: level not defined */
#define SYNC_DICT 1000
#define SYNC_DICT_AUTOINC_MUTEX 999
+#define SYNC_FOREIGN_KEY_CHECK 998
#define SYNC_PURGE_IS_RUNNING 997
#define SYNC_DICT_HEADER 995
#define SYNC_IBUF_HEADER 914
@@ -418,6 +410,7 @@ Memory pool mutex */
#define SYNC_BUF_BLOCK 149
#define SYNC_DOUBLEWRITE 140
#define SYNC_ANY_LATCH 135
+#define SYNC_THR_LOCAL 133
#define SYNC_MEM_HASH 131
#define SYNC_MEM_POOL 130
@@ -429,8 +422,6 @@ Memory pool mutex */
#define RW_LOCK_WAIT_EX 353
#define SYNC_MUTEX 354
-#define MUTEX_CNAME_LEN 8
-
/* NOTE! The structure appears here only for the compiler to know its size.
Do not use its fields directly! The structure used in the spin lock
implementation of a mutual exclusion semaphore. */
@@ -457,8 +448,7 @@ struct mutex_struct {
locked */
ulint level; /* Debug version: level in the global latching
order; default SYNC_LEVEL_NONE */
- char cfile_name[MUTEX_CNAME_LEN];
- /* File name where mutex created */
+ char* cfile_name; /* File name where mutex created */
ulint cline; /* Line where created */
ulint magic_n;
};
diff --git a/innobase/include/sync0sync.ic b/innobase/include/sync0sync.ic
index f7b341cb386..9531377ce0b 100644
--- a/innobase/include/sync0sync.ic
+++ b/innobase/include/sync0sync.ic
@@ -22,13 +22,9 @@ for the mutex before suspending the thread. */
void
mutex_spin_wait(
/*============*/
- mutex_t* mutex /* in: pointer to mutex */
-
- #ifdef UNIV_SYNC_DEBUG
- ,char* file_name, /* in: file name where mutex requested */
- ulint line /* in: line where requested */
- #endif
-);
+ mutex_t* mutex, /* in: pointer to mutex */
+ char* file_name,/* in: file name where mutex requested */
+ ulint line); /* in: line where requested */
/**********************************************************************
Sets the debug information for a reserved mutex. */
@@ -209,6 +205,18 @@ mutex_exit(
#endif
mutex_reset_lock_word(mutex);
+ /* A problem: we assume that mutex_reset_lock_word
+ is a memory barrier, that is when we read the waiters
+ field next, the read must be serialized in memory
+ after the reset. A speculative processor might
+ perform the read first, which could leave a waiting
+ thread hanging indefinitely.
+
+ Our current solution is to call
+ sync_arr_wake_threads_if_sema_free() every 10 seconds
+ to wake up possibly hanging threads if
+ they are missed in mutex_signal_object. */
+
if (mutex_get_waiters(mutex) != 0) {
mutex_signal_object(mutex);
@@ -227,12 +235,9 @@ UNIV_INLINE
void
mutex_enter_func(
/*=============*/
- mutex_t* mutex /* in: pointer to mutex */
- #ifdef UNIV_SYNC_DEBUG
- ,char* file_name, /* in: file name where locked */
- ulint line /* in: line where locked */
- #endif
- )
+ mutex_t* mutex, /* in: pointer to mutex */
+ char* file_name,/* in: file name where locked */
+ ulint line) /* in: line where locked */
{
ut_ad(mutex_validate(mutex));
@@ -245,13 +250,11 @@ mutex_enter_func(
mutex_set_debug_info(mutex, file_name, line);
#endif
+ mutex->file_name = file_name;
+ mutex->line = line;
+
return; /* Succeeded! */
}
- mutex_spin_wait(mutex
- #ifdef UNIV_SYNC_DEBUG
- ,file_name,
- line
- #endif
- );
+ mutex_spin_wait(mutex, file_name, line);
}
diff --git a/innobase/include/trx0rseg.ic b/innobase/include/trx0rseg.ic
index aeb4466ff0f..423447d5566 100644
--- a/innobase/include/trx0rseg.ic
+++ b/innobase/include/trx0rseg.ic
@@ -61,7 +61,11 @@ trx_rsegf_get_nth_undo(
ulint n, /* in: index of slot */
mtr_t* mtr) /* in: mtr */
{
- ut_ad(n < TRX_RSEG_N_SLOTS);
+ if (n >= TRX_RSEG_N_SLOTS) {
+ fprintf(stderr,
+ "InnoDB: Error: trying to get slot %lu of rseg\n", n);
+ ut_a(0);
+ }
return(mtr_read_ulint(rsegf + TRX_RSEG_UNDO_SLOTS +
n * TRX_RSEG_SLOT_SIZE, MLOG_4BYTES, mtr));
@@ -78,7 +82,11 @@ trx_rsegf_set_nth_undo(
ulint page_no,/* in: page number of the undo log segment */
mtr_t* mtr) /* in: mtr */
{
- ut_ad(n < TRX_RSEG_N_SLOTS);
+ if (n >= TRX_RSEG_N_SLOTS) {
+ fprintf(stderr,
+ "InnoDB: Error: trying to set slot %lu of rseg\n", n);
+ ut_a(0);
+ }
mlog_write_ulint(rsegf + TRX_RSEG_UNDO_SLOTS + n * TRX_RSEG_SLOT_SIZE,
page_no, MLOG_4BYTES, mtr);
diff --git a/innobase/include/trx0trx.h b/innobase/include/trx0trx.h
index f179e20ad62..8db0b39d3b4 100644
--- a/innobase/include/trx0trx.h
+++ b/innobase/include/trx0trx.h
@@ -317,6 +317,19 @@ struct trx_struct{
ibool has_search_latch;
/* TRUE if this trx has latched the
search system latch in S-mode */
+ ulint search_latch_timeout;
+ /* If we notice that someone is
+ waiting for our S-lock on the search
+ latch to be released, we wait in
+ row0sel.c for BTR_SEA_TIMEOUT new
+ searches until we try to keep
+ the search latch again over
+ calls from MySQL; this is intended
+ to reduce contention on the search
+ latch */
+ lock_t* auto_inc_lock; /* possible auto-inc lock reserved by
+ the transaction; note that it is also
+ in the lock list trx_locks */
ibool ignore_duplicates_in_insert;
/* in an insert roll back only insert
of the latest row in case
@@ -401,11 +414,9 @@ struct trx_struct{
checking algorithm */
/*------------------------------*/
mem_heap_t* lock_heap; /* memory heap for the locks of the
- transaction; protected by
- lock_heap_mutex */
+ transaction */
UT_LIST_BASE_NODE_T(lock_t)
- trx_locks; /* locks reserved by the transaction;
- protected by lock_heap_mutex */
+ trx_locks; /* locks reserved by the transaction */
/*------------------------------*/
mem_heap_t* read_view_heap; /* memory heap for the read view */
read_view_t* read_view; /* consistent read view or NULL */
diff --git a/innobase/include/ut0mem.h b/innobase/include/ut0mem.h
index 8e5a4fda0d3..2d245e5f72f 100644
--- a/innobase/include/ut0mem.h
+++ b/innobase/include/ut0mem.h
@@ -13,6 +13,9 @@ Created 5/30/1994 Heikki Tuuri
#include <string.h>
#include <stdlib.h>
+/* The total amount of memory currently allocated from the OS with malloc */
+extern ulint ut_total_allocated_memory;
+
UNIV_INLINE
void*
ut_memcpy(void* dest, void* sour, ulint n);
diff --git a/innobase/lock/lock0lock.c b/innobase/lock/lock0lock.c
index df35e22005f..fa0641bad73 100644
--- a/innobase/lock/lock0lock.c
+++ b/innobase/lock/lock0lock.c
@@ -578,6 +578,17 @@ lock_sys_create(
}
/*************************************************************************
+Gets the size of a lock struct. */
+
+ulint
+lock_get_size(void)
+/*===============*/
+ /* out: size in bytes */
+{
+ return((ulint)sizeof(lock_t));
+}
+
+/*************************************************************************
Gets the mode of a lock. */
UNIV_INLINE
ulint
@@ -709,13 +720,17 @@ lock_mode_stronger_or_eq(
ulint mode2) /* in: lock mode */
{
ut_ad(mode1 == LOCK_X || mode1 == LOCK_S || mode1 == LOCK_IX
- || mode1 == LOCK_IS);
+ || mode1 == LOCK_IS || mode1 == LOCK_AUTO_INC);
ut_ad(mode2 == LOCK_X || mode2 == LOCK_S || mode2 == LOCK_IX
- || mode2 == LOCK_IS);
+ || mode2 == LOCK_IS || mode2 == LOCK_AUTO_INC);
if (mode1 == LOCK_X) {
return(TRUE);
+ } else if (mode1 == LOCK_AUTO_INC && mode2 == LOCK_AUTO_INC) {
+
+ return(TRUE);
+
} else if (mode1 == LOCK_S
&& (mode2 == LOCK_S || mode2 == LOCK_IS)) {
return(TRUE);
@@ -743,9 +758,9 @@ lock_mode_compatible(
ulint mode2) /* in: lock mode */
{
ut_ad(mode1 == LOCK_X || mode1 == LOCK_S || mode1 == LOCK_IX
- || mode1 == LOCK_IS);
+ || mode1 == LOCK_IS || mode1 == LOCK_AUTO_INC);
ut_ad(mode2 == LOCK_X || mode2 == LOCK_S || mode2 == LOCK_IX
- || mode2 == LOCK_IS);
+ || mode2 == LOCK_IS || mode2 == LOCK_AUTO_INC);
if (mode1 == LOCK_S && (mode2 == LOCK_IS || mode2 == LOCK_S)) {
@@ -755,12 +770,18 @@ lock_mode_compatible(
return(FALSE);
+ } else if (mode1 == LOCK_AUTO_INC && (mode2 == LOCK_IS
+ || mode2 == LOCK_IX)) {
+ return(TRUE);
+
} else if (mode1 == LOCK_IS && (mode2 == LOCK_IS
|| mode2 == LOCK_IX
+ || mode2 == LOCK_AUTO_INC
|| mode2 == LOCK_S)) {
return(TRUE);
} else if (mode1 == LOCK_IX && (mode2 == LOCK_IS
+ || mode2 == LOCK_AUTO_INC
|| mode2 == LOCK_IX)) {
return(TRUE);
}
@@ -1836,7 +1857,7 @@ lock_grant(
Cancels a waiting record lock request and releases the waiting transaction
that requested it. NOTE: does NOT check if waiting lock requests behind this
one can now be granted! */
-
+static
void
lock_rec_cancel(
/*============*/
@@ -2812,7 +2833,18 @@ lock_table_create(
ut_ad(table && trx);
ut_ad(mutex_own(&kernel_mutex));
- lock = mem_heap_alloc(trx->lock_heap, sizeof(lock_t));
+ if (type_mode == LOCK_AUTO_INC) {
+ /* Only one trx can have the lock on the table
+ at a time: we may use the memory preallocated
+ to the table object */
+
+ lock = table->auto_inc_lock;
+
+ ut_a(trx->auto_inc_lock == NULL);
+ trx->auto_inc_lock = lock;
+ } else {
+ lock = mem_heap_alloc(trx->lock_heap, sizeof(lock_t));
+ }
if (lock == NULL) {
@@ -2854,6 +2886,10 @@ lock_table_remove_low(
table = lock->un_member.tab_lock.table;
trx = lock->trx;
+ if (lock == trx->auto_inc_lock) {
+ trx->auto_inc_lock = NULL;
+ }
+
UT_LIST_REMOVE(trx_locks, trx->trx_locks, lock);
UT_LIST_REMOVE(un_member.tab_lock.locks, table->locks, lock);
}
@@ -2988,7 +3024,7 @@ lock_table(
if (lock_table_other_has_incompatible(trx, LOCK_WAIT, table, mode)) {
- /* Another trx has request on the table in an incompatible
+ /* Another trx has a request on the table in an incompatible
mode: this trx must wait */
err = lock_table_enqueue_waiting(mode, table, thr);
@@ -3102,6 +3138,24 @@ lock_table_dequeue(
/*=========================== LOCK RELEASE ==============================*/
/*************************************************************************
+Releases an auto-inc lock a transaction possibly has on a table.
+Releases possible other transactions waiting for this lock. */
+
+void
+lock_table_unlock_auto_inc(
+/*=======================*/
+ trx_t* trx) /* in: transaction */
+{
+ if (trx->auto_inc_lock) {
+ mutex_enter(&kernel_mutex);
+
+ lock_table_dequeue(trx->auto_inc_lock);
+
+ mutex_exit(&kernel_mutex);
+ }
+}
+
+/*************************************************************************
Releases transaction locks, and releases possible other transactions waiting
because of these locks. */
@@ -3147,6 +3201,37 @@ lock_release_off_kernel(
}
mem_heap_empty(trx->lock_heap);
+
+ ut_a(trx->auto_inc_lock == NULL);
+}
+
+/*************************************************************************
+Cancels a waiting lock request and releases possible other transactions
+waiting behind it. */
+
+void
+lock_cancel_waiting_and_release(
+/*============================*/
+ lock_t* lock) /* in: waiting lock request */
+{
+ ut_ad(mutex_own(&kernel_mutex));
+
+ if (lock_get_type(lock) == LOCK_REC) {
+
+ lock_rec_dequeue_from_page(lock);
+ } else {
+ ut_ad(lock_get_type(lock) == LOCK_TABLE);
+
+ lock_table_dequeue(lock);
+ }
+
+ /* Reset the wait flag and the back pointer to lock in trx */
+
+ lock_reset_lock_and_trx_wait(lock);
+
+ /* The following function releases the trx from lock wait */
+
+ trx_end_lock_wait(lock->trx);
}
/*************************************************************************
@@ -3237,8 +3322,10 @@ lock_table_print(
printf(" lock_mode IS");
} else if (lock_get_mode(lock) == LOCK_IX) {
printf(" lock_mode IX");
+ } else if (lock_get_mode(lock) == LOCK_AUTO_INC) {
+ printf(" lock_mode AUTO-INC");
} else {
- ut_error;
+ printf(" unknown lock_mode %lu", lock_get_mode(lock));
}
if (lock_get_wait(lock)) {
@@ -3304,10 +3391,7 @@ lock_rec_print(
page = buf_page_get_gen(space, page_no, RW_NO_LATCH,
NULL, BUF_GET_IF_IN_POOL,
-#ifdef UNIV_SYNC_DEBUG
- IB__FILE__, __LINE__,
-#endif
- &mtr);
+ IB__FILE__, __LINE__, &mtr);
if (page) {
page = buf_page_get_nowait(space, page_no, RW_S_LATCH, &mtr);
}
@@ -3417,6 +3501,11 @@ loop:
trx = UT_LIST_GET_FIRST(trx_sys->trx_list);
i = 0;
+
+ /* Since we temporarily release the kernel mutex when
+ reading in a database page below, the variable trx may be
+ obsolete now and we must loop through the trx list to
+ get probably the same trx, or some other trx. */
while (trx && (i < nth_trx)) {
trx = UT_LIST_GET_NEXT(trx_list, trx);
@@ -3466,6 +3555,9 @@ loop:
i = 0;
+ /* See the note about the trx loop above for why we loop here:
+ lock may be an obsolete pointer now. */
+
lock = UT_LIST_GET_FIRST(trx->trx_locks);
while (lock && (i < nth_lock)) {
diff --git a/innobase/log/log0log.c b/innobase/log/log0log.c
index ec42c8f2e08..06ac7a578a5 100644
--- a/innobase/log/log0log.c
+++ b/innobase/log/log0log.c
@@ -569,9 +569,12 @@ log_init(void)
ut_a(LOG_BUFFER_SIZE >= 4 * UNIV_PAGE_SIZE);
buf = ut_malloc(LOG_BUFFER_SIZE + OS_FILE_LOG_BLOCK_SIZE);
- log_sys->buf = ut_align(buf, OS_FILE_LOG_BLOCK_SIZE);
+ log_sys->buf = ut_align(buf, OS_FILE_LOG_BLOCK_SIZE);
log_sys->buf_size = LOG_BUFFER_SIZE;
+
+ memset(log_sys->buf, '\0', LOG_BUFFER_SIZE);
+
log_sys->max_buf_free = log_sys->buf_size / LOG_BUF_FLUSH_RATIO
- LOG_BUF_FLUSH_MARGIN;
log_sys->check_flush_or_checkpoint = TRUE;
@@ -579,6 +582,8 @@ log_init(void)
log_sys->n_log_ios = 0;
+ log_sys->n_log_ios_old = log_sys->n_log_ios;
+ log_sys->last_printout_time = time(NULL);
/*----------------------------*/
log_sys->buf_next_to_write = 0;
@@ -609,6 +614,7 @@ log_init(void)
log_sys->checkpoint_buf = ut_align(
mem_alloc(2 * OS_FILE_LOG_BLOCK_SIZE),
OS_FILE_LOG_BLOCK_SIZE);
+ memset(log_sys->checkpoint_buf, '\0', OS_FILE_LOG_BLOCK_SIZE);
/*----------------------------*/
log_sys->archiving_state = LOG_ARCH_ON;
@@ -626,6 +632,8 @@ log_init(void)
OS_FILE_LOG_BLOCK_SIZE);
log_sys->archive_buf_size = LOG_ARCHIVE_BUF_SIZE;
+ memset(log_sys->archive_buf, '\0', LOG_ARCHIVE_BUF_SIZE);
+
log_sys->archiving_on = os_event_create(NULL);
/*----------------------------*/
@@ -1849,7 +1857,7 @@ log_group_archive(
os_file_t file_handle;
dulint start_lsn;
dulint end_lsn;
- char name[100];
+ char name[1024];
byte* buf;
ulint len;
ibool ret;
@@ -2796,8 +2804,35 @@ void
log_print(void)
/*===========*/
{
- printf("Log sequence number %lu %lu\n",
- ut_dulint_get_high(log_sys->lsn),
- ut_dulint_get_low(log_sys->lsn));
-}
+ double time_elapsed;
+ time_t current_time;
+
+ mutex_enter(&(log_sys->mutex));
+ printf("Log sequence number %lu %lu\n"
+ "Log flushed up to %lu %lu\n"
+ "Last checkpoint at %lu %lu\n",
+ ut_dulint_get_high(log_sys->lsn),
+ ut_dulint_get_low(log_sys->lsn),
+ ut_dulint_get_high(log_sys->written_to_some_lsn),
+ ut_dulint_get_low(log_sys->written_to_some_lsn),
+ ut_dulint_get_high(log_sys->last_checkpoint_lsn),
+ ut_dulint_get_low(log_sys->last_checkpoint_lsn));
+
+ current_time = time(NULL);
+
+ time_elapsed = difftime(current_time, log_sys->last_printout_time);
+
+ printf(
+ "%lu pending log writes, %lu pending chkp writes\n"
+ "%lu log i/o's done, %.2f log i/o's/second\n",
+ log_sys->n_pending_writes,
+ log_sys->n_pending_checkpoint_writes,
+ log_sys->n_log_ios,
+ (log_sys->n_log_ios - log_sys->n_log_ios_old) / time_elapsed);
+
+ log_sys->n_log_ios_old = log_sys->n_log_ios;
+ log_sys->last_printout_time = current_time;
+
+ mutex_exit(&(log_sys->mutex));
+}
diff --git a/innobase/log/log0recv.c b/innobase/log/log0recv.c
index edab98fa39c..eb3eadcede9 100644
--- a/innobase/log/log0recv.c
+++ b/innobase/log/log0recv.c
@@ -560,6 +560,7 @@ recv_parse_or_apply_log_rec_body(
} else if (type <= MLOG_WRITE_STRING) {
new_ptr = mlog_parse_string(ptr, end_ptr, page);
} else {
+ new_ptr = NULL; /* Eliminate compiler warning */
ut_error;
}
@@ -801,9 +802,7 @@ recv_recover_page(
mtr_set_log_mode(&mtr, MTR_LOG_NONE);
success = buf_page_get_known_nowait(RW_X_LATCH, page, BUF_KEEP_OLD,
-#ifdef UNIV_SYNC_DEBUG
IB__FILE__, __LINE__,
-#endif
&mtr);
ut_a(success);
@@ -1212,9 +1211,7 @@ recv_compare_spaces(
frame = buf_page_get_gen(space1, page_no, RW_S_LATCH, NULL,
BUF_GET_IF_IN_POOL,
-#ifdef UNIV_SYNC_DEBUG
IB__FILE__, __LINE__,
-#endif
&mtr);
if (frame) {
buf_page_dbg_add_level(frame, SYNC_NO_ORDER_CHECK);
@@ -1227,9 +1224,7 @@ recv_compare_spaces(
frame = buf_page_get_gen(space2, page_no, RW_S_LATCH, NULL,
BUF_GET_IF_IN_POOL,
-#ifdef UNIV_SYNC_DEBUG
IB__FILE__, __LINE__,
-#endif
&mtr);
if (frame) {
buf_page_dbg_add_level(frame, SYNC_NO_ORDER_CHECK);
@@ -2033,8 +2028,11 @@ recv_recovery_from_checkpoint_start(
while (group) {
old_scanned_lsn = recv_sys->scanned_lsn;
- recv_group_scan_log_recs(group, &contiguous_lsn,
+ if (srv_force_recovery < SRV_FORCE_NO_LOG_REDO) {
+ recv_group_scan_log_recs(group, &contiguous_lsn,
&group_scanned_lsn);
+ }
+
group->scanned_lsn = group_scanned_lsn;
if (ut_dulint_cmp(old_scanned_lsn, group_scanned_lsn) < 0) {
@@ -2120,10 +2118,12 @@ recv_recovery_from_checkpoint_finish(void)
{
/* Rollback the uncommitted transactions which have no user session */
- trx_rollback_all_without_sess();
+ if (srv_force_recovery < SRV_FORCE_NO_TRX_UNDO) {
+ trx_rollback_all_without_sess();
+ }
/* Apply the hashed log records to the respective file pages */
-
+
recv_apply_hashed_log_recs(TRUE);
if (log_debug_writes) {
diff --git a/innobase/mem/mem0pool.c b/innobase/mem/mem0pool.c
index 6c3a4adebae..48e7e686953 100644
--- a/innobase/mem/mem0pool.c
+++ b/innobase/mem/mem0pool.c
@@ -76,7 +76,7 @@ pool, and after that its locks will grow into the buffer pool. */
#define MEM_AREA_FREE 1
/* The smallest memory area total size */
-#define MEM_AREA_MIN_SIZE (2 * sizeof(struct mem_area_struct))
+#define MEM_AREA_MIN_SIZE (2 * MEM_AREA_EXTRA_SIZE)
/* Data structure for a memory pool. The space is allocated using the buddy
algorithm, where free list i contains areas of size 2 to power i. */
@@ -556,7 +556,7 @@ Returns the amount of reserved memory. */
ulint
mem_pool_get_reserved(
/*==================*/
- /* out: reserved mmeory in bytes */
+ /* out: reserved memory in bytes */
mem_pool_t* pool) /* in: memory pool */
{
ulint reserved;
diff --git a/innobase/mtr/mtr0log.c b/innobase/mtr/mtr0log.c
index 11c0c476fcb..26f5a5d1cb7 100644
--- a/innobase/mtr/mtr0log.c
+++ b/innobase/mtr/mtr0log.c
@@ -54,6 +54,13 @@ mlog_write_initial_log_record(
ut_ad(type <= MLOG_BIGGEST_TYPE);
+ if (ptr < buf_pool->frame_zero || ptr >= buf_pool->high_end) {
+ fprintf(stderr,
+ "InnoDB: Error: trying to write to a stray memory location %lx\n",
+ (ulint)ptr);
+ ut_a(0);
+ }
+
log_ptr = mlog_open(mtr, 20);
/* If no logging is requested, we may return now */
@@ -184,6 +191,13 @@ mlog_write_ulint(
{
byte* log_ptr;
+ if (ptr < buf_pool->frame_zero || ptr >= buf_pool->high_end) {
+ fprintf(stderr,
+ "InnoDB: Error: trying to write to a stray memory location %lx\n",
+ (ulint)ptr);
+ ut_a(0);
+ }
+
if (type == MLOG_1BYTE) {
mach_write_to_1(ptr, val);
} else if (type == MLOG_2BYTES) {
@@ -225,6 +239,13 @@ mlog_write_dulint(
{
byte* log_ptr;
+ if (ptr < buf_pool->frame_zero || ptr >= buf_pool->high_end) {
+ fprintf(stderr,
+ "InnoDB: Error: trying to write to a stray memory location %lx\n",
+ (ulint)ptr);
+ ut_a(0);
+ }
+
ut_ad(ptr && mtr);
ut_ad(type == MLOG_8BYTES);
@@ -262,6 +283,12 @@ mlog_write_string(
{
byte* log_ptr;
+ if (ptr < buf_pool->frame_zero || ptr >= buf_pool->high_end) {
+ fprintf(stderr,
+ "InnoDB: Error: trying to write to a stray memory location %lx\n",
+ (ulint)ptr);
+ ut_a(0);
+ }
ut_ad(ptr && mtr);
ut_ad(len < UNIV_PAGE_SIZE);
diff --git a/innobase/os/os0file.c b/innobase/os/os0file.c
index d4d30f6aabc..ced601d4de1 100644
--- a/innobase/os/os0file.c
+++ b/innobase/os/os0file.c
@@ -11,6 +11,7 @@ Created 10/21/1995 Heikki Tuuri
#include "ut0mem.h"
#include "srv0srv.h"
#include "trx0sys.h"
+#include "fil0fil.h"
#undef HAVE_FDATASYNC
@@ -109,6 +110,14 @@ os_aio_array_t* os_aio_sync_array = NULL;
ulint os_aio_n_segments = ULINT_UNDEFINED;
+ulint os_n_file_reads = 0;
+ulint os_n_file_writes = 0;
+ulint os_n_fsyncs = 0;
+ulint os_n_file_reads_old = 0;
+ulint os_n_file_writes_old = 0;
+ulint os_n_fsyncs_old = 0;
+time_t os_last_printout;
+
/***************************************************************************
Gets the operating system version. Currently works only on Windows. */
@@ -118,26 +127,26 @@ os_get_os_version(void)
/* out: OS_WIN95, OS_WIN31, OS_WINNT (2000 == NT) */
{
#ifdef __WIN__
- OSVERSIONINFO os_info;
-
- os_info.dwOSVersionInfoSize = sizeof(OSVERSIONINFO);
-
- ut_a(GetVersionEx(&os_info));
-
- if (os_info.dwPlatformId == VER_PLATFORM_WIN32s) {
- return(OS_WIN31);
- } else if (os_info.dwPlatformId == VER_PLATFORM_WIN32_WINDOWS) {
- return(OS_WIN95);
- } else if (os_info.dwPlatformId == VER_PLATFORM_WIN32_NT) {
- return(OS_WINNT);
- } else {
- ut_error;
- return(0);
- }
+ OSVERSIONINFO os_info;
+
+ os_info.dwOSVersionInfoSize = sizeof(OSVERSIONINFO);
+
+ ut_a(GetVersionEx(&os_info));
+
+ if (os_info.dwPlatformId == VER_PLATFORM_WIN32s) {
+ return(OS_WIN31);
+ } else if (os_info.dwPlatformId == VER_PLATFORM_WIN32_WINDOWS) {
+ return(OS_WIN95);
+ } else if (os_info.dwPlatformId == VER_PLATFORM_WIN32_NT) {
+ return(OS_WINNT);
+ } else {
+ ut_error;
+ return(0);
+ }
#else
- ut_error;
+ ut_error;
- return(0);
+ return(0);
#endif
}
@@ -160,7 +169,7 @@ os_file_get_last_error(void)
if (err != ERROR_FILE_EXISTS) {
fprintf(stderr,
- "InnoDB: operating system error number %li in a file operation.\n",
+ "InnoDB: Warning: operating system error number %li in a file operation.\n",
(long) err);
}
@@ -178,7 +187,7 @@ os_file_get_last_error(void)
if (err != EEXIST) {
fprintf(stderr,
- "InnoDB: operating system error number %i in a file operation.\n",
+ "InnoDB: Warning: operating system error number %i in a file operation.\n",
errno);
}
@@ -231,8 +240,10 @@ os_file_handle_error(
exit(1);
} else if (err == OS_FILE_AIO_RESOURCES_RESERVED) {
-
return(TRUE);
+
+ } else if (err == OS_FILE_ALREADY_EXISTS) {
+ return(FALSE);
} else {
fprintf(stderr, "InnoDB: Cannot continue operation.\n");
@@ -317,14 +328,10 @@ try_again:
if (file == INVALID_HANDLE_VALUE) {
*success = FALSE;
- if (create_mode != OS_FILE_OPEN
- && os_file_get_last_error() == OS_FILE_DISK_FULL) {
-
- retry = os_file_handle_error(file, name);
+ retry = os_file_handle_error(file, name);
- if (retry) {
- goto try_again;
- }
+ if (retry) {
+ goto try_again;
}
} else {
*success = TRUE;
@@ -369,14 +376,10 @@ try_again:
if (file == -1) {
*success = FALSE;
- if (create_mode != OS_FILE_OPEN
- && errno == ENOSPC) {
+ retry = os_file_handle_error(file, name);
- retry = os_file_handle_error(file, name);
-
- if (retry) {
- goto try_again;
- }
+ if (retry) {
+ goto try_again;
}
} else {
*success = TRUE;
@@ -407,6 +410,7 @@ os_file_close(
return(TRUE);
}
+ os_file_handle_error(file, NULL);
return(FALSE);
#else
int ret;
@@ -414,6 +418,7 @@ os_file_close(
ret = close(file);
if (ret == -1) {
+ os_file_handle_error(file, NULL);
return(FALSE);
}
@@ -551,6 +556,8 @@ os_file_flush(
return(TRUE);
}
+ os_file_handle_error(file, NULL);
+
return(FALSE);
#else
int ret;
@@ -560,6 +567,8 @@ os_file_flush(
#else
ret = fsync(file);
#endif
+ os_n_fsyncs++;
+
if (ret == 0) {
return(TRUE);
}
@@ -589,9 +598,12 @@ os_file_pread(
{
off_t offs = (off_t)offset;
+ os_n_file_reads++;
+
#ifdef HAVE_PREAD
return(pread(file, buf, n, offs));
#else
+ {
ssize_t ret;
ulint i;
@@ -613,6 +625,7 @@ os_file_pread(
os_mutex_exit(os_file_seek_mutexes[i]);
return(ret);
+ }
#endif
}
@@ -631,6 +644,8 @@ os_file_pwrite(
ssize_t ret;
off_t offs = (off_t)offset;
+ os_n_file_writes++;
+
#ifdef HAVE_PWRITE
ret = pwrite(file, buf, n, offs);
@@ -647,6 +662,7 @@ os_file_pwrite(
return(ret);
#else
+ {
ulint i;
/* Protect the seek / write operation with a mutex */
@@ -678,6 +694,7 @@ os_file_pwrite(
os_mutex_exit(os_file_seek_mutexes[i]);
return(ret);
+ }
#endif
}
#endif
@@ -702,12 +719,13 @@ os_file_read(
BOOL ret;
DWORD len;
DWORD ret2;
- DWORD err;
DWORD low;
DWORD high;
ibool retry;
ulint i;
+ os_n_file_reads++;
+
try_again:
ut_ad(file);
ut_ad(buf);
@@ -724,7 +742,6 @@ try_again:
ret2 = SetFilePointer(file, low, &high, FILE_BEGIN);
if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) {
- err = GetLastError();
os_mutex_exit(os_file_seek_mutexes[i]);
@@ -738,8 +755,6 @@ try_again:
if (ret && len == n) {
return(TRUE);
}
-
- err = GetLastError();
#else
ibool retry;
ssize_t ret;
@@ -791,12 +806,12 @@ os_file_write(
BOOL ret;
DWORD len;
DWORD ret2;
- DWORD err;
DWORD low;
DWORD high;
ibool retry;
ulint i;
+ os_n_file_writes++;
try_again:
ut_ad(file);
ut_ad(buf);
@@ -813,7 +828,6 @@ try_again:
ret2 = SetFilePointer(file, low, &high, FILE_BEGIN);
if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) {
- err = GetLastError();
os_mutex_exit(os_file_seek_mutexes[i]);
@@ -987,6 +1001,8 @@ os_aio_init(
os_aio_segment_wait_events[i] = os_event_create(NULL);
}
+ os_last_printout = time(NULL);
+
#ifdef POSIX_ASYNC_IO
/* Block aio signals from the current thread and its children:
for this to work, the current thread must be the first created
@@ -1461,6 +1477,7 @@ try_again:
} else if (mode == OS_AIO_SYNC) {
array = os_aio_sync_array;
} else {
+ array = NULL; /* Eliminate compiler warning */
ut_error;
}
@@ -1469,6 +1486,7 @@ try_again:
if (type == OS_FILE_READ) {
if (os_aio_use_native_aio) {
#ifdef WIN_ASYNC_IO
+ os_n_file_reads++;
ret = ReadFile(file, buf, (DWORD)n, &len,
&(slot->control));
#elif defined(POSIX_ASYNC_IO)
@@ -1485,6 +1503,7 @@ try_again:
} else if (type == OS_FILE_WRITE) {
if (os_aio_use_native_aio) {
#ifdef WIN_ASYNC_IO
+ os_n_file_writes++;
ret = WriteFile(file, buf, (DWORD)n, &len,
&(slot->control));
#elif defined(POSIX_ASYNC_IO)
@@ -1583,7 +1602,6 @@ os_aio_windows_handle(
ulint n;
ulint i;
ibool ret_val;
- ulint err;
BOOL ret;
DWORD len;
@@ -1635,7 +1653,8 @@ os_aio_windows_handle(
ut_a(TRUE == os_file_flush(slot->file));
}
} else {
- err = GetLastError();
+ os_file_get_last_error();
+
ut_error;
ret_val = FALSE;
@@ -2032,6 +2051,8 @@ os_aio_print(void)
os_aio_array_t* array;
os_aio_slot_t* slot;
ulint n_reserved;
+ time_t current_time;
+ double time_elapsed;
ulint i;
for (i = 0; i < srv_n_file_io_threads; i++) {
@@ -2039,7 +2060,7 @@ os_aio_print(void)
srv_io_thread_op_info[i]);
}
- printf("Pending normal aio reads: ");
+ printf("Pending normal aio reads:");
array = os_aio_read_array;
loop:
@@ -2066,12 +2087,12 @@ loop:
ut_a(array->n_reserved == n_reserved);
- printf("%lu\n", n_reserved);
+ printf(" %lu", n_reserved);
os_mutex_exit(array->mutex);
if (array == os_aio_read_array) {
- printf("Pending aio writes: ");
+ printf(", aio writes:");
array = os_aio_write_array;
@@ -2079,25 +2100,48 @@ loop:
}
if (array == os_aio_write_array) {
- printf("Pending insert buffer aio reads: ");
+ printf(",\n ibuf aio reads:");
array = os_aio_ibuf_array;
goto loop;
}
if (array == os_aio_ibuf_array) {
- printf("Pending log writes or reads: ");
+ printf(", log i/o's:");
array = os_aio_log_array;
goto loop;
}
if (array == os_aio_log_array) {
- printf("Pending synchronous reads or writes: ");
+ printf(", sync i/o's:");
array = os_aio_sync_array;
goto loop;
}
+
+ printf("\n");
+
+ current_time = time(NULL);
+ time_elapsed = difftime(current_time, os_last_printout);
+
+ printf("Pending flushes (fsync) log: %lu; buffer pool: %lu\n",
+ fil_n_pending_log_flushes, fil_n_pending_tablespace_flushes);
+ printf("%lu OS file reads, %lu OS file writes, %lu OS fsyncs\n",
+ os_n_file_reads, os_n_file_writes, os_n_fsyncs);
+ printf("%.2f reads/s, %.2f writes/s, %.2f fsyncs/s\n",
+ (os_n_file_reads - os_n_file_reads_old)
+ / time_elapsed,
+ (os_n_file_writes - os_n_file_writes_old)
+ / time_elapsed,
+ (os_n_fsyncs - os_n_fsyncs_old)
+ / time_elapsed);
+
+ os_n_file_reads_old = os_n_file_reads;
+ os_n_file_writes_old = os_n_file_writes;
+ os_n_fsyncs_old = os_n_fsyncs;
+
+ os_last_printout = current_time;
}
/**************************************************************************
diff --git a/innobase/page/page0page.c b/innobase/page/page0page.c
index 511191ecd89..427064bc89c 100644
--- a/innobase/page/page0page.c
+++ b/innobase/page/page0page.c
@@ -1258,8 +1258,8 @@ page_validate(
page_dir_get_nth_slot(page, n_slots - 1))) {
fprintf(stderr,
"Record heap and dir overlap on a page in index %s, %lu, %lu\n",
- index->name, page_header_get_ptr(page, PAGE_HEAP_TOP),
- page_dir_get_nth_slot(page, n_slots - 1));
+ index->name, (ulint)page_header_get_ptr(page, PAGE_HEAP_TOP),
+ (ulint)page_dir_get_nth_slot(page, n_slots - 1));
goto func_exit;
}
diff --git a/innobase/pars/lexyy.c b/innobase/pars/lexyy.c
index 64b8963028b..67bd12afa60 100644
--- a/innobase/pars/lexyy.c
+++ b/innobase/pars/lexyy.c
@@ -7362,7 +7362,7 @@ static void *yy_flex_alloc( size )
unsigned int size;
#endif
{
- return (void *) malloc( size );
+ return (void *) mem_alloc( size );
}
#ifdef YY_USE_PROTOS
@@ -7373,7 +7373,7 @@ void *ptr;
unsigned int size;
#endif
{
- return (void *) realloc( ptr, size );
+ return (void *) mem_realloc( ptr, size );
}
#ifdef YY_USE_PROTOS
@@ -7383,6 +7383,6 @@ static void yy_flex_free( ptr )
void *ptr;
#endif
{
- free( ptr );
+ mem_free( ptr );
}
diff --git a/innobase/pars/pars0grm.c b/innobase/pars/pars0grm.c
index e06cba4e69d..1f631c69f41 100644
--- a/innobase/pars/pars0grm.c
+++ b/innobase/pars/pars0grm.c
@@ -97,11 +97,8 @@ que_node_t */
#define YYSTYPE que_node_t*
#include "univ.i"
-#undef alloca
-#define alloca mem_alloc
#include <math.h>
#include "pars0pars.h"
-#include "mem0mem.h"
#include "que0types.h"
#include "que0que.h"
#include "row0sel.h"
@@ -705,7 +702,7 @@ int yydebug; /* nonzero means print parse trace */
/* YYINITDEPTH indicates the initial size of the parser's stacks */
#ifndef YYINITDEPTH
-#define YYINITDEPTH 200
+#define YYINITDEPTH 1000
#endif
/* YYMAXDEPTH is the maximum size the stacks can grow to
@@ -896,17 +893,22 @@ yynewstate:
if (yystacksize >= YYMAXDEPTH)
{
yyerror("parser stack overflow");
+ ut_a(0);
return 2;
}
yystacksize *= 2;
if (yystacksize > YYMAXDEPTH)
yystacksize = YYMAXDEPTH;
- yyss = (short *) alloca (yystacksize * sizeof (*yyssp));
+
+ ut_a(0); /* Prevent possible memory leaks through the following
+ mem_alloc's */
+
+ yyss = (short *) mem_alloc (yystacksize * sizeof (*yyssp));
__yy_memcpy ((char *)yyss, (char *)yyss1, size * sizeof (*yyssp));
- yyvs = (YYSTYPE *) alloca (yystacksize * sizeof (*yyvsp));
+ yyvs = (YYSTYPE *) mem_alloc (yystacksize * sizeof (*yyvsp));
__yy_memcpy ((char *)yyvs, (char *)yyvs1, size * sizeof (*yyvsp));
#ifdef YYLSP_NEEDED
- yyls = (YYLTYPE *) alloca (yystacksize * sizeof (*yylsp));
+ yyls = (YYLTYPE *) mem_alloc (yystacksize * sizeof (*yylsp));
__yy_memcpy ((char *)yyls, (char *)yyls1, size * sizeof (*yylsp));
#endif
#endif /* no yyoverflow */
@@ -1663,7 +1665,7 @@ yyerrlab: /* here on detecting error */
x < (sizeof(yytname) / sizeof(char *)); x++)
if (yycheck[x + yyn] == x)
size += strlen(yytname[x]) + 15, count++;
- msg = (char *) malloc(size + 15);
+ msg = (char *) mem_alloc(size + 15);
if (msg != 0)
{
strcpy(msg, "parse error");
@@ -1682,7 +1684,7 @@ yyerrlab: /* here on detecting error */
}
}
yyerror(msg);
- free(msg);
+ mem_free(msg);
}
else
yyerror ("parse error; also virtual memory exceeded");
diff --git a/innobase/pars/pars0opt.c b/innobase/pars/pars0opt.c
index 5d187ad2faf..6f4957f96ee 100644
--- a/innobase/pars/pars0opt.c
+++ b/innobase/pars/pars0opt.c
@@ -543,6 +543,7 @@ opt_search_plan_for_table(
/* Calculate goodness for each index of the table */
index = dict_table_get_first_index(table);
+ best_index = index; /* Eliminate compiler warning */
best_goodness = 0;
while (index) {
diff --git a/innobase/pars/pars0pars.c b/innobase/pars/pars0pars.c
index 4a298426476..8ffbca579b8 100644
--- a/innobase/pars/pars0pars.c
+++ b/innobase/pars/pars0pars.c
@@ -922,7 +922,8 @@ pars_process_assign_list(
changes_ord_field = UPD_NODE_NO_ORD_CHANGE;
- if (row_upd_changes_some_index_ord_field(node->table, node->update)) {
+ if (row_upd_changes_some_index_ord_field_binary(node->table,
+ node->update)) {
changes_ord_field = 0;
}
diff --git a/innobase/que/que0que.c b/innobase/que/que0que.c
index ddf8c8ebc43..96e505f8b80 100644
--- a/innobase/que/que0que.c
+++ b/innobase/que/que0que.c
@@ -832,7 +832,7 @@ que_thr_dec_refer_count(
sess_t* sess;
ibool send_srv_msg = FALSE;
ibool release_stored_proc = FALSE;
- ulint msg_len;
+ ulint msg_len = 0;
byte msg_buf[ODBC_DATAGRAM_SIZE];
ulint fork_type;
ibool stopped;
diff --git a/innobase/rem/rem0cmp.c b/innobase/rem/rem0cmp.c
index cdf1f363946..c3687ebb0e0 100644
--- a/innobase/rem/rem0cmp.c
+++ b/innobase/rem/rem0cmp.c
@@ -12,6 +12,8 @@ Created 7/1/1994 Heikki Tuuri
#include "rem0cmp.ic"
#endif
+#include "srv0srv.h"
+
/* ALPHABETICAL ORDER
==================
@@ -68,6 +70,54 @@ innobase_mysql_cmp(
unsigned int b_length); /* in: data field length,
not UNIV_SQL_NULL */
+/*************************************************************************
+Transforms the character code so that it is ordered appropriately for the
+language. This is only used for the latin1 char set. MySQL does the
+comparisons for other char sets. */
+UNIV_INLINE
+ulint
+cmp_collate(
+/*========*/
+ /* out: collation order position */
+ dtype_t* type, /* in: type */
+ ulint code) /* in: code of a character stored in database
+ record */
+{
+ ut_ad((type->mtype == DATA_CHAR) || (type->mtype == DATA_VARCHAR));
+
+ return((ulint) srv_latin1_ordering[code]);
+}
+
+
+/*****************************************************************
+Returns TRUE if two types are equal for comparison purposes. */
+
+ibool
+cmp_types_are_equal(
+/*================*/
+ /* out: TRUE if the types are considered
+ equal in comparisons */
+ dtype_t* type1, /* in: type 1 */
+ dtype_t* type2) /* in: type 2 */
+{
+ if (type1->mtype != type2->mtype) {
+
+ return(FALSE);
+ }
+
+ if (type1->mtype == DATA_MYSQL
+ || type1->mtype == DATA_VARMYSQL) {
+
+ if ((type1->prtype & ~DATA_NOT_NULL)
+ != (type2->prtype & ~DATA_NOT_NULL)) {
+
+ return(FALSE);
+ }
+ }
+
+ return(TRUE);
+}
+
/*****************************************************************
Innobase uses this function is to compare two data fields for which the
data type is such that we must compare whole fields. */
@@ -269,8 +319,8 @@ cmp_data_data_slow(
}
if (cur_type->mtype <= DATA_CHAR) {
- data1_byte = dtype_collate(cur_type, data1_byte);
- data2_byte = dtype_collate(cur_type, data2_byte);
+ data1_byte = cmp_collate(cur_type, data1_byte);
+ data2_byte = cmp_collate(cur_type, data2_byte);
}
if (data1_byte > data2_byte) {
@@ -482,8 +532,8 @@ cmp_dtuple_rec_with_match(
}
if (cur_type->mtype <= DATA_CHAR) {
- rec_byte = dtype_collate(cur_type, rec_byte);
- dtuple_byte = dtype_collate(cur_type,
+ rec_byte = cmp_collate(cur_type, rec_byte);
+ dtuple_byte = cmp_collate(cur_type,
dtuple_byte);
}
@@ -796,8 +846,8 @@ cmp_rec_rec_with_match(
}
if (cur_type->mtype <= DATA_CHAR) {
- rec1_byte = dtype_collate(cur_type, rec1_byte);
- rec2_byte = dtype_collate(cur_type, rec2_byte);
+ rec1_byte = cmp_collate(cur_type, rec1_byte);
+ rec2_byte = cmp_collate(cur_type, rec2_byte);
}
if (rec1_byte < rec2_byte) {
diff --git a/innobase/row/row0ins.c b/innobase/row/row0ins.c
index 8542dcae326..92cac5a55cf 100644
--- a/innobase/row/row0ins.c
+++ b/innobase/row/row0ins.c
@@ -207,16 +207,33 @@ row_ins_sec_index_entry_by_modify(
/*==============================*/
/* out: DB_SUCCESS or error code */
btr_cur_t* cursor, /* in: B-tree cursor */
+ dtuple_t* entry, /* in: index entry to insert */
que_thr_t* thr, /* in: query thread */
mtr_t* mtr) /* in: mtr */
{
- ulint err;
-
- ut_ad(((cursor->index)->type & DICT_CLUSTERED) == 0);
- ut_ad(rec_get_deleted_flag(btr_cur_get_rec(cursor)));
+ mem_heap_t* heap;
+ upd_t* update;
+ rec_t* rec;
+ ulint err;
+
+ rec = btr_cur_get_rec(cursor);
+
+ ut_ad((cursor->index->type & DICT_CLUSTERED) == 0);
+ ut_ad(rec_get_deleted_flag(rec));
- /* We just remove the delete mark from the secondary index record */
- err = btr_cur_del_mark_set_sec_rec(0, cursor, FALSE, thr, mtr);
+ /* We know that in the ordering entry and rec compare as equal.
+ But in their binary form there may be differences if there
+ are char fields in them. Therefore we have to calculate the
+ difference and do an update-in-place if necessary. */
+
+ heap = mem_heap_create(1024);
+
+ update = row_upd_build_sec_rec_difference_binary(cursor->index,
+ entry, rec, heap);
+
+ err = btr_cur_update_sec_rec_in_place(cursor, update, thr, mtr);
+
+ mem_heap_free(heap);
return(err);
}
@@ -262,7 +279,7 @@ row_ins_clust_index_entry_by_modify(
/* Build an update vector containing all the fields to be modified;
NOTE that this vector may contain also system columns! */
- update = row_upd_build_difference(cursor->index, entry, ext_vec,
+ update = row_upd_build_difference_binary(cursor->index, entry, ext_vec,
n_ext_vec, rec, heap);
if (mode == BTR_MODIFY_LEAF) {
/* Try optimistic updating of the record, keeping changes
@@ -348,6 +365,203 @@ row_ins_set_shared_rec_lock(
}
/*******************************************************************
+Checks if foreign key constraint fails for an index entry. Sets shared locks
+which lock either the success or the failure of the constraint. NOTE that
+the caller must have a shared latch on dict_foreign_key_check_lock. */
+
+ulint
+row_ins_check_foreign_constraint(
+/*=============================*/
+ /* out: DB_SUCCESS, DB_LOCK_WAIT,
+ DB_NO_REFERENCED_ROW,
+ or DB_ROW_IS_REFERENCED */
+ ibool check_ref,/* in: TRUE If we want to check that
+ the referenced table is ok, FALSE if we
+ want to check the foreign key table */
+ dict_foreign_t* foreign,/* in: foreign constraint; NOTE that the
+ tables mentioned in it must be in the
+ dictionary cache if they exist at all */
+ dict_table_t* table, /* in: if check_ref is TRUE, then the foreign
+ table, else the referenced table */
+ dict_index_t* index, /* in: index in table */
+ dtuple_t* entry, /* in: index entry for index */
+ que_thr_t* thr) /* in: query thread */
+{
+ dict_table_t* check_table;
+ dict_index_t* check_index;
+ ulint n_fields_cmp;
+ rec_t* rec;
+ btr_pcur_t pcur;
+ ibool moved;
+ int cmp;
+ ulint err;
+ mtr_t mtr;
+
+ ut_ad(rw_lock_own(&dict_foreign_key_check_lock, RW_LOCK_SHARED));
+
+ if (check_ref) {
+ check_table = foreign->referenced_table;
+ check_index = foreign->referenced_index;
+ } else {
+ check_table = foreign->foreign_table;
+ check_index = foreign->foreign_index;
+ }
+
+ if (check_table == NULL) {
+ if (check_ref) {
+ return(DB_NO_REFERENCED_ROW);
+ }
+
+ return(DB_SUCCESS);
+ }
+
+ ut_a(check_table && check_index);
+
+ if (check_table != table) {
+ /* We already have a LOCK_IX on table, but not necessarily
+ on check_table */
+
+ err = lock_table(0, check_table, LOCK_IS, thr);
+
+ if (err != DB_SUCCESS) {
+
+ return(err);
+ }
+ }
+
+ mtr_start(&mtr);
+
+ /* Store the old value of n_fields_cmp */
+
+ n_fields_cmp = dtuple_get_n_fields_cmp(entry);
+
+ dtuple_set_n_fields_cmp(entry, foreign->n_fields);
+
+ btr_pcur_open(check_index, entry, PAGE_CUR_GE,
+ BTR_SEARCH_LEAF, &pcur, &mtr);
+
+ /* Scan index records and check if there is a matching record */
+
+ for (;;) {
+ rec = btr_pcur_get_rec(&pcur);
+
+ if (rec == page_get_infimum_rec(buf_frame_align(rec))) {
+
+ goto next_rec;
+ }
+
+ /* Try to place a lock on the index record */
+
+ err = row_ins_set_shared_rec_lock(rec, check_index, thr);
+
+ if (err != DB_SUCCESS) {
+
+ break;
+ }
+
+ if (rec == page_get_supremum_rec(buf_frame_align(rec))) {
+
+ goto next_rec;
+ }
+
+ cmp = cmp_dtuple_rec(entry, rec);
+
+ if (cmp == 0) {
+ if (!rec_get_deleted_flag(rec)) {
+ /* Found a matching record */
+
+ if (check_ref) {
+ err = DB_SUCCESS;
+ } else {
+ err = DB_ROW_IS_REFERENCED;
+ }
+
+ break;
+ }
+ }
+
+ if (cmp < 0) {
+ if (check_ref) {
+ err = DB_NO_REFERENCED_ROW;
+ } else {
+ err = DB_SUCCESS;
+ }
+
+ break;
+ }
+
+ ut_a(cmp == 0);
+next_rec:
+ moved = btr_pcur_move_to_next(&pcur, &mtr);
+
+ if (!moved) {
+ if (check_ref) {
+ err = DB_NO_REFERENCED_ROW;
+ } else {
+ err = DB_SUCCESS;
+ }
+
+ break;
+ }
+ }
+
+ mtr_commit(&mtr);
+
+ /* Restore old value */
+ dtuple_set_n_fields_cmp(entry, n_fields_cmp);
+
+ return(err);
+}
+
+/*******************************************************************
+Checks if foreign key constraints fail for an index entry. If index
+is not mentioned in any constraint, this function does nothing.
+Otherwise it searches the indexes of referenced tables and
+sets shared locks which lock either the success or the failure of
+a constraint. */
+static
+ulint
+row_ins_check_foreign_constraints(
+/*==============================*/
+ /* out: DB_SUCCESS, DB_LOCK_WAIT, or error
+ code */
+ dict_table_t* table, /* in: table */
+ dict_index_t* index, /* in: index */
+ dtuple_t* entry, /* in: index entry for index */
+ que_thr_t* thr) /* in: query thread */
+{
+ dict_foreign_t* foreign;
+ ulint err;
+
+ foreign = UT_LIST_GET_FIRST(table->foreign_list);
+
+ while (foreign) {
+ if (foreign->foreign_index == index) {
+
+ if (foreign->referenced_table == NULL) {
+ dict_table_get(foreign->referenced_table_name,
+ thr_get_trx(thr));
+ }
+
+ rw_lock_s_lock(&dict_foreign_key_check_lock);
+
+ err = row_ins_check_foreign_constraint(TRUE, foreign,
+ table, index, entry, thr);
+
+ rw_lock_s_unlock(&dict_foreign_key_check_lock);
+
+ if (err != DB_SUCCESS) {
+ return(err);
+ }
+ }
+
+ foreign = UT_LIST_GET_NEXT(foreign_list, foreign);
+ }
+
+ return(DB_SUCCESS);
+}
+
+/*******************************************************************
Scans a unique non-clustered index at a given index entry to determine
whether a uniqueness violation has occurred for the key value of the entry.
Set shared locks on possible duplicate records. */
@@ -365,7 +579,6 @@ row_ins_scan_sec_index_for_duplicate(
ulint n_fields_cmp;
rec_t* rec;
btr_pcur_t pcur;
- trx_t* trx = thr_get_trx(thr);
ulint err = DB_SUCCESS;
ibool moved;
mtr_t mtr;
@@ -414,7 +627,7 @@ row_ins_scan_sec_index_for_duplicate(
err = DB_DUPLICATE_KEY;
- trx->error_info = index;
+ thr_get_trx(thr)->error_info = index;
break;
}
@@ -699,7 +912,7 @@ row_ins_index_entry_low(
ext_vec, n_ext_vec,
thr, &mtr);
} else {
- err = row_ins_sec_index_entry_by_modify(&cursor,
+ err = row_ins_sec_index_entry_by_modify(&cursor, entry,
thr, &mtr);
}
@@ -765,6 +978,15 @@ row_ins_index_entry(
{
ulint err;
+ if (UT_LIST_GET_FIRST(index->table->foreign_list)) {
+ err = row_ins_check_foreign_constraints(index->table, index,
+ entry, thr);
+ if (err != DB_SUCCESS) {
+
+ return(err);
+ }
+ }
+
/* Try first optimistic descent to the B-tree */
err = row_ins_index_entry_low(BTR_MODIFY_LEAF, index, entry,
@@ -812,7 +1034,7 @@ row_ins_index_entry_set_vals(
/***************************************************************
Inserts a single index entry to the table. */
-UNIV_INLINE
+static
ulint
row_ins_index_entry_step(
/*=====================*/
diff --git a/innobase/row/row0mysql.c b/innobase/row/row0mysql.c
index 373ee4ac4bd..13c0332dcef 100644
--- a/innobase/row/row0mysql.c
+++ b/innobase/row/row0mysql.c
@@ -21,6 +21,7 @@ Created 9/17/2000 Heikki Tuuri
#include "pars0pars.h"
#include "dict0dict.h"
#include "dict0crea.h"
+#include "dict0load.h"
#include "trx0roll.h"
#include "trx0purge.h"
#include "lock0lock.h"
@@ -151,7 +152,7 @@ row_mysql_handle_errors(
during the function entry */
trx_t* trx, /* in: transaction */
que_thr_t* thr, /* in: query thread */
- trx_savept_t* savept) /* in: savepoint */
+ trx_savept_t* savept) /* in: savepoint or NULL */
{
ibool timeout_expired;
ulint err;
@@ -172,12 +173,16 @@ handle_new_error:
}
} else if (err == DB_TOO_BIG_RECORD) {
/* MySQL will roll back the latest SQL statement */
+ } else if (err == DB_ROW_IS_REFERENCED
+ || err == DB_NO_REFERENCED_ROW
+ || err == DB_CANNOT_ADD_CONSTRAINT) {
+ /* MySQL will roll back the latest SQL statement */
} else if (err == DB_LOCK_WAIT) {
timeout_expired = srv_suspend_mysql_thread(thr);
if (timeout_expired) {
- trx->error_state = DB_DEADLOCK;
+ trx->error_state = DB_LOCK_WAIT_TIMEOUT;
que_thr_stop_for_mysql(thr);
@@ -188,9 +193,12 @@ handle_new_error:
return(TRUE);
- } else if (err == DB_DEADLOCK) {
- /* MySQL will roll back the latest SQL statement */
+ } else if (err == DB_DEADLOCK || err == DB_LOCK_WAIT_TIMEOUT) {
+ /* Roll back the whole transaction; this resolution was added
+ to version 3.23.43 */
+ trx_general_rollback_for_mysql(trx, FALSE, NULL);
+
} else if (err == DB_OUT_OF_FILE_SPACE) {
/* MySQL will roll back the latest SQL statement */
@@ -203,6 +211,7 @@ handle_new_error:
exit(1);
} else {
+ fprintf(stderr, "InnoDB: unknown error code %lu\n", err);
ut_a(0);
}
@@ -440,7 +449,94 @@ row_update_statistics_if_needed(
dict_update_statistics(prebuilt->table);
}
}
+
+/*************************************************************************
+Releases the AUTO_INC type lock of trx if trx->auto_inc_lock is set;
+otherwise does nothing. */
+
+void
+row_unlock_table_autoinc_for_mysql(
+/*===============================*/
+	trx_t*	trx)	/* in: transaction */
+{
+	if (trx->auto_inc_lock) {
+
+		lock_table_unlock_auto_inc(trx);
+	}
+}
+
+/*************************************************************************
+Requests a LOCK_AUTO_INC lock on prebuilt->table for the transaction
+of prebuilt. The AUTO_INC lock gives exclusive access to the auto-inc
+counter of the table. The lock is reserved only for the duration of an
+SQL statement. It is not compatible with another AUTO_INC or exclusive
+lock on the table. The request is repeated for as long as
+row_mysql_handle_errors() reports that the failure was a lock wait. */
+
+int
+row_lock_table_autoinc_for_mysql(
+/*=============================*/
+					/* out: error code or DB_SUCCESS */
+	row_prebuilt_t*	prebuilt)	/* in: prebuilt struct in the MySQL
+					table handle */
+{
+	trx_t*		trx	= prebuilt->trx;
+	ins_node_t*	node	= prebuilt->ins_node;
+	que_thr_t*	thr;
+	ulint		err;
+
+	ut_ad(trx);
+	ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
+
+	trx->op_info = "setting auto-inc lock";
+
+	if (node == NULL) {
+		/* The insert node does not exist yet: build it */
+
+		row_get_prebuilt_insert_row(prebuilt);
+		node = prebuilt->ins_node;
+	}
+
+	/* The insert query graph serves as the dummy graph which the
+	lock module call needs */
+
+	thr = que_fork_get_first_thr(prebuilt->ins_graph);
+
+	que_thr_move_to_run_state_for_mysql(thr, trx);
+
+	for (;;) {
+		thr->run_node = node;
+		thr->prev_node = node;
+
+		/* The current session may not yet have started its
+		transaction, or the transaction may have been committed */
+
+		trx_start_if_not_started(trx);
+
+		err = lock_table(0, prebuilt->table, LOCK_AUTO_INC, thr);
+
+		trx->error_state = err;
+
+		if (err == DB_SUCCESS) {
+
+			break;
+		}
+
+		que_thr_stop_for_mysql(thr);
+
+		if (!row_mysql_handle_errors(&err, trx, thr, NULL)) {
+			/* Not a lock wait: give the error to the caller */
+
+			trx->op_info = "";
+
+			return((int) err);
+		}
+
+		/* It was a lock wait: request the lock again */
+	}
+
+	que_thr_stop_for_mysql_no_error(thr, trx);
+
+	trx->op_info = "";
+
+	return((int) err);
+}
+
/*************************************************************************
Does an insert for MySQL. */
@@ -462,6 +558,17 @@ row_insert_for_mysql(
ut_ad(trx);
ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
+ if (srv_created_new_raw || srv_force_recovery) {
+ fprintf(stderr,
+ "InnoDB: A new raw disk partition was initialized or\n"
+ "InnoDB: innodb_force_recovery is on: we do not allow\n"
+ "InnoDB: database modifications by the user. Shut down\n"
+ "InnoDB: mysqld and edit my.cnf so that newraw is replaced\n"
+ "InnoDB: with raw, and innodb_force_... is removed.\n");
+
+ return(DB_ERROR);
+ }
+
trx->op_info = "inserting";
if (node == NULL) {
@@ -634,6 +741,17 @@ row_update_for_mysql(
ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
UT_NOT_USED(mysql_rec);
+ if (srv_created_new_raw || srv_force_recovery) {
+ fprintf(stderr,
+ "InnoDB: A new raw disk partition was initialized or\n"
+ "InnoDB: innodb_force_recovery is on: we do not allow\n"
+ "InnoDB: database modifications by the user. Shut down\n"
+ "InnoDB: mysqld and edit my.cnf so that newraw is replaced\n"
+ "InnoDB: with raw, and innodb_force_... is removed.\n");
+
+ return(DB_ERROR);
+ }
+
trx->op_info = "updating or deleting";
node = prebuilt->upd_node;
@@ -816,8 +934,69 @@ row_create_table_for_mysql(
ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
+ if (srv_created_new_raw || srv_force_recovery) {
+ fprintf(stderr,
+ "InnoDB: A new raw disk partition was initialized or\n"
+ "InnoDB: innodb_force_recovery is on: we do not allow\n"
+ "InnoDB: database modifications by the user. Shut down\n"
+ "InnoDB: mysqld and edit my.cnf so that newraw is replaced\n"
+ "InnoDB: with raw, and innodb_force_... is removed.\n");
+
+ return(DB_ERROR);
+ }
+
trx->op_info = "creating table";
+ namelen = ut_strlen(table->name);
+
+ keywordlen = ut_strlen("innodb_monitor");
+
+ if (namelen >= keywordlen
+ && 0 == ut_memcmp(table->name + namelen - keywordlen,
+ "innodb_monitor", keywordlen)) {
+
+ /* Table name ends to characters innodb_monitor:
+ start monitor prints */
+
+ srv_print_innodb_monitor = TRUE;
+
+ /* The lock timeout monitor thread also takes care
+ of InnoDB monitor prints */
+
+ os_event_set(srv_lock_timeout_thread_event);
+ }
+
+ keywordlen = ut_strlen("innodb_lock_monitor");
+
+ if (namelen >= keywordlen
+ && 0 == ut_memcmp(table->name + namelen - keywordlen,
+ "innodb_lock_monitor", keywordlen)) {
+
+ srv_print_innodb_monitor = TRUE;
+ srv_print_innodb_lock_monitor = TRUE;
+ os_event_set(srv_lock_timeout_thread_event);
+ }
+
+ keywordlen = ut_strlen("innodb_tablespace_monitor");
+
+ if (namelen >= keywordlen
+ && 0 == ut_memcmp(table->name + namelen - keywordlen,
+ "innodb_tablespace_monitor", keywordlen)) {
+
+ srv_print_innodb_tablespace_monitor = TRUE;
+ os_event_set(srv_lock_timeout_thread_event);
+ }
+
+ keywordlen = ut_strlen("innodb_table_monitor");
+
+ if (namelen >= keywordlen
+ && 0 == ut_memcmp(table->name + namelen - keywordlen,
+ "innodb_table_monitor", keywordlen)) {
+
+ srv_print_innodb_table_monitor = TRUE;
+ os_event_set(srv_lock_timeout_thread_event);
+ }
+
/* Serialize data dictionary operations with dictionary mutex:
no deadlocks can occur then in these operations */
@@ -845,9 +1024,12 @@ row_create_table_for_mysql(
trx_general_rollback_for_mysql(trx, FALSE, NULL);
if (err == DB_OUT_OF_FILE_SPACE) {
+ fprintf(stderr,
+ "InnoDB: Warning: cannot create table %s because tablespace full\n",
+ table->name);
row_drop_table_for_mysql(table->name, trx, TRUE);
} else {
- assert(err == DB_DUPLICATE_KEY);
+ ut_a(err == DB_DUPLICATE_KEY);
fprintf(stderr,
"InnoDB: Error: table %s already exists in InnoDB internal\n"
"InnoDB: data dictionary. Have you deleted the .frm file\n"
@@ -864,39 +1046,6 @@ row_create_table_for_mysql(
}
trx->error_state = DB_SUCCESS;
- } else {
- namelen = ut_strlen(table->name);
-
- keywordlen = ut_strlen("innodb_monitor");
-
- if (namelen >= keywordlen
- && 0 == ut_memcmp(table->name + namelen - keywordlen,
- "innodb_monitor", keywordlen)) {
-
- /* Table name ends to characters innodb_monitor:
- start monitor prints */
-
- srv_print_innodb_monitor = TRUE;
- }
-
- keywordlen = ut_strlen("innodb_lock_monitor");
-
- if (namelen >= keywordlen
- && 0 == ut_memcmp(table->name + namelen - keywordlen,
- "innodb_lock_monitor", keywordlen)) {
-
- srv_print_innodb_monitor = TRUE;
- srv_print_innodb_lock_monitor = TRUE;
- }
-
- keywordlen = ut_strlen("innodb_tablespace_monitor");
-
- if (namelen >= keywordlen
- && 0 == ut_memcmp(table->name + namelen - keywordlen,
- "innodb_tablespace_monitor", keywordlen)) {
-
- srv_print_innodb_tablespace_monitor = TRUE;
- }
}
mutex_exit(&(dict_sys->mutex));
@@ -970,6 +1119,65 @@ row_create_index_for_mysql(
}
/*************************************************************************
+Scans a table create SQL string and adds to the data dictionary
+the foreign key constraints declared in the string. This function
+should be called after the indexes for a table have been created.
+Each foreign key constraint must be accompanied with indexes in
+bot participating tables. The indexes are allowed to contain more
+fields than mentioned in the constraint. Check also that foreign key
+constraints which reference this table are ok. */
+
+int
+row_table_add_foreign_constraints(
+/*==============================*/
+ /* out: error code or DB_SUCCESS */
+ trx_t* trx, /* in: transaction */
+ char* sql_string, /* in: table create statement where
+ foreign keys are declared like:
+ FOREIGN KEY (a, b) REFERENCES table2(c, d),
+ table2 can be written also with the database
+ name before it: test.table2 */
+ char* name) /* in: table full name in the normalized form
+ database_name/table_name */
+{
+ ulint err;
+
+ ut_a(sql_string);
+
+ trx->op_info = "adding foreign keys";
+
+ /* Serialize data dictionary operations with dictionary mutex:
+ no deadlocks can occur then in these operations */
+
+ mutex_enter(&(dict_sys->mutex));
+
+ trx->dict_operation = TRUE;
+
+ err = dict_create_foreign_constraints(trx, sql_string, name);
+
+ if (err == DB_SUCCESS) {
+ /* Check that also referencing constraints are ok */
+ err = dict_load_foreigns(name);
+ }
+
+ if (err != DB_SUCCESS) {
+ /* We have special error handling here */
+
+ trx->error_state = DB_SUCCESS;
+
+ trx_general_rollback_for_mysql(trx, FALSE, NULL);
+
+ row_drop_table_for_mysql(name, trx, TRUE);
+
+ trx->error_state = DB_SUCCESS;
+ }
+
+ mutex_exit(&(dict_sys->mutex));
+
+ return((int) err);
+}
+
+/*************************************************************************
Drops a table for MySQL. If the name of the dropped table ends to
characters INNODB_MONITOR, then this also stops printing of monitor
output by the master thread. */
@@ -997,6 +1205,17 @@ row_drop_table_for_mysql(
ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
ut_a(name != NULL);
+ if (srv_created_new_raw || srv_force_recovery) {
+ fprintf(stderr,
+ "InnoDB: A new raw disk partition was initialized or\n"
+ "InnoDB: innodb_force_recovery is on: we do not allow\n"
+ "InnoDB: database modifications by the user. Shut down\n"
+ "InnoDB: mysqld and edit my.cnf so that newraw is replaced\n"
+ "InnoDB: with raw, and innodb_force_... is removed.\n");
+
+ return(DB_ERROR);
+ }
+
trx->op_info = "dropping table";
namelen = ut_strlen(name);
@@ -1032,6 +1251,15 @@ row_drop_table_for_mysql(
srv_print_innodb_tablespace_monitor = FALSE;
}
+ keywordlen = ut_strlen("innodb_table_monitor");
+
+ if (namelen >= keywordlen
+ && 0 == ut_memcmp(name + namelen - keywordlen,
+ "innodb_table_monitor", keywordlen)) {
+
+ srv_print_innodb_table_monitor = FALSE;
+ }
+
/* We use the private SQL parser of Innobase to generate the
query graphs needed in deleting the dictionary data from system
tables in Innobase. Deleting a row from SYS_INDEXES table also
@@ -1039,21 +1267,49 @@ row_drop_table_for_mysql(
str1 =
"PROCEDURE DROP_TABLE_PROC () IS\n"
+ "table_name CHAR;\n"
+ "sys_foreign_id CHAR;\n"
"table_id CHAR;\n"
"index_id CHAR;\n"
+ "foreign_id CHAR;\n"
"found INT;\n"
"BEGIN\n"
- "SELECT ID INTO table_id\n"
- "FROM SYS_TABLES\n"
- "WHERE NAME ='";
-
+ "table_name := '";
+
str2 =
"';\n"
+ "SELECT ID INTO table_id\n"
+ "FROM SYS_TABLES\n"
+ "WHERE NAME = table_name;\n"
"IF (SQL % NOTFOUND) THEN\n"
" COMMIT WORK;\n"
" RETURN;\n"
"END IF;\n"
"found := 1;\n"
+ "SELECT ID INTO sys_foreign_id\n"
+ "FROM SYS_TABLES\n"
+ "WHERE NAME = 'SYS_FOREIGN';\n"
+ "IF (SQL % NOTFOUND) THEN\n"
+ " found := 0;\n"
+ "END IF;\n"
+ "IF (table_name = 'SYS_FOREIGN') THEN\n"
+ " found := 0;\n"
+ "END IF;\n"
+ "IF (table_name = 'SYS_FOREIGN_COLS') THEN\n"
+ " found := 0;\n"
+ "END IF;\n"
+ "WHILE found = 1 LOOP\n"
+ " SELECT ID INTO foreign_id\n"
+ " FROM SYS_FOREIGN\n"
+ " WHERE FOR_NAME = table_name;\n"
+ " IF (SQL % NOTFOUND) THEN\n"
+ " found := 0;\n"
+ " ELSE"
+ " DELETE FROM SYS_FOREIGN_COLS WHERE ID = foreign_id;\n"
+ " DELETE FROM SYS_FOREIGN WHERE ID = foreign_id;\n"
+ " END IF;\n"
+ "END LOOP;\n"
+ "found := 1;\n"
"WHILE found = 1 LOOP\n"
" SELECT ID INTO index_id\n"
" FROM SYS_INDEXES\n"
@@ -1095,6 +1351,9 @@ row_drop_table_for_mysql(
graph->fork_type = QUE_FORK_MYSQL_INTERFACE;
+ /* Prevent foreign key checks while we are dropping the table */
+ rw_lock_x_lock(&(dict_foreign_key_check_lock));
+
/* Prevent purge from running while we are dropping the table */
rw_lock_s_lock(&(purge_sys->purge_is_running));
@@ -1103,6 +1362,12 @@ row_drop_table_for_mysql(
if (!table) {
err = DB_TABLE_NOT_FOUND;
+ fprintf(stderr,
+ "InnoDB: Error: table %s does not exist in the InnoDB internal\n"
+ "InnoDB: data dictionary though MySQL is trying to drop it.\n"
+ "InnoDB: Have you copied the .frm file of the table to the\n"
+ "InnoDB: MySQL database directory from another database?\n",
+ name);
goto funct_exit;
}
@@ -1138,6 +1403,8 @@ row_drop_table_for_mysql(
funct_exit:
rw_lock_s_unlock(&(purge_sys->purge_is_running));
+ rw_lock_x_unlock(&(dict_foreign_key_check_lock));
+
if (!has_dict_mutex) {
mutex_exit(&(dict_sys->mutex));
}
@@ -1150,6 +1417,49 @@ funct_exit:
}
/*************************************************************************
+Drops a database for MySQL. */
+
+int
+row_drop_database_for_mysql(
+/*========================*/
+ /* out: error code or DB_SUCCESS */
+ char* name, /* in: database name which ends to '/' */
+ trx_t* trx) /* in: transaction handle */
+{
+ char* table_name;
+ int err = DB_SUCCESS;
+
+ ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
+ ut_a(name != NULL);
+ ut_a(name[strlen(name) - 1] == '/');
+
+ trx->op_info = "dropping database";
+
+ mutex_enter(&(dict_sys->mutex));
+
+ while (table_name = dict_get_first_table_name_in_db(name)) {
+ ut_a(memcmp(table_name, name, strlen(name)) == 0);
+
+ err = row_drop_table_for_mysql(table_name, trx, TRUE);
+
+ mem_free(table_name);
+
+ if (err != DB_SUCCESS) {
+ fprintf(stderr,
+ "InnoDB: DROP DATABASE %s failed with error %lu for table %s\n",
+ name, (ulint)err, table_name);
+ break;
+ }
+ }
+
+ mutex_exit(&(dict_sys->mutex));
+
+ trx->op_info = "";
+
+ return(err);
+}
+
+/*************************************************************************
Renames a table for MySQL. */
int
@@ -1174,18 +1484,37 @@ row_rename_table_for_mysql(
ut_a(old_name != NULL);
ut_a(new_name != NULL);
+ if (srv_created_new_raw || srv_force_recovery) {
+ fprintf(stderr,
+ "InnoDB: A new raw disk partition was initialized or\n"
+ "InnoDB: innodb_force_recovery is on: we do not allow\n"
+ "InnoDB: database modifications by the user. Shut down\n"
+ "InnoDB: mysqld and edit my.cnf so that newraw is replaced\n"
+ "InnoDB: with raw, and innodb_force_... is removed.\n");
+
+ return(DB_ERROR);
+ }
+
trx->op_info = "renaming table";
str1 =
"PROCEDURE RENAME_TABLE_PROC () IS\n"
+ "new_table_name CHAR;\n"
+ "old_table_name CHAR;\n"
"BEGIN\n"
- "UPDATE SYS_TABLES SET NAME ='";
+ "new_table_name :='";
str2 =
- "' WHERE NAME = '";
+ "';\nold_table_name := '";
str3 =
"';\n"
+ "UPDATE SYS_TABLES SET NAME = new_table_name\n"
+ "WHERE NAME = old_table_name;\n"
+ "UPDATE SYS_FOREIGN SET FOR_NAME = new_table_name\n"
+ "WHERE FOR_NAME = old_table_name;\n"
+ "UPDATE SYS_FOREIGN SET REF_NAME = new_table_name\n"
+ "WHERE REF_NAME = old_table_name;\n"
"COMMIT WORK;\n"
"END;\n";
@@ -1356,7 +1685,7 @@ row_check_table_for_mysql(
dict_table_t* table = prebuilt->table;
dict_index_t* index;
ulint n_rows;
- ulint n_rows_in_table;
+ ulint n_rows_in_table = ULINT_UNDEFINED;
ulint ret = DB_SUCCESS;
prebuilt->trx->op_info = "checking table";
diff --git a/innobase/row/row0purge.c b/innobase/row/row0purge.c
index 43bc166347a..0dffa273938 100644
--- a/innobase/row/row0purge.c
+++ b/innobase/row/row0purge.c
@@ -220,7 +220,7 @@ row_purge_remove_sec_if_poss_low(
if (!found) {
/* Not found */
- /* FIXME: printf("PURGE:........sec entry not found\n"); */
+ /* printf("PURGE:........sec entry not found\n"); */
/* dtuple_print(entry); */
btr_pcur_close(&pcur);
@@ -382,7 +382,7 @@ row_purge_upd_exist_or_extern(
while (node->index != NULL) {
index = node->index;
- if (row_upd_changes_ord_field(NULL, node->index,
+ if (row_upd_changes_ord_field_binary(NULL, node->index,
node->update)) {
/* Build the older version of the index entry */
entry = row_build_index_entry(node->row, index, heap);
diff --git a/innobase/row/row0sel.c b/innobase/row/row0sel.c
index d041e34a558..e42486f1e17 100644
--- a/innobase/row/row0sel.c
+++ b/innobase/row/row0sel.c
@@ -50,15 +50,21 @@ to que_run_threads: this is to allow canceling runaway queries */
/************************************************************************
Returns TRUE if the user-defined column values in a secondary index record
-are the same as the corresponding columns in the clustered index record. */
+are the same as the corresponding columns in the clustered index record.
+NOTE: the comparison is NOT done as a binary comparison, but character
+fields are compared with collation! */
static
ibool
row_sel_sec_rec_is_for_clust_rec(
/*=============================*/
- rec_t* sec_rec,
- dict_index_t* sec_index,
- rec_t* clust_rec,
- dict_index_t* clust_index)
+ /* out: TRUE if the secondary
+ record is equal to the corresponding
+ fields in the clustered record,
+ when compared with collation */
+ rec_t* sec_rec, /* in: secondary index record */
+ dict_index_t* sec_index, /* in: secondary index */
+ rec_t* clust_rec, /* in: clustered index record */
+ dict_index_t* clust_index) /* in: clustered index */
{
dict_col_t* col;
byte* sec_field;
@@ -84,9 +90,9 @@ row_sel_sec_rec_is_for_clust_rec(
return(FALSE);
}
- if (sec_len != UNIV_SQL_NULL
- && ut_memcmp(sec_field, clust_field, sec_len) != 0) {
-
+ if (0 != cmp_data_data(dict_col_get_type(col),
+ clust_field, clust_len,
+ sec_field, sec_len)) {
return(FALSE);
}
}
@@ -763,7 +769,7 @@ row_sel_open_pcur(
/*************************************************************************
Restores a stored pcur position to a table index. */
-UNIV_INLINE
+static
ibool
row_sel_restore_pcur_pos(
/*=====================*/
@@ -813,7 +819,8 @@ row_sel_restore_pcur_pos(
return(TRUE);
}
- ut_ad(relative_position == BTR_PCUR_AFTER);
+ ut_ad(relative_position == BTR_PCUR_AFTER
+ || relative_position == BTR_PCUR_AFTER_LAST_IN_TREE);
return(FALSE);
}
@@ -835,7 +842,8 @@ row_sel_restore_pcur_pos(
plan->stored_cursor_rec_processed is TRUE, we must move to the previous
record, else there is no need to move the cursor. */
- if (relative_position == BTR_PCUR_BEFORE) {
+ if (relative_position == BTR_PCUR_BEFORE
+ || relative_position == BTR_PCUR_BEFORE_FIRST_IN_TREE) {
return(FALSE);
}
@@ -850,7 +858,8 @@ row_sel_restore_pcur_pos(
return(FALSE);
}
- ut_ad(relative_position == BTR_PCUR_AFTER);
+ ut_ad(relative_position == BTR_PCUR_AFTER
+ || relative_position == BTR_PCUR_AFTER_LAST_IN_TREE);
return(TRUE);
}
@@ -1762,7 +1771,7 @@ fetch_step(
if (sel_node->state == SEL_NODE_CLOSED) {
/* SQL error detected */
- printf("SQL error %lu\n", DB_ERROR);
+ printf("SQL error %lu\n", (ulint)DB_ERROR);
que_thr_handle_error(thr, DB_ERROR, NULL, 0);
@@ -2251,7 +2260,7 @@ row_sel_get_clust_rec_for_mysql(
/************************************************************************
Restores cursor position after it has been stored. We have to take into
-account that the record cursor was positioned on can have been deleted.
+account that the record cursor was positioned on may have been deleted.
Then we may have to move the cursor one step up or down. */
static
ibool
@@ -2284,14 +2293,14 @@ sel_restore_position_for_mysql(
if (moves_up) {
btr_pcur_move_to_next(pcur, mtr);
-
- return(TRUE);
}
return(TRUE);
}
- if (relative_position == BTR_PCUR_AFTER) {
+ if (relative_position == BTR_PCUR_AFTER
+ || relative_position == BTR_PCUR_AFTER_LAST_IN_TREE) {
+
if (moves_up) {
return(TRUE);
}
@@ -2303,7 +2312,8 @@ sel_restore_position_for_mysql(
return(TRUE);
}
- ut_ad(relative_position == BTR_PCUR_BEFORE);
+ ut_ad(relative_position == BTR_PCUR_BEFORE
+ || relative_position == BTR_PCUR_BEFORE_FIRST_IN_TREE);
if (moves_up && btr_pcur_is_on_user_rec(pcur, mtr)) {
btr_pcur_move_to_next(pcur, mtr);
@@ -2586,21 +2596,30 @@ row_search_for_mysql(
let us try a search shortcut through the hash
index */
+ if (btr_search_latch.writer != RW_LOCK_NOT_LOCKED) {
+ /* There is an x-latch request: release
+ a possible s-latch to reduce starvation
+ and wait for BTR_SEA_TIMEOUT rounds before
+ trying to keep it again over calls from
+ MySQL */
+
+ if (trx->has_search_latch) {
+ rw_lock_s_unlock(&btr_search_latch);
+ trx->has_search_latch = FALSE;
+ }
+
+ trx->search_latch_timeout = BTR_SEA_TIMEOUT;
+
+ goto no_shortcut;
+ }
+
if (!trx->has_search_latch) {
rw_lock_s_lock(&btr_search_latch);
trx->has_search_latch = TRUE;
-
- } else if (btr_search_latch.writer_is_wait_ex) {
- /* There is an x-latch request waiting:
- release the s-latch for a moment to reduce
- starvation */
-
- rw_lock_s_unlock(&btr_search_latch);
- rw_lock_s_lock(&btr_search_latch);
}
shortcut = row_sel_try_search_shortcut_for_mysql(&rec,
- prebuilt, &mtr);
+ prebuilt, &mtr);
if (shortcut == SEL_FOUND) {
row_sel_store_mysql_rec(buf, prebuilt, rec);
@@ -2609,7 +2628,16 @@ row_search_for_mysql(
/* printf("%s shortcut\n", index->name); */
srv_n_rows_read++;
+
+ if (trx->search_latch_timeout > 0
+ && trx->has_search_latch) {
+ trx->search_latch_timeout--;
+
+ rw_lock_s_unlock(&btr_search_latch);
+ trx->has_search_latch = FALSE;
+ }
+
trx->op_info = "";
return(DB_SUCCESS);
@@ -2619,6 +2647,16 @@ row_search_for_mysql(
/* printf("%s record not found 2\n",
index->name); */
+
+ if (trx->search_latch_timeout > 0
+ && trx->has_search_latch) {
+
+ trx->search_latch_timeout--;
+
+ rw_lock_s_unlock(&btr_search_latch);
+ trx->has_search_latch = FALSE;
+ }
+
trx->op_info = "";
return(DB_RECORD_NOT_FOUND);
}
@@ -2627,7 +2665,7 @@ row_search_for_mysql(
mtr_start(&mtr);
}
}
-
+no_shortcut:
if (trx->has_search_latch) {
rw_lock_s_unlock(&btr_search_latch);
trx->has_search_latch = FALSE;
diff --git a/innobase/row/row0umod.c b/innobase/row/row0umod.c
index a7c8957d61a..37f5b1f0bc1 100644
--- a/innobase/row/row0umod.c
+++ b/innobase/row/row0umod.c
@@ -443,6 +443,8 @@ row_undo_mod_del_unmark_sec(
"InnoDB: Make a detailed bug report and send it\n");
fprintf(stderr, "InnoDB: to mysql@lists.mysql.com\n");
+ trx_print(thr_get_trx(thr));
+
mem_free(err_buf);
} else {
btr_cur = btr_pcur_get_btr_cur(&pcur);
@@ -552,7 +554,7 @@ row_undo_mod_upd_exist_sec(
while (node->index != NULL) {
index = node->index;
- if (row_upd_changes_ord_field(node->row, node->index,
+ if (row_upd_changes_ord_field_binary(node->row, node->index,
node->update)) {
/* Build the newest version of the index entry */
diff --git a/innobase/row/row0upd.c b/innobase/row/row0upd.c
index 3fa98db3a02..fa859729141 100644
--- a/innobase/row/row0upd.c
+++ b/innobase/row/row0upd.c
@@ -72,6 +72,134 @@ searched delete is obviously to keep the x-latch for several
steps of query graph execution. */
 /*************************************************************************
+Checks if index currently is mentioned as a referenced index in a foreign
+key constraint. This function also loads into the dictionary cache the
+possible referencing table.
+
+The constraints in table->referenced_list are scanned in at most two
+phases. Phase 1 holds only dict_foreign_key_check_lock in shared mode.
+If phase 1 meets a constraint on index whose foreign_table is still NULL,
+it releases the s-lock and the scan is restarted as phase 2. Phase 2
+reserves dict_sys->mutex before the s-lock and calls dict_table_get_low()
+for the referencing table. The first constraint whose referenced_index
+is index ends the scan with TRUE. */
+static
+ibool
+row_upd_index_is_referenced(
+/*========================*/
+ /* out: TRUE if referenced; NOTE that since
+ we do not hold dict_foreign_key_check_lock
+ when leaving the function, it may be that
+ the referencing table has been dropped when
+ we leave this function: this function is only
+ for heuristic use! */
+ dict_index_t* index) /* in: index */
+{
+ dict_table_t* table = index->table;
+ dict_foreign_t* foreign;
+ ulint phase = 1;
+ /* Restart point: phase 2 comes back here after phase 1 has
+ released its s-lock. The list head below is read without
+ holding any latch. */
+try_again:
+ if (!UT_LIST_GET_FIRST(table->referenced_list)) {
+ /* No constraint references this table at all */
+ return(FALSE);
+ }
+ /* In phase 2 dict_sys->mutex is reserved before the s-lock;
+ presumably dict_table_get_low() below requires it (TODO confirm) */
+ if (phase == 2) {
+ mutex_enter(&(dict_sys->mutex));
+ }
+
+ rw_lock_s_lock(&dict_foreign_key_check_lock);
+
+ foreign = UT_LIST_GET_FIRST(table->referenced_list);
+
+ while (foreign) {
+ if (foreign->referenced_index == index) {
+ if (foreign->foreign_table == NULL) {
+ if (phase == 2) {
+ dict_table_get_low(foreign->
+ foreign_table_name);
+ } else {
+ phase = 2; /* rescan with dict_sys->mutex reserved */
+ rw_lock_s_unlock(
+ &dict_foreign_key_check_lock);
+ goto try_again;
+ }
+ }
+ /* A constraint referencing index was found: release what
+ this phase holds and report TRUE */
+ rw_lock_s_unlock(&dict_foreign_key_check_lock);
+
+ if (phase == 2) {
+ mutex_exit(&(dict_sys->mutex));
+ }
+
+ return(TRUE);
+ }
+
+ foreign = UT_LIST_GET_NEXT(referenced_list, foreign);
+ }
+ /* No constraint in the list has index as its referenced index */
+ rw_lock_s_unlock(&dict_foreign_key_check_lock);
+
+ if (phase == 2) {
+ mutex_exit(&(dict_sys->mutex));
+ }
+
+ return(FALSE);
+}
+
+/*************************************************************************
+Checks, for the record under pcur, every foreign key constraint in
+which index is the referenced index, i.e., whether the constraints
+would still hold after a delete of the record. The record is first
+copied to an index entry in a private memory heap. NOTE that this
+function then commits mtr and starts it again, so that the position
+of pcur is lost! */
+static
+ulint
+row_upd_check_references_constraints(
+/*=================================*/
+				/* out: DB_SUCCESS, DB_LOCK_WAIT, or an error
+				code */
+	btr_pcur_t*	pcur,	/* in: cursor positioned on a record; NOTE: the
+				cursor position is lost in this function! */
+	dict_table_t*	table,	/* in: table in question */
+	dict_index_t*	index,	/* in: index of the cursor */
+	que_thr_t*	thr,	/* in: query thread */
+	mtr_t*		mtr)	/* in: mtr */
+{
+	dict_foreign_t*	fk;
+	mem_heap_t*	heap;
+	dtuple_t*	entry;
+	ulint		err	= DB_SUCCESS;
+
+	heap = mem_heap_create(500);
+
+	/* ROW_COPY_DATA: the field data is copied to heap, which is done
+	here before mtr is committed */
+
+	entry = row_rec_to_index_entry(ROW_COPY_DATA, index,
+					btr_pcur_get_rec(pcur), heap);
+
+	mtr_commit(mtr);
+
+	mtr_start(mtr);
+
+	rw_lock_s_lock(&dict_foreign_key_check_lock);
+
+	for (fk = UT_LIST_GET_FIRST(table->referenced_list);
+	     fk != NULL;
+	     fk = UT_LIST_GET_NEXT(referenced_list, fk)) {
+
+		if (fk->referenced_index != index) {
+
+			continue;
+		}
+
+		err = row_ins_check_foreign_constraint(FALSE, fk,
+						table, index, entry, thr);
+		if (err != DB_SUCCESS) {
+			/* The first failing check ends the scan */
+
+			break;
+		}
+	}
+
+	/* Common exit: err is DB_SUCCESS unless a check failed above */
+
+	rw_lock_s_unlock(&dict_foreign_key_check_lock);
+	mem_heap_free(heap);
+
+	return(err);
+}
+
+/*************************************************************************
Creates an update node for a query graph. */
upd_node_t*
@@ -484,13 +612,73 @@ upd_ext_vec_contains(
}
/*******************************************************************
+Builds an update vector from those fields which in a secondary index entry
+differ from a record that has the equal ordering fields. NOTE: we compare
+the fields as binary strings! */
+
+upd_t*
+row_upd_build_sec_rec_difference_binary(
+/*====================================*/
+ /* out, own: update vector of differing
+ fields */
+ dict_index_t* index, /* in: index */
+ dtuple_t* entry, /* in: entry to insert */
+ rec_t* rec, /* in: secondary index record */
+ mem_heap_t* heap) /* in: memory heap from which allocated */
+{
+ upd_field_t* upd_field;
+ dfield_t* dfield;
+ byte* data;
+ ulint len;
+ upd_t* update;
+ ulint n_diff;
+ ulint i;
+
+ /* This function is used only for a secondary index */
+ ut_ad(0 == (index->type & DICT_CLUSTERED));
+
+ update = upd_create(dtuple_get_n_fields(entry), heap);
+
+ n_diff = 0;
+
+ for (i = 0; i < dtuple_get_n_fields(entry); i++) {
+
+ data = rec_get_nth_field(rec, i, &len);
+
+ dfield = dtuple_get_nth_field(entry, i);
+
+ ut_a(len == dfield_get_len(dfield));
+
+ /* NOTE: we compare the fields as binary strings!
+ (No collation) */
+
+ if (!dfield_data_is_binary_equal(dfield, len, data)) {
+
+ upd_field = upd_get_nth_field(update, n_diff);
+
+ dfield_copy(&(upd_field->new_val), dfield);
+
+ upd_field_set_field_no(upd_field, i, index);
+
+ upd_field->extern_storage = FALSE;
+
+ n_diff++;
+ }
+ }
+
+ update->n_fields = n_diff;
+
+ return(update);
+}
+
+/*******************************************************************
Builds an update vector from those fields, excluding the roll ptr and
trx id fields, which in an index entry differ from a record that has
-the equal ordering fields. */
+the equal ordering fields. NOTE: we compare the fields as binary strings! */
upd_t*
-row_upd_build_difference(
-/*=====================*/
+row_upd_build_difference_binary(
+/*============================*/
/* out, own: update vector of differing
fields, excluding roll ptr and trx id */
dict_index_t* index, /* in: clustered index */
@@ -527,10 +715,13 @@ row_upd_build_difference(
dfield = dtuple_get_nth_field(entry, i);
+ /* NOTE: we compare the fields as binary strings!
+ (No collation) */
+
if ((rec_get_nth_field_extern_bit(rec, i)
!= upd_ext_vec_contains(ext_vec, n_ext_vec, i))
|| ((i != trx_id_pos) && (i != roll_ptr_pos)
- && !dfield_data_is_equal(dfield, len, data))) {
+ && !dfield_data_is_binary_equal(dfield, len, data))) {
upd_field = upd_get_nth_field(update, n_diff);
@@ -630,13 +821,16 @@ row_upd_clust_index_replace_new_col_vals(
/***************************************************************
Checks if an update vector changes an ordering field of an index record.
This function is fast if the update vector is short or the number of ordering
-fields in the index is small. Otherwise, this can be quadratic. */
+fields in the index is small. Otherwise, this can be quadratic.
+NOTE: we compare the fields as binary strings! */
ibool
-row_upd_changes_ord_field(
-/*======================*/
+row_upd_changes_ord_field_binary(
+/*=============================*/
/* out: TRUE if update vector changes
- an ordering field in the index record */
+ an ordering field in the index record;
+ NOTE: the fields are compared as binary
+ strings */
dtuple_t* row, /* in: old value of row, or NULL if the
row and the data values in update are not
known when this function is called, e.g., at
@@ -671,7 +865,7 @@ row_upd_changes_ord_field(
if (col_pos == upd_field->field_no
&& (row == NULL
- || !dfield_datas_are_equal(
+ || !dfield_datas_are_binary_equal(
dtuple_get_nth_field(row, col_no),
&(upd_field->new_val)))) {
return(TRUE);
@@ -683,11 +877,12 @@ row_upd_changes_ord_field(
}
/***************************************************************
-Checks if an update vector changes an ordering field of an index record. */
+Checks if an update vector changes an ordering field of an index record.
+NOTE: we compare the fields as binary strings! */
ibool
-row_upd_changes_some_index_ord_field(
-/*=================================*/
+row_upd_changes_some_index_ord_field_binary(
+/*========================================*/
/* out: TRUE if update vector may change
an ordering field in an index record */
dict_table_t* table, /* in: table */
@@ -812,6 +1007,7 @@ row_upd_sec_index_entry(
upd_node_t* node, /* in: row update node */
que_thr_t* thr) /* in: query thread */
{
+ ibool check_ref;
ibool found;
dict_index_t* index;
dtuple_t* entry;
@@ -825,6 +1021,8 @@ row_upd_sec_index_entry(
index = node->index;
+ check_ref = row_upd_index_is_referenced(index);
+
heap = mem_heap_create(1024);
/* Build old index entry */
@@ -855,6 +1053,8 @@ row_upd_sec_index_entry(
"InnoDB: Make a detailed bug report and send it\n");
fprintf(stderr, "InnoDB: to mysql@lists.mysql.com\n");
+ trx_print(thr_get_trx(thr));
+
mem_free(err_buf);
} else {
/* Delete mark the old index record; it can already be
@@ -864,9 +1064,21 @@ row_upd_sec_index_entry(
if (!rec_get_deleted_flag(rec)) {
err = btr_cur_del_mark_set_sec_rec(0, btr_cur, TRUE,
thr, &mtr);
+ if (err == DB_SUCCESS && check_ref) {
+ /* NOTE that the following call loses
+ the position of pcur ! */
+ err = row_upd_check_references_constraints(
+ &pcur, index->table,
+ index, thr, &mtr);
+ if (err != DB_SUCCESS) {
+
+ goto close_cur;
+ }
+ }
+
}
}
-
+close_cur:
btr_pcur_close(&pcur);
mtr_commit(&mtr);
@@ -907,8 +1119,8 @@ row_upd_sec_step(
ut_ad(!(node->index->type & DICT_CLUSTERED));
if (node->state == UPD_NODE_UPDATE_ALL_SEC
- || row_upd_changes_ord_field(node->row, node->index,
- node->update)) {
+ || row_upd_changes_ord_field_binary(node->row, node->index,
+ node->update)) {
err = row_upd_sec_index_entry(node, thr);
return(err);
@@ -931,6 +1143,8 @@ row_upd_clust_rec_by_insert(
upd_node_t* node, /* in: row update node */
dict_index_t* index, /* in: clustered index of the record */
que_thr_t* thr, /* in: query thread */
+ ibool check_ref,/* in: TRUE if index may be referenced in
+ a foreign key constraint */
mtr_t* mtr) /* in: mtr; gets committed here */
{
mem_heap_t* heap;
@@ -958,6 +1172,7 @@ row_upd_clust_rec_by_insert(
return(err);
}
+
/* Mark as not-owned the externally stored fields which the new
row inherits from the delete marked record: purge should not
free those externally stored fields even if the delete marked
@@ -965,6 +1180,19 @@ row_upd_clust_rec_by_insert(
btr_cur_mark_extern_inherited_fields(btr_cur_get_rec(btr_cur),
node->update, mtr);
+ if (check_ref) {
+ /* NOTE that the following call loses
+ the position of pcur ! */
+ err = row_upd_check_references_constraints(
+ pcur, table,
+ index, thr, mtr);
+ if (err != DB_SUCCESS) {
+ mtr_commit(mtr);
+
+ return(err);
+ }
+ }
+
}
mtr_commit(mtr);
@@ -1095,6 +1323,8 @@ row_upd_del_mark_clust_rec(
upd_node_t* node, /* in: row update node */
dict_index_t* index, /* in: clustered index */
que_thr_t* thr, /* in: query thread */
+ ibool check_ref,/* in: TRUE if index may be referenced in
+ a foreign key constraint */
mtr_t* mtr) /* in: mtr; gets committed here */
{
btr_pcur_t* pcur;
@@ -1120,6 +1350,18 @@ row_upd_del_mark_clust_rec(
err = btr_cur_del_mark_set_clust_rec(BTR_NO_LOCKING_FLAG, btr_cur,
TRUE, thr, mtr);
+ if (err == DB_SUCCESS && check_ref) {
+ /* NOTE that the following call loses
+ the position of pcur ! */
+ err = row_upd_check_references_constraints(pcur, index->table,
+ index, thr, mtr);
+ if (err != DB_SUCCESS) {
+ mtr_commit(mtr);
+
+ return(err);
+ }
+ }
+
mtr_commit(mtr);
return(err);
@@ -1140,12 +1382,15 @@ row_upd_clust_step(
dict_index_t* index;
btr_pcur_t* pcur;
ibool success;
+ ibool check_ref;
ulint err;
- mtr_t mtr_buf;
mtr_t* mtr;
+ mtr_t mtr_buf;
index = dict_table_get_first_index(node->table);
+ check_ref = row_upd_index_is_referenced(index);
+
pcur = node->pcur;
/* We have to restore the cursor to its position */
@@ -1210,8 +1455,8 @@ row_upd_clust_step(
/* NOTE: the following function calls will also commit mtr */
if (node->is_delete) {
- err = row_upd_del_mark_clust_rec(node, index, thr, mtr);
-
+ err = row_upd_del_mark_clust_rec(node, index, thr, check_ref,
+ mtr);
if (err != DB_SUCCESS) {
return(err);
@@ -1244,7 +1489,7 @@ row_upd_clust_step(
row_upd_store_row(node);
- if (row_upd_changes_ord_field(node->row, index, node->update)) {
+ if (row_upd_changes_ord_field_binary(node->row, index, node->update)) {
/* Update causes an ordering field (ordering fields within
the B-tree) of the clustered index record to change: perform
@@ -1257,8 +1502,8 @@ row_upd_clust_step(
choosing records to update. MySQL solves now the problem
externally! */
- err = row_upd_clust_rec_by_insert(node, index, thr, mtr);
-
+ err = row_upd_clust_rec_by_insert(node, index, thr, check_ref,
+ mtr);
if (err != DB_SUCCESS) {
return(err);
@@ -1304,8 +1549,8 @@ row_upd(
interpreter: we must calculate it on the fly: */
if (node->is_delete ||
- row_upd_changes_some_index_ord_field(node->table,
- node->update)) {
+ row_upd_changes_some_index_ord_field_binary(
+ node->table, node->update)) {
node->cmpl_info = 0;
} else {
node->cmpl_info = UPD_NODE_NO_ORD_CHANGE;
diff --git a/innobase/row/row0vers.c b/innobase/row/row0vers.c
index 4dc65669247..5b62cd2b7e3 100644
--- a/innobase/row/row0vers.c
+++ b/innobase/row/row0vers.c
@@ -269,7 +269,13 @@ row_vers_old_has_index_entry(
row = row_build(ROW_COPY_POINTERS, clust_index, rec, heap);
entry = row_build_index_entry(row, index, heap);
- if (dtuple_datas_are_equal(ientry, entry)) {
+ /* NOTE that we cannot do the comparison as binary
+ fields because the row is maybe being modified so that
+ the clustered index record has already been updated
+ to a different binary value in a char field, but the
+ collation identifies the old and new value anyway! */
+
+ if (dtuple_datas_are_ordering_equal(ientry, entry)) {
mem_heap_free(heap);
@@ -307,7 +313,13 @@ row_vers_old_has_index_entry(
prev_version, heap);
entry = row_build_index_entry(row, index, heap);
- if (dtuple_datas_are_equal(ientry, entry)) {
+ /* NOTE that we cannot do the comparison as binary
+ fields because maybe the secondary index record has
+ already been updated to a different binary value in
+ a char field, but the collation identifies the old
+ and new value anyway! */
+
+ if (dtuple_datas_are_ordering_equal(ientry, entry)) {
mem_heap_free(heap);
diff --git a/innobase/srv/srv0srv.c b/innobase/srv/srv0srv.c
index 1237a788622..45f7b1b6879 100644
--- a/innobase/srv/srv0srv.c
+++ b/innobase/srv/srv0srv.c
@@ -30,6 +30,7 @@ Created 10/8/1995 Heikki Tuuri
#include "ut0mem.h"
#include "os0proc.h"
#include "mem0mem.h"
+#include "mem0pool.h"
#include "sync0sync.h"
#include "sync0ipm.h"
#include "thr0loc.h"
@@ -46,11 +47,14 @@ Created 10/8/1995 Heikki Tuuri
#include "ibuf0ibuf.h"
#include "buf0flu.h"
#include "btr0sea.h"
+#include "dict0load.h"
/* The following counter is incremented whenever there is some user activity
in the server */
ulint srv_activity_count = 0;
+char* srv_main_thread_op_info = "";
+
/* Server parameters which are read from the initfile */
/* The following three are dir paths which are catenated before file
@@ -66,6 +70,11 @@ ulint* srv_data_file_sizes = NULL; /* size in database pages */
ulint* srv_data_file_is_raw_partition = NULL;
+/* If the following is TRUE we do not allow inserts etc. This protects
+the user from forgetting the 'newraw' keyword in my.cnf */
+
+ibool srv_created_new_raw = FALSE;
+
char** srv_log_group_home_dirs = NULL;
ulint srv_n_log_groups = ULINT_MAX;
@@ -75,6 +84,9 @@ ibool srv_log_archive_on = TRUE;
ulint srv_log_buffer_size = ULINT_MAX; /* size in database pages */
ibool srv_flush_log_at_trx_commit = TRUE;
+byte srv_latin1_ordering[256]; /* The sort order table of the latin1
+ character set */
+
ibool srv_use_native_aio = FALSE;
ulint srv_pool_size = ULINT_MAX; /* size in database pages;
@@ -93,6 +105,11 @@ ulint srv_lock_wait_timeout = 1024 * 1024 * 1024;
char* srv_unix_file_flush_method_str = NULL;
ulint srv_unix_file_flush_method = 0;
+/* If the following is != 0 we do not allow inserts etc. This protects
+the user from forgetting the innodb_force_recovery keyword in my.cnf */
+
+ulint srv_force_recovery = 0;
+
ibool srv_use_doublewrite_buf = TRUE;
ibool srv_set_thread_priorities = TRUE;
@@ -115,6 +132,10 @@ ulint srv_n_rows_inserted = 0;
ulint srv_n_rows_updated = 0;
ulint srv_n_rows_deleted = 0;
ulint srv_n_rows_read = 0;
+ulint srv_n_rows_inserted_old = 0;
+ulint srv_n_rows_updated_old = 0;
+ulint srv_n_rows_deleted_old = 0;
+ulint srv_n_rows_read_old = 0;
ibool srv_print_innodb_monitor = FALSE;
ibool srv_print_innodb_lock_monitor = FALSE;
@@ -125,6 +146,7 @@ ibool srv_print_innodb_tablespace_monitor = FALSE;
stderr on startup/shutdown
*/
ibool srv_print_verbose_log = TRUE;
+ibool srv_print_innodb_table_monitor = FALSE;
/* The parameters below are obsolete: */
@@ -1745,31 +1767,153 @@ srv_release_mysql_thread_if_suspended(
}
/*************************************************************************
-A thread which wakes up threads whose lock wait may have lasted too long. */
+A thread which wakes up threads whose lock wait may have lasted too long.
+This also prints the info output by various InnoDB monitors. */
#ifndef __WIN__
void*
#else
ulint
#endif
-srv_lock_timeout_monitor_thread(
-/*============================*/
+srv_lock_timeout_and_monitor_thread(
+/*================================*/
/* out: a dummy parameter */
void* arg) /* in: a dummy parameter required by
os_thread_create */
{
+ double time_elapsed;
+ time_t current_time;
+ time_t last_monitor_time;
ibool some_waits;
srv_slot_t* slot;
double wait_time;
ulint i;
UT_NOT_USED(arg);
+ last_monitor_time = time(NULL);
loop:
/* When someone is waiting for a lock, we wake up every second
and check if a timeout has passed for a lock wait */
- os_thread_sleep(1000000);
-
+ os_thread_sleep(1000000);
+
+ /* In case mutex_exit is not a memory barrier, it is
+ theoretically possible some threads are left waiting though
+ the semaphore is already released. Wake up those threads: */
+
+ sync_arr_wake_threads_if_sema_free();
+
+ current_time = time(NULL);
+
+ time_elapsed = difftime(current_time, last_monitor_time);
+
+ if (time_elapsed > 15) {
+
+ last_monitor_time = time(NULL);
+
+ if (srv_print_innodb_monitor) {
+
+ printf("=====================================\n");
+ ut_print_timestamp(stdout);
+
+ printf(" INNODB MONITOR OUTPUT\n"
+ "=====================================\n");
+ printf("----------\n"
+ "SEMAPHORES\n"
+ "----------\n");
+ sync_print();
+ printf("------------\n"
+ "TRANSACTIONS\n"
+ "------------\n");
+ lock_print_info();
+ printf("--------\n"
+ "FILE I/O\n"
+ "--------\n");
+ os_aio_print();
+ printf("-------------\n"
+ "INSERT BUFFER\n"
+ "-------------\n");
+ ibuf_print();
+ printf("---\n"
+ "LOG\n"
+ "---\n");
+ log_print();
+ printf("----------------------\n"
+ "BUFFER POOL AND MEMORY\n"
+ "----------------------\n");
+ printf(
+ "Total memory allocated %lu; in additional pool allocated %lu\n",
+ ut_total_allocated_memory,
+ mem_pool_get_reserved(mem_comm_pool));
+ buf_print_io();
+ printf("--------------\n"
+ "ROW OPERATIONS\n"
+ "--------------\n");
+ printf("InnoDB main thread state: %s\n",
+ srv_main_thread_op_info);
+ printf(
+ "Number of rows inserted %lu, updated %lu, deleted %lu, read %lu\n",
+ srv_n_rows_inserted,
+ srv_n_rows_updated,
+ srv_n_rows_deleted,
+ srv_n_rows_read);
+ printf(
+ "%.2f inserts/s, %.2f updates/s, %.2f deletes/s, %.2f reads/s\n",
+ (srv_n_rows_inserted - srv_n_rows_inserted_old)
+ / time_elapsed,
+ (srv_n_rows_updated - srv_n_rows_updated_old)
+ / time_elapsed,
+ (srv_n_rows_deleted - srv_n_rows_deleted_old)
+ / time_elapsed,
+ (srv_n_rows_read - srv_n_rows_read_old)
+ / time_elapsed);
+
+ srv_n_rows_inserted_old = srv_n_rows_inserted;
+ srv_n_rows_updated_old = srv_n_rows_updated;
+ srv_n_rows_deleted_old = srv_n_rows_deleted;
+ srv_n_rows_read_old = srv_n_rows_read;
+
+ printf("----------------------------\n"
+ "END OF INNODB MONITOR OUTPUT\n"
+ "============================\n");
+
+
+ }
+
+ if (srv_print_innodb_tablespace_monitor) {
+
+ printf("================================================\n");
+
+ ut_print_timestamp(stdout);
+
+ printf(" INNODB TABLESPACE MONITOR OUTPUT\n"
+ "================================================\n");
+
+ fsp_print(0);
+ fprintf(stderr, "Validating tablespace\n");
+ fsp_validate(0);
+ fprintf(stderr, "Validation ok\n");
+ printf("---------------------------------------\n"
+ "END OF INNODB TABLESPACE MONITOR OUTPUT\n"
+ "=======================================\n");
+ }
+
+ if (srv_print_innodb_table_monitor) {
+
+ printf("===========================================\n");
+
+ ut_print_timestamp(stdout);
+
+ printf(" INNODB TABLE MONITOR OUTPUT\n"
+ "===========================================\n");
+ dict_print();
+
+ printf("-----------------------------------\n"
+ "END OF INNODB TABLE MONITOR OUTPUT\n"
+ "==================================\n");
+ }
+ }
+
mutex_enter(&kernel_mutex);
some_waits = FALSE;
@@ -1792,11 +1936,10 @@ loop:
/* Timeout exceeded or a wrap over in system
time counter: cancel the lock request queued
- by the transaction; NOTE that currently only
- a record lock request can be waiting in
- MySQL! */
+ by the transaction and release possible
+ other transactions waiting behind */
- lock_rec_cancel(
+ lock_cancel_waiting_and_release(
thr_get_trx(slot->thr)->wait_lock);
}
}
@@ -1806,11 +1949,15 @@ loop:
mutex_exit(&kernel_mutex);
- if (some_waits) {
+ if (some_waits || srv_print_innodb_monitor
+ || srv_print_innodb_lock_monitor
+ || srv_print_innodb_tablespace_monitor
+ || srv_print_innodb_table_monitor) {
goto loop;
}
- /* No one was waiting for a lock: suspend this thread */
+ /* No one was waiting for a lock and no monitor was active:
+ suspend this thread */
os_event_wait(srv_lock_timeout_thread_event);
@@ -1823,6 +1970,36 @@ loop:
#endif
}
+/*************************************************************************
+A thread which periodically prints warnings about semaphore waits which have
+lasted too long. These can be used to track bugs which cause hangs. */
+
+#ifndef __WIN__
+void*
+#else
+ulint
+#endif
+srv_error_monitor_thread(
+/*=====================*/
+ /* out: a dummy parameter */
+ void* arg) /* in: a dummy parameter required by
+ os_thread_create */
+{
+ UT_NOT_USED(arg);
+loop:
+ os_thread_sleep(10000000); /* presumably 10 s: the argument appears to be in microseconds */
+
+ sync_array_print_long_waits();
+
+ goto loop; /* unconditional jump: the thread never leaves this loop */
+
+#ifndef __WIN__
+ return(NULL); /* not reached in normal flow, see the goto above */
+#else
+ return(0); /* not reached in normal flow, see the goto above */
+#endif
+}
+
/***********************************************************************
Tells the InnoDB server that there has been activity in the database
and wakes up the master thread if it is suspended (not sleeping). Used
@@ -1861,15 +2038,18 @@ srv_master_thread(
os_thread_create */
{
os_event_t event;
+ time_t last_flush_time;
+ time_t current_time;
ulint old_activity_count;
ulint n_pages_purged;
ulint n_bytes_merged;
ulint n_pages_flushed;
ulint n_bytes_archived;
+ ulint n_ios;
+ ulint n_ios_old;
+ ulint n_ios_very_old;
+ ulint n_pend_ios;
ulint i;
- time_t last_flush_time;
- time_t current_time;
- time_t last_monitor_time;
UT_NOT_USED(arg);
@@ -1882,26 +2062,56 @@ srv_master_thread(
mutex_exit(&kernel_mutex);
os_event_set(srv_sys->operational);
-
- last_monitor_time = time(NULL);
loop:
+ srv_main_thread_op_info = "reserving kernel mutex";
+
+ n_ios_very_old = log_sys->n_log_ios + buf_pool->n_pages_read
+ + buf_pool->n_pages_written;
mutex_enter(&kernel_mutex);
old_activity_count = srv_activity_count;
mutex_exit(&kernel_mutex);
- /* We run purge every 10 seconds, even if the server were active: */
+ /* We run purge and a batch of ibuf_contract every 10 seconds, even
+ if the server were active: */
for (i = 0; i < 10; i++) {
+ n_ios_old = log_sys->n_log_ios + buf_pool->n_pages_read
+ + buf_pool->n_pages_written;
+
+ srv_main_thread_op_info = "sleeping";
os_thread_sleep(1000000);
+ if (srv_force_recovery >= SRV_FORCE_NO_BACKGROUND) {
+
+ goto loop;
+ }
+
/* We flush the log once in a second even if no commit
is issued or the we have specified in my.cnf no flush
at transaction commit */
+ srv_main_thread_op_info = "flushing log";
log_flush_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP);
+ /* If there were less than 10 i/os during the
+ one second sleep, we assume that there is free
+ disk i/o capacity available, and it makes sense to
+ do an insert buffer merge. */
+
+ n_pend_ios = buf_get_n_pending_ios()
+ + log_sys->n_pending_writes;
+ n_ios = log_sys->n_log_ios + buf_pool->n_pages_read
+ + buf_pool->n_pages_written;
+ if (n_pend_ios < 3 && (n_ios - n_ios_old < 10)) {
+ srv_main_thread_op_info = "doing insert buffer merge";
+ ibuf_contract_for_n_pages(TRUE, 5);
+
+ srv_main_thread_op_info = "flushing log";
+ log_flush_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP);
+ }
+
if (srv_activity_count == old_activity_count) {
if (srv_print_thread_releases) {
@@ -1916,28 +2126,48 @@ loop:
printf("Master thread wakes up!\n");
}
+ /* If there were less than 200 i/os during the 10 second period,
+ we assume that there is free disk i/o capacity available, and it
+ makes sense to do a buffer pool flush. */
+
+ n_pend_ios = buf_get_n_pending_ios() + log_sys->n_pending_writes;
+ n_ios = log_sys->n_log_ios + buf_pool->n_pages_read
+ + buf_pool->n_pages_written;
+ if (n_pend_ios < 3 && (n_ios - n_ios_very_old < 200)) {
+
+ srv_main_thread_op_info = "flushing buffer pool pages";
+ buf_flush_batch(BUF_FLUSH_LIST, 50, ut_dulint_max);
+
+ srv_main_thread_op_info = "flushing log";
+ log_flush_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP);
+ }
+
+ /* We run a batch of insert buffer merge every 10 seconds,
+ even if the server were active */
+
+ srv_main_thread_op_info = "doing insert buffer merge";
+ ibuf_contract_for_n_pages(TRUE, 5);
+
+ srv_main_thread_op_info = "flushing log";
+ log_flush_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP);
+
+ /* We run a full purge every 10 seconds, even if the server
+ were active */
+
n_pages_purged = 1;
last_flush_time = time(NULL);
while (n_pages_purged) {
- /* TODO: replace this by a check if we are running
- out of file space! */
- if (srv_print_innodb_monitor) {
- ut_print_timestamp(stdout);
- printf(" InnoDB starts purge\n");
- }
-
- n_pages_purged = trx_purge();
- if (srv_print_innodb_monitor) {
- ut_print_timestamp(stdout);
- printf(" InnoDB purged %lu pages\n", n_pages_purged);
- }
+ srv_main_thread_op_info = "purging";
+ n_pages_purged = trx_purge();
current_time = time(NULL);
if (difftime(current_time, last_flush_time) > 1) {
+ srv_main_thread_op_info = "flushing log";
+
log_flush_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP);
last_flush_time = current_time;
}
@@ -1947,67 +2177,7 @@ background_loop:
/* In this loop we run background operations when the server
is quiet */
- current_time = time(NULL);
-
- if (difftime(current_time, last_monitor_time) > 15) {
-
- last_monitor_time = time(NULL);
-
- if (srv_print_innodb_monitor) {
-
- printf("=====================================\n");
- ut_print_timestamp(stdout);
-
- printf(" INNODB MONITOR OUTPUT\n"
- "=====================================\n");
- printf("------------\n"
- "TRANSACTIONS\n"
- "------------\n");
- lock_print_info();
- printf("-----------------------------------------------\n"
- "CURRENT SEMAPHORES RESERVED AND SEMAPHORE WAITS\n"
- "-----------------------------------------------\n");
- sync_print();
- printf("CURRENT PENDING FILE I/O'S\n"
- "--------------------------\n");
- os_aio_print();
- printf("-----------\n"
- "BUFFER POOL\n"
- "-----------\n");
- buf_print_io();
- printf("--------------\n"
- "ROW OPERATIONS\n"
- "--------------\n");
- printf(
- "Number of rows inserted %lu, updated %lu, deleted %lu, read %lu\n",
- srv_n_rows_inserted,
- srv_n_rows_updated,
- srv_n_rows_deleted,
- srv_n_rows_read);
- printf("Server activity counter %lu\n", srv_activity_count);
- printf("----------------------------\n"
- "END OF INNODB MONITOR OUTPUT\n"
- "============================\n");
- }
-
- if (srv_print_innodb_tablespace_monitor) {
-
- printf("================================================\n");
-
- ut_print_timestamp(stdout);
-
- printf(" INNODB TABLESPACE MONITOR OUTPUT\n"
- "================================================\n");
-
- fsp_print(0);
- fprintf(stderr, "Validating tablespace\n");
- fsp_validate(0);
- fprintf(stderr, "Validation ok\n");
- printf("---------------------------------------\n"
- "END OF INNODB TABLESPACE MONITOR OUTPUT\n"
- "=======================================\n");
- }
- }
+ srv_main_thread_op_info = "reserving kernel mutex";
mutex_enter(&kernel_mutex);
if (srv_activity_count != old_activity_count) {
@@ -2020,17 +2190,11 @@ background_loop:
/* The server has been quiet for a while: start running background
operations */
- if (srv_print_innodb_monitor) {
- ut_print_timestamp(stdout);
- printf(" InnoDB starts purge\n");
- }
+ srv_main_thread_op_info = "purging";
n_pages_purged = trx_purge();
- if (srv_print_innodb_monitor) {
- ut_print_timestamp(stdout);
- printf(" InnoDB purged %lu pages\n", n_pages_purged);
- }
+ srv_main_thread_op_info = "reserving kernel mutex";
mutex_enter(&kernel_mutex);
if (srv_activity_count != old_activity_count) {
@@ -2039,17 +2203,10 @@ background_loop:
}
mutex_exit(&kernel_mutex);
- if (srv_print_innodb_monitor) {
- ut_print_timestamp(stdout);
- printf(" InnoDB starts insert buffer merge\n");
- }
+ srv_main_thread_op_info = "doing insert buffer merge";
+ n_bytes_merged = ibuf_contract_for_n_pages(TRUE, 20);
- n_bytes_merged = ibuf_contract(TRUE);
-
- if (srv_print_innodb_monitor) {
- ut_print_timestamp(stdout);
- printf(" InnoDB merged %lu bytes\n", n_bytes_merged);
- }
+ srv_main_thread_op_info = "reserving kernel mutex";
mutex_enter(&kernel_mutex);
if (srv_activity_count != old_activity_count) {
@@ -2058,17 +2215,10 @@ background_loop:
}
mutex_exit(&kernel_mutex);
- if (srv_print_innodb_monitor) {
- ut_print_timestamp(stdout);
- printf(" InnoDB (main thread) starts buffer pool flush\n");
- }
-
+ srv_main_thread_op_info = "flushing buffer pool pages";
n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 100, ut_dulint_max);
- if (srv_print_innodb_monitor) {
- ut_print_timestamp(stdout);
- printf(" InnoDB flushed %lu pages\n", n_pages_flushed);
- }
+ srv_main_thread_op_info = "reserving kernel mutex";
mutex_enter(&kernel_mutex);
if (srv_activity_count != old_activity_count) {
@@ -2077,16 +2227,23 @@ background_loop:
}
mutex_exit(&kernel_mutex);
+ srv_main_thread_op_info = "waiting for buffer pool flush to end";
buf_flush_wait_batch_end(BUF_FLUSH_LIST);
+ srv_main_thread_op_info = "making checkpoint";
+
log_checkpoint(TRUE, FALSE);
+ srv_main_thread_op_info = "reserving kernel mutex";
+
mutex_enter(&kernel_mutex);
if (srv_activity_count != old_activity_count) {
mutex_exit(&kernel_mutex);
goto loop;
}
mutex_exit(&kernel_mutex);
+
+ srv_main_thread_op_info = "archiving log (if log archive is on)";
log_archive_do(FALSE, &n_bytes_archived);
@@ -2104,12 +2261,16 @@ background_loop:
/* There is no work for background operations either: suspend
master thread to wait for more server activity */
+ srv_main_thread_op_info = "suspending";
+
mutex_enter(&kernel_mutex);
event = srv_suspend_thread();
mutex_exit(&kernel_mutex);
+ srv_main_thread_op_info = "waiting for server activity";
+
os_event_wait(event);
goto loop;
diff --git a/innobase/srv/srv0start.c b/innobase/srv/srv0start.c
index 2e9bade8b35..bdc8225a14f 100644
--- a/innobase/srv/srv0start.c
+++ b/innobase/srv/srv0start.c
@@ -73,7 +73,10 @@ os_thread_id_t thread_ids[SRV_MAX_N_IO_THREADS + 5];
#define SRV_N_PENDING_IOS_PER_THREAD OS_AIO_N_PENDING_IOS_PER_THREAD
#define SRV_MAX_N_PENDING_SYNC_IOS 100
-#define SRV_MAX_N_OPEN_FILES 25
+/* The following limit may be too big in some old operating systems:
+we may get an assertion failure in os0file.c */
+
+#define SRV_MAX_N_OPEN_FILES 500
#define SRV_LOG_SPACE_FIRST_ID 1000000000
@@ -315,7 +318,12 @@ open_or_create_data_files(
ulint size_high;
char name[10000];
- ut_a(srv_n_data_files < 1000);
+ if (srv_n_data_files >= 1000) {
+ fprintf(stderr, "InnoDB: can only have < 1000 data files\n"
+ "InnoDB: you have defined %lu\n",
+ srv_n_data_files);
+ return(DB_ERROR);
+ }
*sum_of_new_sizes = 0;
@@ -336,6 +344,8 @@ open_or_create_data_files(
/* The partition is opened, not created; then it is
written over */
+ srv_created_new_raw = TRUE;
+
files[i] = os_file_create(
name, OS_FILE_OPEN, OS_FILE_NORMAL,
OS_DATA_FILE, &ret);
@@ -375,6 +385,7 @@ open_or_create_data_files(
if (!ret) {
fprintf(stderr,
"InnoDB: Error in opening %s\n", name);
+ os_file_get_last_error();
return(DB_ERROR);
}
@@ -537,9 +548,6 @@ innobase_start_or_create_for_mysql(void)
/*====================================*/
/* out: DB_SUCCESS or error code */
{
- ulint i;
- ulint k;
- ulint err;
ibool create_new_db;
ibool log_file_created;
ibool log_created = FALSE;
@@ -550,6 +558,9 @@ innobase_start_or_create_for_mysql(void)
ulint max_arch_log_no;
ibool start_archive;
ulint sum_of_new_sizes;
+ ulint err;
+ ulint i;
+ ulint k;
mtr_t mtr;
log_do_write = TRUE;
@@ -866,17 +877,19 @@ innobase_start_or_create_for_mysql(void)
SRV_MAX_N_IO_THREADS); */
}
- /* Create the master thread which monitors the database
- server, and does purge and other utility operations */
-
- os_thread_create(&srv_master_thread, NULL, thread_ids + 1 +
- SRV_MAX_N_IO_THREADS);
/* fprintf(stderr, "Max allowed record size %lu\n",
page_get_free_space_of_empty() / 2); */
- /* Create the thread which watches the timeouts for lock waits */
- os_thread_create(&srv_lock_timeout_monitor_thread, NULL,
+ /* Create the thread which watches the timeouts for lock waits
+ and prints InnoDB monitor info */
+
+ os_thread_create(&srv_lock_timeout_and_monitor_thread, NULL,
thread_ids + 2 + SRV_MAX_N_IO_THREADS);
+
+ /* Create the thread which warns of long semaphore waits */
+ os_thread_create(&srv_error_monitor_thread, NULL,
+ thread_ids + 3 + SRV_MAX_N_IO_THREADS);
+
srv_was_started = TRUE;
srv_is_being_started = FALSE;
@@ -886,6 +899,17 @@ innobase_start_or_create_for_mysql(void)
trx_sys_create_doublewrite_buf();
}
+ err = dict_create_or_check_foreign_constraint_tables();
+
+ if (err != DB_SUCCESS) {
+ return((int)DB_ERROR);
+ }
+
+ /* Create the master thread which monitors the database
+ server, and does purge and other utility operations */
+
+ os_thread_create(&srv_master_thread, NULL, thread_ids + 1 +
+ SRV_MAX_N_IO_THREADS);
/* buf_debug_prints = TRUE; */
if (srv_print_verbose_log)
@@ -905,12 +929,16 @@ innobase_shutdown_for_mysql(void)
/* out: DB_SUCCESS or error code */
{
if (!srv_was_started) {
- if (srv_is_being_started) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Warning: shutting down a not properly started database\n");
- }
- return(DB_SUCCESS);
+ if (srv_is_being_started) {
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+ " InnoDB: Warning: shutting down a not properly started\n");
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+ " InnoDB: or created database!\n");
+ }
+
+ return(DB_SUCCESS);
}
/* Flush buffer pool to disk, write the current lsn to
@@ -919,6 +947,6 @@ innobase_shutdown_for_mysql(void)
logs_empty_and_mark_files_at_shutdown();
ut_free_all_mem();
-
+
return((int) DB_SUCCESS);
}
diff --git a/innobase/sync/sync0arr.c b/innobase/sync/sync0arr.c
index 4183f3f1c4c..e4c351b9d21 100644
--- a/innobase/sync/sync0arr.c
+++ b/innobase/sync/sync0arr.c
@@ -14,6 +14,7 @@ Created 9/5/1995 Heikki Tuuri
#include "sync0sync.h"
#include "sync0rw.h"
#include "os0sync.h"
+#include "srv0srv.h"
/*
WAIT ARRAY
@@ -64,6 +65,8 @@ struct sync_cell_struct {
ibool event_set; /* TRUE if the event is set */
os_event_t event; /* operating system event
semaphore handle */
+ time_t reservation_time;/* time when the thread reserved
+ the wait cell */
};
/* NOTE: It is allowed for a thread to wait
@@ -321,16 +324,12 @@ sync_array_reserve_cell(
sync_array_t* arr, /* in: wait array */
void* object, /* in: pointer to the object to wait for */
ulint type, /* in: lock request type */
- #ifdef UNIV_SYNC_DEBUG
- char* file, /* in: in debug version file where
- requested */
- ulint line, /* in: in the debug version line where
- requested */
- #endif
+ char* file, /* in: file where requested */
+ ulint line, /* in: line where requested */
ulint* index) /* out: index of the reserved cell */
{
- ulint i;
sync_cell_t* cell;
+ ulint i;
ut_a(object);
ut_a(index);
@@ -350,18 +349,15 @@ sync_array_reserve_cell(
sync_cell_event_reset(cell);
}
+ cell->reservation_time = time(NULL);
+ cell->thread = os_thread_get_curr_id();
+
cell->wait_object = object;
cell->request_type = type;
- cell->thread = os_thread_get_curr_id();
cell->waiting = FALSE;
- #ifdef UNIV_SYNC_DEBUG
cell->file = file;
cell->line = line;
- #else
- cell->file = "NOT KNOWN";
- cell->line = 0;
- #endif
arr->n_reserved++;
@@ -436,6 +432,7 @@ static
void
sync_array_cell_print(
/*==================*/
+ FILE* file, /* in: file where to print */
sync_cell_t* cell) /* in: sync cell */
{
mutex_t* mutex;
@@ -445,53 +442,63 @@ sync_array_cell_print(
type = cell->request_type;
+ fprintf(file,
+"--Thread %lu has waited at %s line %lu for %.2f seconds the semaphore:\n",
+ (ulint)cell->thread, cell->file, cell->line,
+ difftime(time(NULL), cell->reservation_time));
+
if (type == SYNC_MUTEX) {
- str = "MUTEX ENTER";
mutex = (mutex_t*)cell->wait_object;
- printf("Mutex created in file %s line %lu",
- mutex->cfile_name, mutex->cline);
+ fprintf(file,
+ "Mutex at %lx created file %s line %lu, lock var %lu\n",
+ (ulint)mutex, mutex->cfile_name, mutex->cline,
+ mutex->lock_word);
+ fprintf(file,
+ "Last time reserved in file %s line %lu, waiters flag %lu\n",
+ mutex->file_name, mutex->line, mutex->waiters);
+
} else if (type == RW_LOCK_EX || type == RW_LOCK_SHARED) {
if (type == RW_LOCK_EX) {
- str = "X-LOCK";
+ fprintf(file, "X-lock on");
} else {
- str = "S_LOCK";
+ fprintf(file, "S-lock on");
}
rwlock = (rw_lock_t*)cell->wait_object;
- printf("Rw-latch created in file %s line %lu",
- rwlock->cfile_name, rwlock->cline);
+ fprintf(file, " RW-latch at %lx created in file %s line %lu\n",
+ (ulint)rwlock, rwlock->cfile_name, rwlock->cline);
if (rwlock->writer != RW_LOCK_NOT_LOCKED) {
- printf(" writer reserved with %lu", rwlock->writer);
+ fprintf(file,
+ "a writer (thread id %lu) has reserved it in mode",
+ (ulint)rwlock->writer_thread);
+ if (rwlock->writer == RW_LOCK_EX) {
+ fprintf(file, " exclusive\n");
+ } else {
+ fprintf(file, " wait exclusive\n");
+ }
}
- if (rwlock->writer == RW_LOCK_EX) {
- printf(" reserv. thread id %lu",
- (ulint)rwlock->writer_thread);
- }
-
- if (rwlock->reader_count > 0) {
- printf(" readers %lu", rwlock->reader_count);
- }
+ fprintf(file, "number of readers %lu, waiters flag %lu\n",
+ rwlock->reader_count, rwlock->waiters);
+
+ fprintf(file, "Last time read locked in file %s line %lu\n",
+ rwlock->last_s_file_name, rwlock->last_s_line);
+ fprintf(file, "Last time write locked in file %s line %lu\n",
+ rwlock->last_x_file_name, rwlock->last_x_line);
} else {
ut_error;
}
- printf(" at addr %lx waited for by thread %lu op. %s file %s line %lu ",
- (ulint)cell->wait_object,
- (ulint)cell->thread,
- str, cell->file, cell->line);
if (!cell->waiting) {
- printf("WAIT ENDED ");
+ fprintf(file, "wait has ended\n");
}
if (cell->event_set) {
- printf("EVENT SET");
+ fprintf(file, "wait is ending\n");
}
-
- printf("\n");
}
/**********************************************************************
@@ -620,14 +627,15 @@ sync_array_detect_deadlock(
released the mutex: in this case no deadlock
can occur, as the wait array cannot contain
a thread with ID_UNDEFINED value. */
+
ret = sync_array_deadlock_step(arr, start, thread, 0,
- depth);
+ depth);
if (ret) {
printf(
"Mutex %lx owned by thread %lu file %s line %lu\n",
(ulint)mutex, mutex->thread_id,
mutex->file_name, mutex->line);
- sync_array_cell_print(cell);
+ sync_array_cell_print(stdout, cell);
return(TRUE);
}
}
@@ -636,11 +644,11 @@ sync_array_detect_deadlock(
} else if (cell->request_type == RW_LOCK_EX) {
- lock = cell->wait_object;
+ lock = cell->wait_object;
- debug = UT_LIST_GET_FIRST(lock->debug_list);
+ debug = UT_LIST_GET_FIRST(lock->debug_list);
- while (debug != NULL) {
+ while (debug != NULL) {
thread = debug->thread_id;
@@ -661,23 +669,23 @@ sync_array_detect_deadlock(
if (ret) {
printf("rw-lock %lx ", (ulint) lock);
rw_lock_debug_print(debug);
- sync_array_cell_print(cell);
+ sync_array_cell_print(stdout, cell);
return(TRUE);
}
}
debug = UT_LIST_GET_NEXT(list, debug);
- }
+ }
- return(FALSE);
+ return(FALSE);
} else if (cell->request_type == RW_LOCK_SHARED) {
- lock = cell->wait_object;
- debug = UT_LIST_GET_FIRST(lock->debug_list);
+ lock = cell->wait_object;
+ debug = UT_LIST_GET_FIRST(lock->debug_list);
- while (debug != NULL) {
+ while (debug != NULL) {
thread = debug->thread_id;
@@ -694,16 +702,16 @@ sync_array_detect_deadlock(
if (ret) {
printf("rw-lock %lx ", (ulint) lock);
rw_lock_debug_print(debug);
- sync_array_cell_print(cell);
+ sync_array_cell_print(stdout, cell);
return(TRUE);
}
}
debug = UT_LIST_GET_NEXT(list, debug);
- }
+ }
- return(FALSE);
+ return(FALSE);
} else {
ut_error;
@@ -714,6 +722,55 @@ sync_array_detect_deadlock(
}
/**********************************************************************
+Determines if we can wake up the thread waiting for a semaphore. */
+static
+ibool
+sync_arr_cell_can_wake_up(
+/*======================*/
+ sync_cell_t* cell) /* in: cell to search */
+{
+ mutex_t* mutex;
+ rw_lock_t* lock;
+ /* Returns TRUE if the object waited for in cell looks free for the cell's request type, else FALSE */
+ if (cell->request_type == SYNC_MUTEX) {
+
+ mutex = cell->wait_object;
+ /* A zero lock word means the mutex is not locked */
+ if (mutex_get_lock_word(mutex) == 0) {
+
+ return(TRUE);
+ }
+
+ } else if (cell->request_type == RW_LOCK_EX) {
+
+ lock = cell->wait_object;
+ /* An x-lock waiter can proceed when there are no readers and no writer */
+ if (rw_lock_get_reader_count(lock) == 0
+ && rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED) {
+
+ return(TRUE);
+ }
+ /* ... or when no readers remain and the wait-exclusive reservation belongs to the waiting thread itself */
+ if (rw_lock_get_reader_count(lock) == 0
+ && rw_lock_get_writer(lock) == RW_LOCK_WAIT_EX
+ && lock->writer_thread == cell->thread) {
+
+ return(TRUE);
+ }
+
+ } else if (cell->request_type == RW_LOCK_SHARED) {
+ lock = cell->wait_object;
+ /* An s-lock waiter only needs the writer field to be RW_LOCK_NOT_LOCKED */
+ if (rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED) {
+
+ return(TRUE);
+ }
+ }
+ /* Object still reserved, or a request type not handled above */
+ return(FALSE);
+}
+
+/**********************************************************************
Frees the cell. NOTE! sync_array_wait_event frees the cell
automatically! */
@@ -740,9 +797,8 @@ sync_array_free_cell(
}
/**************************************************************************
-Looks for the cells in the wait array which refer
-to the wait object specified,
-and sets their corresponding events to the signaled state. In this
+Looks for the cells in the wait array which refer to the wait object
+specified, and sets their corresponding events to the signaled state. In this
way releases the threads waiting for the object to contend for the object.
It is possible that no such cell is found, in which case does nothing. */
@@ -783,6 +839,88 @@ sync_array_signal_object(
}
/**************************************************************************
+If the wakeup algorithm does not work perfectly at semaphore releases,
+this function will do the waking (see the comment in mutex_exit). This
+function should be called about every 1 second in the server. */
+
+void
+sync_arr_wake_threads_if_sema_free(void)
+/*====================================*/
+{
+ sync_array_t* arr = sync_primary_wait_array;
+ sync_cell_t* cell;
+ ulint count; /* number of cells with a wait object seen so far */
+ ulint i; /* index of the cell being examined */
+
+ sync_array_enter(arr);
+
+ i = 0;
+ count = 0;
+ /* NOTE(review): loop ends only when count reaches arr->n_reserved; i is not checked against n_cells */
+ while (count < arr->n_reserved) {
+
+ cell = sync_array_get_nth_cell(arr, i);
+ /* Only cells that have a wait object are counted against n_reserved */
+ if (cell->wait_object != NULL) {
+
+ count++;
+ /* Set the cell's event if the waited-for semaphore now looks free */
+ if (sync_arr_cell_can_wake_up(cell)) {
+
+ sync_cell_event_set(cell);
+ }
+ }
+
+ i++;
+ }
+
+ sync_array_exit(arr);
+}
+
+/**************************************************************************
+Prints warnings of long semaphore waits to stderr. Currently > 120 sec. */
+
+void
+sync_array_print_long_waits(void)
+/*=============================*/
+{
+ sync_cell_t* cell;
+ ibool old_val; /* srv_print_innodb_monitor value to restore */
+ ibool noticed = FALSE; /* TRUE once at least one long wait was printed */
+ ulint i;
+ /* NOTE(review): cells are read without sync_array_enter(), unlike sync_arr_wake_threads_if_sema_free -- confirm intended */
+ for (i = 0; i < sync_primary_wait_array->n_cells; i++) {
+
+ cell = sync_array_get_nth_cell(sync_primary_wait_array, i);
+ /* Report cells that have a wait object and were reserved over 120 seconds ago */
+ if (cell->wait_object != NULL
+ && difftime(time(NULL), cell->reservation_time) > 120) {
+
+ fprintf(stderr,
+ "InnoDB: Warning: a long semaphore wait:\n");
+ sync_array_cell_print(stderr, cell);
+
+ noticed = TRUE;
+ }
+ }
+ /* After any warning, force the InnoDB Monitor flag on for a while */
+ if (noticed) {
+ fprintf(stderr,
+"InnoDB: ###### Starts InnoDB Monitor for 30 secs to print diagnostic info:\n");
+ old_val = srv_print_innodb_monitor;
+
+ srv_print_innodb_monitor = TRUE;
+ os_event_set(srv_lock_timeout_thread_event);
+ /* presumably microseconds, since the message above promises 30 secs -- TODO confirm */
+ os_thread_sleep(30000000);
+ /* Put the monitor flag back to the value it had before this call */
+ srv_print_innodb_monitor = old_val;
+ fprintf(stderr,
+"InnoDB: ###### Diagnostic info printed to the standard output\n");
+ }
+}
+
+/**************************************************************************
Prints info of the wait array. */
static
void
@@ -795,9 +933,8 @@ sync_array_output_info(
ulint count;
ulint i;
- printf("-----------------------------------------------------\n");
- printf("SYNC ARRAY INFO: reservation count %ld, signal count %ld\n",
- arr->res_count, arr->sg_count);
+ printf("OS WAIT ARRAY INFO: reservation count %ld, signal count %ld\n",
+ arr->res_count, arr->sg_count);
i = 0;
count = 0;
@@ -807,7 +944,7 @@ sync_array_output_info(
if (cell->wait_object != NULL) {
count++;
- sync_array_cell_print(cell);
+ sync_array_cell_print(stdout, cell);
}
i++;
diff --git a/innobase/sync/sync0rw.c b/innobase/sync/sync0rw.c
index dc49ce2197e..1ef2920618f 100644
--- a/innobase/sync/sync0rw.c
+++ b/innobase/sync/sync0rw.c
@@ -17,11 +17,13 @@ Created 9/11/1995 Heikki Tuuri
ulint rw_s_system_call_count = 0;
ulint rw_s_spin_wait_count = 0;
+ulint rw_s_os_wait_count = 0;
ulint rw_s_exit_count = 0;
ulint rw_x_system_call_count = 0;
ulint rw_x_spin_wait_count = 0;
+ulint rw_x_os_wait_count = 0;
ulint rw_x_exit_count = 0;
@@ -95,8 +97,7 @@ rw_lock_create_func(
mutex_create(rw_lock_get_mutex(lock));
mutex_set_level(rw_lock_get_mutex(lock), SYNC_NO_ORDER_CHECK);
- ut_memcpy(&(lock->mutex.cfile_name), cfile_name,
- ut_min(RW_CNAME_LEN - 1, ut_strlen(cfile_name)));
+ lock->mutex.cfile_name = cfile_name;
lock->mutex.cline = cline;
rw_lock_set_waiters(lock, 0);
@@ -111,11 +112,14 @@ rw_lock_create_func(
lock->magic_n = RW_LOCK_MAGIC_N;
lock->level = SYNC_LEVEL_NONE;
- ut_memcpy(&(lock->cfile_name), cfile_name,
- ut_min(RW_CNAME_LEN - 1, ut_strlen(cfile_name)));
- lock->cfile_name[RW_CNAME_LEN - 1] = '\0';
+ lock->cfile_name = cfile_name;
lock->cline = cline;
+ lock->last_s_file_name = "not yet reserved";
+ lock->last_x_file_name = "not yet reserved";
+ lock->last_s_line = 0;
+ lock->last_x_line = 0;
+
mutex_enter(&rw_lock_list_mutex);
UT_LIST_ADD_FIRST(list, rw_lock_list, lock);
@@ -186,14 +190,11 @@ for the lock, before suspending the thread. */
void
rw_lock_s_lock_spin(
/*================*/
- rw_lock_t* lock /* in: pointer to rw-lock */
- #ifdef UNIV_SYNC_DEBUG
- ,ulint pass, /* in: pass value; != 0, if the lock
+ rw_lock_t* lock, /* in: pointer to rw-lock */
+ ulint pass, /* in: pass value; != 0, if the lock
will be passed to another thread to unlock */
char* file_name, /* in: file name where lock requested */
- ulint line /* in: line where requested */
- #endif
-)
+ ulint line) /* in: line where requested */
{
ulint index; /* index of the reserved wait cell */
ulint i; /* spin round count */
@@ -203,7 +204,7 @@ rw_lock_s_lock_spin(
lock_loop:
rw_s_spin_wait_count++;
- /* Spin waiting for the writer field to become free */
+ /* Spin waiting for the writer field to become free */
i = 0;
while (rw_lock_get_writer(lock) != RW_LOCK_NOT_LOCKED
@@ -223,19 +224,14 @@ lock_loop:
printf(
"Thread %lu spin wait rw-s-lock at %lx cfile %s cline %lu rnds %lu\n",
os_thread_get_curr_id(), (ulint)lock,
- &(lock->cfile_name), lock->cline, i);
+ lock->cfile_name, lock->cline, i);
}
mutex_enter(rw_lock_get_mutex(lock));
/* We try once again to obtain the lock */
- if (TRUE == rw_lock_s_lock_low(lock
- #ifdef UNIV_SYNC_DEBUG
- , pass, file_name,
- line
- #endif
- )) {
+ if (TRUE == rw_lock_s_lock_low(lock, pass, file_name, line)) {
mutex_exit(rw_lock_get_mutex(lock));
return; /* Success */
@@ -247,9 +243,7 @@ lock_loop:
sync_array_reserve_cell(sync_primary_wait_array,
lock, RW_LOCK_SHARED,
- #ifdef UNIV_SYNC_DEBUG
file_name, line,
- #endif
&index);
rw_lock_set_waiters(lock, 1);
@@ -260,12 +254,13 @@ lock_loop:
printf(
"Thread %lu OS wait rw-s-lock at %lx cfile %s cline %lu\n",
os_thread_get_curr_id(), (ulint)lock,
- &(lock->cfile_name), lock->cline);
+ lock->cfile_name, lock->cline);
}
rw_s_system_call_count++;
+ rw_s_os_wait_count++;
- sync_array_wait_event(sync_primary_wait_array, index);
+ sync_array_wait_event(sync_primary_wait_array, index);
goto lock_loop;
}
@@ -307,13 +302,10 @@ rw_lock_x_lock_low(
not succeed, RW_LOCK_EX if success,
RW_LOCK_WAIT_EX, if got wait reservation */
rw_lock_t* lock, /* in: pointer to rw-lock */
- ulint pass /* in: pass value; != 0, if the lock will
+ ulint pass, /* in: pass value; != 0, if the lock will
be passed to another thread to unlock */
- #ifdef UNIV_SYNC_DEBUG
- ,char* file_name, /* in: file name where lock requested */
- ulint line /* in: line where requested */
- #endif
-)
+ char* file_name,/* in: file name where lock requested */
+ ulint line) /* in: line where requested */
{
ut_ad(mutex_own(rw_lock_get_mutex(lock)));
@@ -330,6 +322,8 @@ rw_lock_x_lock_low(
rw_lock_add_debug_info(lock, pass, RW_LOCK_EX,
file_name, line);
#endif
+ lock->last_x_file_name = file_name;
+ lock->last_x_line = line;
/* Locking succeeded, we may return */
return(RW_LOCK_EX);
@@ -364,6 +358,9 @@ rw_lock_x_lock_low(
file_name, line);
#endif
+ lock->last_x_file_name = file_name;
+ lock->last_x_line = line;
+
/* Locking succeeded, we may return */
return(RW_LOCK_EX);
}
@@ -382,6 +379,9 @@ rw_lock_x_lock_low(
line);
#endif
+ lock->last_x_file_name = file_name;
+ lock->last_x_line = line;
+
/* Locking succeeded, we may return */
return(RW_LOCK_EX);
}
@@ -404,13 +404,10 @@ void
rw_lock_x_lock_func(
/*================*/
rw_lock_t* lock, /* in: pointer to rw-lock */
- ulint pass /* in: pass value; != 0, if the lock will
+ ulint pass, /* in: pass value; != 0, if the lock will
be passed to another thread to unlock */
- #ifdef UNIV_SYNC_DEBUG
- ,char* file_name, /* in: file name where lock requested */
- ulint line /* in: line where requested */
- #endif
-)
+ char* file_name,/* in: file name where lock requested */
+ ulint line) /* in: line where requested */
{
ulint index; /* index of the reserved wait cell */
ulint state; /* lock state acquired */
@@ -422,11 +419,7 @@ lock_loop:
/* Acquire the mutex protecting the rw-lock fields */
mutex_enter_fast(&(lock->mutex));
- state = rw_lock_x_lock_low(lock, pass
- #ifdef UNIV_SYNC_DEBUG
- ,file_name, line
- #endif
- );
+ state = rw_lock_x_lock_low(lock, pass, file_name, line);
mutex_exit(&(lock->mutex));
@@ -469,6 +462,7 @@ lock_loop:
os_thread_yield();
}
} else {
+ i = 0; /* Eliminate a compiler warning */
ut_error;
}
@@ -476,7 +470,7 @@ lock_loop:
printf(
"Thread %lu spin wait rw-x-lock at %lx cfile %s cline %lu rnds %lu\n",
os_thread_get_curr_id(), (ulint)lock,
- &(lock->cfile_name), lock->cline, i);
+ lock->cfile_name, lock->cline, i);
}
rw_x_spin_wait_count++;
@@ -486,11 +480,7 @@ lock_loop:
mutex_enter(rw_lock_get_mutex(lock));
- state = rw_lock_x_lock_low(lock, pass
- #ifdef UNIV_SYNC_DEBUG
- ,file_name, line
- #endif
- );
+ state = rw_lock_x_lock_low(lock, pass, file_name, line);
if (state == RW_LOCK_EX) {
mutex_exit(rw_lock_get_mutex(lock));
@@ -502,9 +492,7 @@ lock_loop:
sync_array_reserve_cell(sync_primary_wait_array,
lock, RW_LOCK_EX,
- #ifdef UNIV_SYNC_DEBUG
file_name, line,
- #endif
&index);
rw_lock_set_waiters(lock, 1);
@@ -514,11 +502,12 @@ lock_loop:
if (srv_print_latch_waits) {
printf(
"Thread %lu OS wait for rw-x-lock at %lx cfile %s cline %lu\n",
- os_thread_get_curr_id(), (ulint)lock, &(lock->cfile_name),
+ os_thread_get_curr_id(), (ulint)lock, lock->cfile_name,
lock->cline);
}
rw_x_system_call_count++;
+ rw_x_os_wait_count++;
sync_array_wait_event(sync_primary_wait_array, index);
@@ -537,8 +526,8 @@ rw_lock_debug_mutex_enter(void)
/*==========================*/
{
loop:
- if (0 == mutex_enter_nowait(&rw_lock_debug_mutex)) {
-
+ if (0 == mutex_enter_nowait(&rw_lock_debug_mutex,
+ IB__FILE__, __LINE__)) {
return;
}
@@ -546,8 +535,8 @@ loop:
rw_lock_debug_waiters = TRUE;
- if (0 == mutex_enter_nowait(&rw_lock_debug_mutex)) {
-
+ if (0 == mutex_enter_nowait(&rw_lock_debug_mutex,
+ IB__FILE__, __LINE__)) {
return;
}
@@ -747,8 +736,6 @@ rw_lock_list_print_info(void)
/*=========================*/
{
#ifndef UNIV_SYNC_DEBUG
- printf(
- "Sorry, cannot give rw-lock list info in non-debug version!\n");
#else
rw_lock_t* lock;
ulint count = 0;
@@ -756,8 +743,9 @@ rw_lock_list_print_info(void)
mutex_enter(&rw_lock_list_mutex);
- printf("----------------------------------------------\n");
- printf("RW-LOCK INFO\n");
+ printf("-------------\n");
+ printf("RW-LATCH INFO\n");
+ printf("-------------\n");
lock = UT_LIST_GET_FIRST(rw_lock_list);
@@ -810,9 +798,9 @@ rw_lock_print(
ulint count = 0;
rw_lock_debug_t* info;
- printf("-------------------------------------------------\n");
- printf("RW-LOCK INFO\n");
- printf("RW-LOCK: %lx ", (ulint)lock);
+ printf("-------------\n");
+ printf("RW-LATCH INFO\n");
+ printf("RW-LATCH: %lx ", (ulint)lock);
if ((rw_lock_get_writer(lock) != RW_LOCK_NOT_LOCKED)
|| (rw_lock_get_reader_count(lock) != 0)
diff --git a/innobase/sync/sync0sync.c b/innobase/sync/sync0sync.c
index f0dbe145098..8b2a39e15eb 100644
--- a/innobase/sync/sync0sync.c
+++ b/innobase/sync/sync0sync.c
@@ -119,6 +119,7 @@ ulint mutex_system_call_count = 0;
ulint mutex_spin_round_count = 0;
ulint mutex_spin_wait_count = 0;
+ulint mutex_os_wait_count = 0;
ulint mutex_exit_count = 0;
/* The global array of wait cells for implementation of the database's own
@@ -228,12 +229,10 @@ mutex_create_func(
mutex_set_waiters(mutex, 0);
mutex->magic_n = MUTEX_MAGIC_N;
mutex->line = 0;
- mutex->file_name = "FILE NOT KNOWN";
+ mutex->file_name = "not yet reserved";
mutex->thread_id = ULINT_UNDEFINED;
mutex->level = SYNC_LEVEL_NONE;
- ut_memcpy(&(mutex->cfile_name), cfile_name,
- ut_min(MUTEX_CNAME_LEN - 1, ut_strlen(cfile_name)));
- mutex->cfile_name[MUTEX_CNAME_LEN - 1] = '\0';
+ mutex->cfile_name = cfile_name;
mutex->cline = cline;
/* Check that lock_word is aligned; this is important on Intel */
@@ -291,17 +290,23 @@ immediately, returns with return value 1. */
ulint
mutex_enter_nowait(
/*===============*/
- /* out: 0 if succeed, 1 if not */
- mutex_t* mutex) /* in: pointer to mutex */
+ /* out: 0 if succeed, 1 if not */
+ mutex_t* mutex, /* in: pointer to mutex */
+ char* file_name, /* in: file name where mutex
+ requested */
+ ulint line) /* in: line where requested */
{
ut_ad(mutex_validate(mutex));
if (!mutex_test_and_set(mutex)) {
#ifdef UNIV_SYNC_DEBUG
- mutex_set_debug_info(mutex, IB__FILE__, __LINE__);
+ mutex_set_debug_info(mutex, file_name, line);
#endif
+ mutex->file_name = file_name;
+ mutex->line = line;
+
return(0); /* Succeeded! */
}
@@ -349,13 +354,9 @@ for the mutex before suspending the thread. */
void
mutex_spin_wait(
/*============*/
- mutex_t* mutex /* in: pointer to mutex */
-
- #ifdef UNIV_SYNC_DEBUG
- ,char* file_name, /* in: file name where mutex requested */
- ulint line /* in: line where requested */
- #endif
-)
+ mutex_t* mutex, /* in: pointer to mutex */
+ char* file_name, /* in: file name where mutex requested */
+ ulint line) /* in: line where requested */
{
ulint index; /* index of the reserved wait cell */
ulint i; /* spin round count */
@@ -391,7 +392,7 @@ spin_loop:
if (srv_print_latch_waits) {
printf(
"Thread %lu spin wait mutex at %lx cfile %s cline %lu rnds %lu\n",
- os_thread_get_curr_id(), (ulint)mutex, &(mutex->cfile_name),
+ os_thread_get_curr_id(), (ulint)mutex, mutex->cfile_name,
mutex->cline, i);
}
@@ -404,6 +405,9 @@ spin_loop:
mutex_set_debug_info(mutex, file_name, line);
#endif
+ mutex->file_name = file_name;
+ mutex->line = line;
+
return;
}
@@ -423,9 +427,7 @@ spin_loop:
sync_array_reserve_cell(sync_primary_wait_array, mutex,
SYNC_MUTEX,
- #ifdef UNIV_SYNC_DEBUG
file_name, line,
- #endif
&index);
mutex_system_call_count++;
@@ -438,7 +440,9 @@ spin_loop:
mutex_set_waiters(mutex, 1);
- if (mutex_test_and_set(mutex) == 0) {
+ /* Try to reserve still a few times */
+ for (i = 0; i < 4; i++) {
+ if (mutex_test_and_set(mutex) == 0) {
/* Succeeded! Free the reserved wait cell */
@@ -448,6 +452,9 @@ spin_loop:
mutex_set_debug_info(mutex, file_name, line);
#endif
+ mutex->file_name = file_name;
+ mutex->line = line;
+
if (srv_print_latch_waits) {
printf(
"Thread %lu spin wait succeeds at 2: mutex at %lx\n",
@@ -459,6 +466,7 @@ spin_loop:
/* Note that in this case we leave the waiters field
set to 1. We cannot reset it to zero, as we do not know
if there are other waiters. */
+ }
}
/* Now we know that there has been some thread holding the mutex
@@ -468,11 +476,13 @@ spin_loop:
if (srv_print_latch_waits) {
printf(
"Thread %lu OS wait mutex at %lx cfile %s cline %lu rnds %lu\n",
- os_thread_get_curr_id(), (ulint)mutex, &(mutex->cfile_name),
+ os_thread_get_curr_id(), (ulint)mutex, mutex->cfile_name,
mutex->cline, i);
}
mutex_system_call_count++;
+ mutex_os_wait_count++;
+
sync_array_wait_event(sync_primary_wait_array, index);
goto mutex_loop;
@@ -578,7 +588,6 @@ mutex_list_print_info(void)
/*=======================*/
{
#ifndef UNIV_SYNC_DEBUG
- printf("Sorry, cannot give mutex list info in non-debug version!\n");
#else
mutex_t* mutex;
char* file_name;
@@ -586,8 +595,9 @@ mutex_list_print_info(void)
os_thread_id_t thread_id;
ulint count = 0;
- printf("-----------------------------------------------\n");
+ printf("----------\n");
printf("MUTEX INFO\n");
+ printf("----------\n");
mutex_enter(&mutex_list_mutex);
@@ -597,10 +607,10 @@ mutex_list_print_info(void)
count++;
if (mutex_get_lock_word(mutex) != 0) {
-
- mutex_get_debug_info(mutex, &file_name, &line, &thread_id);
-
- printf("Locked mutex: addr %lx thread %ld file %s line %ld\n",
+ mutex_get_debug_info(mutex, &file_name, &line,
+ &thread_id);
+ printf(
+ "Locked mutex: addr %lx thread %ld file %s line %ld\n",
(ulint)mutex, thread_id, file_name, line);
}
@@ -791,7 +801,7 @@ sync_thread_levels_g(
limit, slot->level);
if (mutex->magic_n == MUTEX_MAGIC_N) {
- printf("Mutex created at %s %lu\n", &(mutex->cfile_name),
+ printf("Mutex created at %s %lu\n", mutex->cfile_name,
mutex->cline);
if (mutex_get_lock_word(mutex) != 0) {
@@ -890,6 +900,7 @@ sync_thread_levels_empty_gen(
if (slot->latch != NULL && (!dict_mutex_allowed ||
(slot->level != SYNC_DICT
+ && slot->level != SYNC_FOREIGN_KEY_CHECK
&& slot->level != SYNC_PURGE_IS_RUNNING))) {
lock = slot->latch;
@@ -993,6 +1004,8 @@ sync_thread_add_level(
ut_a(sync_thread_levels_g(array, SYNC_RECV));
} else if (level == SYNC_LOG) {
ut_a(sync_thread_levels_g(array, SYNC_LOG));
+ } else if (level == SYNC_THR_LOCAL) {
+ ut_a(sync_thread_levels_g(array, SYNC_THR_LOCAL));
} else if (level == SYNC_ANY_LATCH) {
ut_a(sync_thread_levels_g(array, SYNC_ANY_LATCH));
} else if (level == SYNC_TRX_SYS_HEADER) {
@@ -1071,6 +1084,8 @@ sync_thread_add_level(
SYNC_IBUF_PESS_INSERT_MUTEX));
} else if (level == SYNC_DICT_AUTOINC_MUTEX) {
ut_a(sync_thread_levels_g(array, SYNC_DICT_AUTOINC_MUTEX));
+ } else if (level == SYNC_FOREIGN_KEY_CHECK) {
+ ut_a(sync_thread_levels_g(array, SYNC_FOREIGN_KEY_CHECK));
} else if (level == SYNC_DICT_HEADER) {
ut_a(sync_thread_levels_g(array, SYNC_DICT_HEADER));
} else if (level == SYNC_PURGE_IS_RUNNING) {
@@ -1231,15 +1246,17 @@ void
sync_print_wait_info(void)
/*======================*/
{
+#ifdef UNIV_SYNC_DEBUG
+ printf("Mutex exits %lu, rws exits %lu, rwx exits %lu\n",
+ mutex_exit_count, rw_s_exit_count, rw_x_exit_count);
+#endif
printf(
- "Mut ex %lu sp %lu r %lu sys %lu; rws %lu %lu %lu; rwx %lu %lu %lu\n",
- mutex_exit_count,
+"Mutex spin waits %lu, rounds %lu, OS waits %lu\n"
+"RW-shared spins %lu, OS waits %lu; RW-excl spins %lu, OS waits %lu\n",
mutex_spin_wait_count, mutex_spin_round_count,
- mutex_system_call_count,
- rw_s_exit_count,
- rw_s_spin_wait_count, rw_s_system_call_count,
- rw_x_exit_count,
- rw_x_spin_wait_count, rw_x_system_call_count);
+ mutex_os_wait_count,
+ rw_s_spin_wait_count, rw_s_os_wait_count,
+ rw_x_spin_wait_count, rw_x_os_wait_count);
}
/***********************************************************************
@@ -1249,10 +1266,8 @@ void
sync_print(void)
/*============*/
{
- printf("SYNC INFO:\n");
mutex_list_print_info();
rw_lock_list_print_info();
sync_array_print_info(sync_primary_wait_array);
sync_print_wait_info();
- printf("-----------------------------------------------------\n");
}
diff --git a/innobase/thr/thr0loc.c b/innobase/thr/thr0loc.c
index 897e53557c3..d3d7a58d313 100644
--- a/innobase/thr/thr0loc.c
+++ b/innobase/thr/thr0loc.c
@@ -224,5 +224,5 @@ thr_local_init(void)
thr_local_hash = hash_create(OS_THREAD_MAX_N + 100);
mutex_create(&thr_local_mutex);
- mutex_set_level(&thr_local_mutex, SYNC_ANY_LATCH);
+ mutex_set_level(&thr_local_mutex, SYNC_THR_LOCAL);
}
diff --git a/innobase/trx/trx0purge.c b/innobase/trx/trx0purge.c
index afb83926fa3..c50ffb65e00 100644
--- a/innobase/trx/trx0purge.c
+++ b/innobase/trx/trx0purge.c
@@ -276,6 +276,12 @@ trx_purge_add_update_undo_to_history(
if (undo->state != TRX_UNDO_CACHED) {
/* The undo log segment will not be reused */
+ if (undo->id >= TRX_RSEG_N_SLOTS) {
+ fprintf(stderr,
+ "InnoDB: Error: undo->id is %lu\n", undo->id);
+ ut_a(0);
+ }
+
trx_rsegf_set_nth_undo(rseg_header, undo->id, FIL_NULL, mtr);
hist_size = mtr_read_ulint(rseg_header + TRX_RSEG_HISTORY_SIZE,
diff --git a/innobase/trx/trx0rec.c b/innobase/trx/trx0rec.c
index 64febb8f523..73153cfaa37 100644
--- a/innobase/trx/trx0rec.c
+++ b/innobase/trx/trx0rec.c
@@ -800,7 +800,7 @@ trx_undo_update_rec_get_update(
TRX_UNDO_DEL_MARK_REC; in the last case,
only trx id and roll ptr fields are added to
the update vector */
- dulint trx_id, /* in: transaction id from this undorecord */
+ dulint trx_id, /* in: transaction id from this undo record */
dulint roll_ptr,/* in: roll pointer from this undo record */
ulint info_bits,/* in: info bits from this undo record */
mem_heap_t* heap, /* in: memory heap from which the memory
@@ -1078,9 +1078,7 @@ trx_undo_report_row_operation(
undo_page = buf_page_get_gen(undo->space, page_no,
RW_X_LATCH, undo->guess_page,
BUF_GET,
- #ifdef UNIV_SYNC_DEBUG
IB__FILE__, __LINE__,
- #endif
&mtr);
buf_page_dbg_add_level(undo_page, SYNC_TRX_UNDO_PAGE);
@@ -1282,7 +1280,7 @@ trx_undo_prev_version_build(
return(DB_SUCCESS);
}
- rec_trx_id = row_get_rec_trx_id(rec, index);
+ rec_trx_id = row_get_rec_trx_id(rec, index);
err = trx_undo_get_undo_rec(roll_ptr, rec_trx_id, &undo_rec, heap);
diff --git a/innobase/trx/trx0trx.c b/innobase/trx/trx0trx.c
index 5d8c57edf34..13b37775dce 100644
--- a/innobase/trx/trx0trx.c
+++ b/innobase/trx/trx0trx.c
@@ -109,7 +109,10 @@ trx_create(
UT_LIST_INIT(trx->trx_locks);
trx->has_search_latch = FALSE;
+ trx->search_latch_timeout = BTR_SEA_TIMEOUT;
+ trx->auto_inc_lock = NULL;
+
trx->read_view_heap = mem_heap_create(256);
trx->read_view = NULL;
@@ -193,6 +196,7 @@ trx_free(
ut_a(UT_LIST_GET_LEN(trx->wait_thrs) == 0);
ut_a(!trx->has_search_latch);
+ ut_a(!trx->auto_inc_lock);
if (trx->lock_heap) {
mem_heap_free(trx->lock_heap);
diff --git a/innobase/trx/trx0undo.c b/innobase/trx/trx0undo.c
index 1f408428582..598090bdee2 100644
--- a/innobase/trx/trx0undo.c
+++ b/innobase/trx/trx0undo.c
@@ -361,6 +361,8 @@ trx_undo_page_init(
mach_write_to_2(page_hdr + TRX_UNDO_PAGE_FREE,
TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE);
+ fil_page_set_type(undo_page, FIL_PAGE_UNDO_LOG);
+
trx_undo_page_init_log(undo_page, type, mtr);
}
@@ -1106,6 +1108,12 @@ trx_undo_mem_create_at_db_start(
page_t* last_page;
trx_undo_rec_t* rec;
+ if (id >= TRX_RSEG_N_SLOTS) {
+ fprintf(stderr,
+ "InnoDB: Error: undo->id is %lu\n", id);
+ ut_a(0);
+ }
+
undo_page = trx_undo_page_get(rseg->space, page_no, mtr);
page_header = undo_page + TRX_UNDO_PAGE_HDR;
@@ -1251,7 +1259,13 @@ trx_undo_mem_create(
trx_undo_t* undo;
ut_ad(mutex_own(&(rseg->mutex)));
-
+
+ if (id >= TRX_RSEG_N_SLOTS) {
+ fprintf(stderr,
+ "InnoDB: Error: undo->id is %lu\n", id);
+ ut_a(0);
+ }
+
undo = mem_alloc(sizeof(trx_undo_t));
undo->id = id;
@@ -1290,6 +1304,12 @@ trx_undo_mem_init_for_reuse(
{
ut_ad(mutex_own(&((undo->rseg)->mutex)));
+ if (undo->id >= TRX_RSEG_N_SLOTS) {
+ fprintf(stderr,
+ "InnoDB: Error: undo->id is %lu\n", undo->id);
+ ut_a(0);
+ }
+
undo->state = TRX_UNDO_ACTIVE;
undo->del_marks = FALSE;
undo->trx_id = trx_id;
@@ -1308,6 +1328,12 @@ trx_undo_mem_free(
/*==============*/
trx_undo_t* undo) /* in: the undo object to be freed */
{
+ if (undo->id >= TRX_RSEG_N_SLOTS) {
+ fprintf(stderr,
+ "InnoDB: Error: undo->id is %lu\n", undo->id);
+ ut_a(0);
+ }
+
mem_free(undo);
}
@@ -1493,6 +1519,9 @@ trx_undo_assign_undo(
mutex_exit(&(rseg->mutex));
mtr_commit(&mtr);
+ fprintf(stderr, "InnoDB: no undo log slots free\n");
+ ut_a(0);
+
return(NULL);
}
}
@@ -1536,6 +1565,12 @@ trx_undo_set_state_at_finish(
ut_ad(trx && undo && mtr);
+ if (undo->id >= TRX_RSEG_N_SLOTS) {
+ fprintf(stderr,
+ "InnoDB: Error: undo->id is %lu\n", undo->id);
+ ut_a(0);
+ }
+
undo_page = trx_undo_page_get(undo->space, undo->hdr_page_no, mtr);
seg_hdr = undo_page + TRX_UNDO_SEG_HDR;
diff --git a/innobase/ut/ut0mem.c b/innobase/ut/ut0mem.c
index ebeefe0c297..630bd3a9b71 100644
--- a/innobase/ut/ut0mem.c
+++ b/innobase/ut/ut0mem.c
@@ -13,15 +13,22 @@ Created 5/11/1994 Heikki Tuuri
#endif
#include "mem0mem.h"
-
+#include "os0sync.h"
/* This struct is placed first in every allocated memory block */
typedef struct ut_mem_block_struct ut_mem_block_t;
+/* The total amount of memory currently allocated from the OS with malloc */
+ulint ut_total_allocated_memory = 0;
+
struct ut_mem_block_struct{
- UT_LIST_NODE_T(ut_mem_block_t) mem_block_list;/* mem block list node */
+ UT_LIST_NODE_T(ut_mem_block_t) mem_block_list;
+ /* mem block list node */
+ ulint size; /* size of allocated memory */
+ ulint magic_n;
};
+#define UT_MEM_MAGIC_N 1601650166
/* List of all memory blocks allocated from the operating system
with malloc */
@@ -70,16 +77,17 @@ ut_malloc_low(
if (ret == NULL) {
fprintf(stderr,
"InnoDB: Fatal error: cannot allocate %lu bytes of\n"
- "InnoDB: memory with malloc!\n"
- "InnoDB: Operating system errno: %lu\n"
+ "InnoDB: memory with malloc! Total allocated memory\n"
+ "InnoDB: by InnoDB %lu bytes. Operating system errno: %lu\n"
"InnoDB: Cannot continue operation!\n"
"InnoDB: Check if you should increase the swap file or\n"
- "InnoDB: ulimits of your operating system.\n", n, errno);
+ "InnoDB: ulimits of your operating system.\n",
+ n, ut_total_allocated_memory, errno);
os_fast_mutex_unlock(&ut_list_mutex);
exit(1);
- }
+ }
if (set_to_zero) {
#ifdef UNIV_SET_MEM_TO_ZERO
@@ -87,6 +95,11 @@ ut_malloc_low(
#endif
}
+ ((ut_mem_block_t*)ret)->size = n + sizeof(ut_mem_block_t);
+ ((ut_mem_block_t*)ret)->magic_n = UT_MEM_MAGIC_N;
+
+ ut_total_allocated_memory += n + sizeof(ut_mem_block_t);
+
UT_LIST_ADD_FIRST(mem_block_list, ut_mem_block_list,
((ut_mem_block_t*)ret));
os_fast_mutex_unlock(&ut_list_mutex);
@@ -107,7 +120,7 @@ ut_malloc(
return(ut_malloc_low(n, TRUE));
}
/**************************************************************************
-Frees a memory bloock allocated with ut_malloc. */
+Frees a memory block allocated with ut_malloc. */
void
ut_free(
@@ -120,6 +133,11 @@ ut_free(
os_fast_mutex_lock(&ut_list_mutex);
+ ut_a(block->magic_n == UT_MEM_MAGIC_N);
+ ut_a(ut_total_allocated_memory >= block->size);
+
+ ut_total_allocated_memory -= block->size;
+
UT_LIST_REMOVE(mem_block_list, ut_mem_block_list, block);
free(block);
@@ -139,11 +157,18 @@ ut_free_all_mem(void)
while (block = UT_LIST_GET_FIRST(ut_mem_block_list)) {
+ ut_a(block->magic_n == UT_MEM_MAGIC_N);
+ ut_a(ut_total_allocated_memory >= block->size);
+
+ ut_total_allocated_memory -= block->size;
+
UT_LIST_REMOVE(mem_block_list, ut_mem_block_list, block);
free(block);
}
os_fast_mutex_unlock(&ut_list_mutex);
+
+ ut_a(ut_total_allocated_memory == 0);
}
/**************************************************************************
diff --git a/innobase/ut/ut0ut.c b/innobase/ut/ut0ut.c
index 1436f6a10a3..964d5bca567 100644
--- a/innobase/ut/ut0ut.c
+++ b/innobase/ut/ut0ut.c
@@ -187,6 +187,8 @@ ut_sprintf_buf(
for (i = 0; i < len; i++) {
if (isprint((char)(*data))) {
n += sprintf(str + n, "%c", (char)*data);
+ } else {
+ n += sprintf(str + n, ".");
}
data++;
diff --git a/libmysql/manager.c b/libmysql/manager.c
index 07595ab8dee..2f34f49209c 100644
--- a/libmysql/manager.c
+++ b/libmysql/manager.c
@@ -16,31 +16,30 @@
MA 02111-1307, USA */
#include <my_global.h>
-#if defined(__WIN__) || defined(_WIN32) || defined(_WIN64)
-#include <winsock.h>
-#include <odbcinst.h>
+#if defined(THREAD)
+#include <my_pthread.h> /* because of signal() */
#endif
+#include "mysql.h"
+#include "mysql_version.h"
+#include "mysqld_error.h"
#include <my_sys.h>
#include <mysys_err.h>
#include <m_string.h>
#include <m_ctype.h>
-#include "mysql.h"
-#include "mysql_version.h"
-#include "mysqld_error.h"
-#include "errmsg.h"
+#include <my_net.h>
+#include <errmsg.h>
#include <violite.h>
#include <sys/stat.h>
#include <signal.h>
-#include <time.h>
#include <errno.h>
-#ifdef HAVE_PWD_H
-#include <pwd.h>
+#if defined(OS2)
+# include <sys/un.h>
+#elif !defined( __WIN__)
+#include <sys/resource.h>
+#ifdef HAVE_SYS_UN_H
+# include <sys/un.h>
#endif
-#if !defined(MSDOS) && !defined(__WIN__)
-#include <sys/socket.h>
-#include <netinet/in.h>
-#include <arpa/inet.h>
#include <netdb.h>
#ifdef HAVE_SELECT_H
# include <select.h>
@@ -48,18 +47,13 @@
#ifdef HAVE_SYS_SELECT_H
#include <sys/select.h>
#endif
-#endif
-#ifdef HAVE_SYS_UN_H
-# include <sys/un.h>
-#endif
-#if defined(THREAD) && !defined(__WIN__)
-#include <my_pthread.h> /* because of signal() */
-#endif
+#include <sys/utsname.h>
+#endif /* __WIN__ */
+
#ifndef INADDR_NONE
#define INADDR_NONE -1
#endif
-
#define RES_BUF_SHIFT 5
#define SOCKET_ERROR -1
#define NET_BUF_SIZE 2048
diff --git a/libmysqld/Makefile.am b/libmysqld/Makefile.am
index bcec7386ff1..badbc6d3425 100644
--- a/libmysqld/Makefile.am
+++ b/libmysqld/Makefile.am
@@ -45,7 +45,8 @@ sqlsources = convert.cc derror.cc field.cc field_conv.cc filesort.cc \
item_func.cc item_strfunc.cc item_sum.cc item_timefunc.cc \
item_uniq.cc key.cc lock.cc log.cc log_event.cc \
mini_client.cc net_pkg.cc net_serv.cc opt_ft.cc opt_range.cc \
- opt_sum.cc procedure.cc records.cc slave.cc sql_acl.cc \
+ opt_sum.cc procedure.cc records.cc sql_acl.cc \
+ repl_failsafe.cc slave.cc \
sql_analyse.cc sql_base.cc sql_cache.cc sql_class.cc \
sql_crypt.cc sql_db.cc sql_delete.cc sql_insert.cc sql_lex.cc \
sql_list.cc sql_manager.cc sql_map.cc sql_parse.cc \
diff --git a/mysql-test/mysql-test-run.sh b/mysql-test/mysql-test-run.sh
index 7ee629e826e..a7cbacc0aa8 100644
--- a/mysql-test/mysql-test-run.sh
+++ b/mysql-test/mysql-test-run.sh
@@ -12,6 +12,7 @@
DB=test
DBPASSWD=
VERBOSE=""
+USE_MANAGER=0
TZ=GMT-3; export TZ # for UNIX_TIMESTAMP tests to work
#++
@@ -124,7 +125,7 @@ MASTER_RUNNING=0
MASTER_MYPORT=9306
SLAVE_RUNNING=0
SLAVE_MYPORT=9307
-MYSQL_MANAGER_PORT=9308
+MYSQL_MANAGER_PORT=9305 # needs to be out of the way of slaves
MYSQL_MANAGER_PW_FILE=$MYSQL_TEST_DIR/var/tmp/manager.pwd
MYSQL_MANAGER_LOG=$MYSQL_TEST_DIR/var/log/manager.log
MYSQL_MANAGER_USER=root
@@ -162,6 +163,8 @@ while test $# -gt 0; do
--ssl-ca=$BASEDIR/SSL/cacert.pem \
--ssl-cert=$BASEDIR/SSL/server-cert.pem \
--ssl-key=$BASEDIR/SSL/server-key.pem" ;;
+ --no-manager | --skip-manager) USE_MANAGER=0 ;;
+ --manager) USE_MANAGER=1 ;;
--skip-innobase)
EXTRA_MASTER_MYSQLD_OPT="$EXTRA_MASTER_MYSQLD_OPT --skip-innobase"
EXTRA_SLAVE_MYSQLD_OPT="$EXTRA_SLAVE_MYSQLD_OPT --skip-innobase" ;;
@@ -473,6 +476,14 @@ mysql_install_db () {
error "Could not install slave test DBs"
exit 1
fi
+
+ for slave_num in 1 2 ;
+ do
+ rm -rf var/slave$slave_num-data/
+ mkdir -p var/slave$slave_num-data/mysql
+ mkdir -p var/slave$slave_num-data/test
+ cp var/slave-data/mysql/* var/slave$slave_num-data/mysql
+ done
# Give mysqld some time to die.
sleep $SLEEP_TIME
return 0
@@ -526,12 +537,19 @@ abort_if_failed()
start_manager()
{
+ if [ $USE_MANAGER = 0 ] ; then
+ echo "Manager disabled, skipping manager start. Tests requiring manager will\
+ be skipped"
+ return
+ fi
+ $ECHO "Starting MySQL Manager"
MYSQL_MANAGER_PW=`$MYSQL_MANAGER_PWGEN -u $MYSQL_MANAGER_USER \
-o $MYSQL_MANAGER_PW_FILE`
$MYSQL_MANAGER --log=$MYSQL_MANAGER_LOG --port=$MYSQL_MANAGER_PORT \
--password-file=$MYSQL_MANAGER_PW_FILE
abort_if_failed "Could not start MySQL manager"
- mysqltest_manager_args="--manager-user=$MYSQL_MANAGER_USER \
+ mysqltest_manager_args="--manager-host=localhost \
+ --manager-user=$MYSQL_MANAGER_USER \
--manager-password=$MYSQL_MANAGER_PW \
--manager-port=$MYSQL_MANAGER_PORT \
--manager-wait-timeout=$START_WAIT_TIMEOUT"
@@ -542,6 +560,9 @@ start_manager()
stop_manager()
{
+ if [ $USE_MANAGER = 0 ] ; then
+ return
+ fi
$MYSQL_MANAGER_CLIENT $MANAGER_QUIET_OPT -u$MYSQL_MANAGER_USER \
-p$MYSQL_MANAGER_PW -P $MYSQL_MANAGER_PORT <<EOF
shutdown
@@ -552,6 +573,11 @@ manager_launch()
{
ident=$1
shift
+ if [ $USE_MANAGER = 0 ] ; then
+ $@ >$CUR_MYERR 2>&1 &
+ sleep 2 #hack
+ return
+ fi
$MYSQL_MANAGER_CLIENT $MANAGER_QUIET_OPT --user=$MYSQL_MANAGER_USER \
--password=$MYSQL_MANAGER_PW --port=$MYSQL_MANAGER_PORT <<EOF
def_exec $ident $@
@@ -567,6 +593,11 @@ manager_term()
{
ident=$1
shift
+ if [ $USE_MANAGER = 0 ] ; then
+ $MYSQLADMIN --no-defaults -uroot --socket=$MYSQL_TMP_DIR/$ident.sock -O \
+ connect_timeout=5 shutdown >/dev/null 2>&1
+ return
+ fi
$MYSQL_MANAGER_CLIENT $MANAGER_QUIET_OPT --user=$MYSQL_MANAGER_USER \
--password=$MYSQL_MANAGER_PW --port=$MYSQL_MANAGER_PORT <<EOF
stop_exec $ident $STOP_WAIT_TIMEOUT
@@ -593,7 +624,7 @@ start_master()
then
master_args="--no-defaults --log-bin=$MYSQL_TEST_DIR/var/log/master-bin \
--server-id=1 --rpl-recovery-rank=1 \
- --basedir=$MY_BASEDIR \
+ --basedir=$MY_BASEDIR --init-rpl-role=master \
--port=$MASTER_MYPORT \
--exit-info=256 \
--datadir=$MASTER_MYDDIR \
@@ -610,7 +641,7 @@ start_master()
else
master_args="--no-defaults --log-bin=$MYSQL_TEST_DIR/var/log/master-bin \
--server-id=1 --rpl-recovery-rank=1 \
- --basedir=$MY_BASEDIR \
+ --basedir=$MY_BASEDIR --init-rpl-role=master \
--port=$MASTER_MYPORT \
--datadir=$MASTER_MYDDIR \
--pid-file=$MASTER_MYPID \
@@ -647,10 +678,33 @@ start_master()
start_slave()
{
[ x$SKIP_SLAVE = x1 ] && return
- [ x$SLAVE_RUNNING = 1 ] && return
-
+ eval "this_slave_running=\$SLAVE$1_RUNNING"
+ [ x$this_slave_running = 1 ] && return
+ #when testing fail-safe replication, we will have more than one slave
+ #in this case, we start secondary slaves with an argument
+ slave_ident="slave$1"
+ if [ -n "$1" ] ;
+ then
+ slave_server_id=`$EXPR 2 + $1`
+ slave_rpl_rank=$slave_server_id
+ slave_port=`expr $SLAVE_MYPORT + $1`
+ slave_log="$SLAVE_MYLOG.$1"
+ slave_err="$SLAVE_MYERR.$1"
+ slave_datadir="var/$slave_ident-data/"
+ slave_pid="$MYRUN_DIR/mysqld-$slave_ident.pid"
+ slave_sock="$SLAVE_MYSOCK-$1"
+ else
+ slave_server_id=2
+ slave_rpl_rank=2
+ slave_port=$SLAVE_MYPORT
+ slave_log=$SLAVE_MYLOG
+ slave_err=$SLAVE_MYERR
+ slave_datadir=$SLAVE_MYDDIR
+ slave_pid=$SLAVE_MYPID
+ slave_sock="$SLAVE_MYSOCK"
+ fi
# Remove stale binary logs
- $RM -f $MYSQL_TEST_DIR/var/log/slave-bin.*
+ $RM -f $MYSQL_TEST_DIR/var/log/$slave_ident-bin.*
#run slave initialization shell script if one exists
if [ -f "$slave_init_script" ] ;
@@ -664,51 +718,51 @@ start_slave()
--master-host=127.0.0.1 \
--master-password= \
--master-port=$MASTER_MYPORT \
- --server-id=2 --rpl-recovery-rank=2"
+ --server-id=$slave_server_id --rpl-recovery-rank=$slave_rpl_rank"
else
master_info=$SLAVE_MASTER_INFO
fi
- $RM -f $SLAVE_MYDDIR/log.*
+ $RM -f $slave_datadir/log.*
slave_args="--no-defaults $master_info \
--exit-info=256 \
- --log-bin=$MYSQL_TEST_DIR/var/log/slave-bin \
+ --log-bin=$MYSQL_TEST_DIR/var/log/$slave_ident-bin \
--log-slave-updates \
- --log=$SLAVE_MYLOG \
+ --log=$slave_log \
--basedir=$MY_BASEDIR \
- --datadir=$SLAVE_MYDDIR \
- --pid-file=$SLAVE_MYPID \
- --port=$SLAVE_MYPORT \
- --socket=$SLAVE_MYSOCK \
+ --datadir=$slave_datadir \
+ --pid-file=$slave_pid \
+ --port=$slave_port \
+ --socket=$slave_sock \
--character-sets-dir=$CHARSETSDIR \
--default-character-set=$CHARACTER_SET \
- --core \
+ --core --init-rpl-role=slave \
--tmpdir=$MYSQL_TMP_DIR \
--language=$LANGUAGE \
--skip-innodb --skip-slave-start \
--slave-load-tmpdir=$SLAVE_LOAD_TMPDIR \
--report-host=127.0.0.1 --report-user=root \
- --report-port=$SLAVE_MYPORT \
+ --report-port=$slave_port \
--master-retry-count=5 \
$SMALL_SERVER \
$EXTRA_SLAVE_OPT $EXTRA_SLAVE_MYSQLD_OPT"
- CUR_MYERR=$SLAVE_MYERR
- CUR_MYSOCK=$SLAVE_MYSOCK
+ CUR_MYERR=$slave_err
+ CUR_MYSOCK=$slave_sock
if [ x$DO_DDD = x1 ]
then
$ECHO "set args $master_args" > $GDB_SLAVE_INIT
- manager_launch slave ddd -display $DISPLAY --debugger \
+ manager_launch $slave_ident ddd -display $DISPLAY --debugger \
"gdb -x $GDB_SLAVE_INIT" $SLAVE_MYSQLD
elif [ x$DO_GDB = x1 ]
then
$ECHO "set args $slave_args" > $GDB_SLAVE_INIT
- manager_launch slave $XTERM -display $DISPLAY -title "Slave" -e gdb -x \
+ manager_launch $slave_ident $XTERM -display $DISPLAY -title "Slave" -e gdb -x \
$GDB_SLAVE_INIT $SLAVE_MYSQLD
else
- manager_launch slave $SLAVE_MYSQLD $slave_args
+ manager_launch $slave_ident $SLAVE_MYSQLD $slave_args
fi
- SLAVE_RUNNING=1
+ eval "SLAVE$1_RUNNING=1"
}
mysql_start () {
@@ -721,23 +775,31 @@ mysql_start () {
+# Stop one slave server if its running flag is set.
+# With no argument the primary slave is stopped (ident "slave", pid file
+# $SLAVE_MYPID, flag SLAVE_RUNNING). With an argument N the secondary slave
+# "slaveN" is stopped (pid file $MYRUN_DIR/mysqld-slaveN.pid, flag
+# SLAVE<N>_RUNNING). If manager_term fails and the pid file still exists,
+# the process is sent SIGTERM and, if the pid file remains, SIGKILL.
stop_slave ()
{
- if [ x$SLAVE_RUNNING = x1 ]
+ eval "this_slave_running=\$SLAVE$1_RUNNING"
+ slave_ident="slave$1"
+ if [ -n "$1" ] ;
then
- manager_term slave
- if [ $? != 0 ] && [ -f $SLAVE_MYPID ]
+ slave_pid="$MYRUN_DIR/mysqld-$slave_ident.pid"
+ else
+ slave_pid=$SLAVE_MYPID
+ fi
+ if [ x$this_slave_running = x1 ]
+ then
+ manager_term $slave_ident
+ if [ $? != 0 ] && [ -f $slave_pid ]
then # try harder!
$ECHO "slave not cooperating with mysqladmin, will try manual kill"
- kill `$CAT $SLAVE_MYPID`
+ kill `$CAT $slave_pid`
sleep $SLEEP_TIME
- if [ -f $SLAVE_MYPID ] ; then
+ # Re-check the pid file of the slave being stopped, not the primary's
+ if [ -f $slave_pid ] ; then
$ECHO "slave refused to die. Sending SIGKILL"
- kill -9 `$CAT $SLAVE_MYPID`
- $RM -f $SLAVE_MYPID
+ kill -9 `$CAT $slave_pid`
+ $RM -f $slave_pid
else
$ECHO "slave responded to SIGTERM "
fi
fi
- SLAVE_RUNNING=0
+ eval "SLAVE$1_RUNNING=0"
fi
}
@@ -771,6 +833,8 @@ mysql_stop ()
stop_master
$ECHO "Master shutdown finished"
stop_slave
+ stop_slave 1
+ stop_slave 2
$ECHO "Slave shutdown finished"
return 1
@@ -800,6 +864,10 @@ run_testcase ()
slave_init_script=$TESTDIR/$tname-slave.sh
slave_master_info_file=$TESTDIR/$tname-slave-master-info.opt
SKIP_SLAVE=`$EXPR \( $tname : rpl \) = 0`
+ if [ $USE_MANAGER = 1 ] ; then
+ many_slaves=`$EXPR \( $tname : rpl_failsafe \) != 0`
+ fi
+
if [ -n "$SKIP_TEST" ] ; then
SKIP_THIS_TEST=`$EXPR \( $tname : "$SKIP_TEST" \) != 0`
if [ x$SKIP_THIS_TEST = x1 ] ;
@@ -874,6 +942,10 @@ run_testcase ()
stop_slave
start_slave
fi
+ if [ x$many_slaves = x1 ]; then
+ start_slave 1
+ start_slave 2
+ fi
fi
cd $MYSQL_TEST_DIR
@@ -906,7 +978,12 @@ run_testcase ()
timestr="$USERT $SYST $REALT"
pname=`$ECHO "$tname "|$CUT -c 1-24`
RES="$pname $timestr"
-
+
+ if [ x$many_slaves = x1 ] ; then
+ stop_slave 1
+ stop_slave 2
+ fi
+
if [ $res = 0 ]; then
total_inc
pass_inc
@@ -963,7 +1040,6 @@ then
$MYSQLADMIN --no-defaults --socket=$SLAVE_MYSOCK -u root -O connect_timeout=5 shutdown > /dev/null 2>&1
$ECHO "Installing Test Databases"
mysql_install_db
- $ECHO "Starting MySQL Manager"
start_manager
#do not automagically start deamons if we are in gdb or running only one test
#case
diff --git a/mysql-test/r/innodb.result b/mysql-test/r/innodb.result
index 596edb84705..20874981840 100644
--- a/mysql-test/r/innodb.result
+++ b/mysql-test/r/innodb.result
@@ -169,8 +169,8 @@ test.t1 optimize error The handler for the table doesn't support check/repair
show keys from t1;
Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Comment
t1 0 PRIMARY 1 id A 2 NULL NULL
-t1 1 parent_id 1 parent_id A 4 NULL NULL
-t1 1 level 1 level A 4 NULL NULL
+t1 1 parent_id 1 parent_id A 2 NULL NULL
+t1 1 level 1 level A 2 NULL NULL
drop table t1;
CREATE TABLE t1 (
gesuchnr int(11) DEFAULT '0' NOT NULL,
@@ -211,7 +211,7 @@ Table Op Msg_type Msg_text
test.t1 analyze error The handler for the table doesn't support check/repair
show keys from t1;
Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Comment
-t1 1 skr 1 a A 3 NULL NULL
+t1 1 skr 1 a A 1 NULL NULL
drop table t1;
create table t1 (a int,b varchar(20),key(a)) type=innodb;
insert into t1 values (1,""), (2,"testing");
diff --git a/mysql-test/r/rpl_failsafe.result b/mysql-test/r/rpl_failsafe.result
index 789795c0343..14b749fada9 100644
--- a/mysql-test/r/rpl_failsafe.result
+++ b/mysql-test/r/rpl_failsafe.result
@@ -6,6 +6,28 @@ slave start;
show variables like 'rpl_recovery_rank';
Variable_name Value
rpl_recovery_rank 1
+show status like 'Rpl_status';
+Variable_name Value
+Rpl_status AUTH_MASTER
+create table t1(n int);
+drop table t1;
show variables like 'rpl_recovery_rank';
Variable_name Value
rpl_recovery_rank 2
+show status like 'Rpl_status';
+Variable_name Value
+Rpl_status ACTIVE_SLAVE
+slave start;
+show variables like 'rpl_recovery_rank';
+Variable_name Value
+rpl_recovery_rank 3
+show status like 'Rpl_status';
+Variable_name Value
+Rpl_status ACTIVE_SLAVE
+slave start;
+show variables like 'rpl_recovery_rank';
+Variable_name Value
+rpl_recovery_rank 4
+show status like 'Rpl_status';
+Variable_name Value
+Rpl_status ACTIVE_SLAVE
diff --git a/mysql-test/t/rpl000018.test b/mysql-test/t/rpl000018.test
index 621147b7c3e..30d601bb375 100644
--- a/mysql-test/t/rpl000018.test
+++ b/mysql-test/t/rpl000018.test
@@ -1,3 +1,4 @@
+require_manager;
connect (master,localhost,root,,test,0,master.sock);
connect (slave,localhost,root,,test,0,slave.sock);
server_stop master;
diff --git a/mysql-test/t/rpl_failsafe.test b/mysql-test/t/rpl_failsafe.test
index b8ef5b3f365..866efbce5bf 100644
--- a/mysql-test/t/rpl_failsafe.test
+++ b/mysql-test/t/rpl_failsafe.test
@@ -1,5 +1,24 @@
+require_manager;
source include/master-slave.inc;
+connect (slave_sec,localhost,root,,test,0,slave.sock-1);
+connect (slave_ter,localhost,root,,test,0,slave.sock-2);
connection master;
show variables like 'rpl_recovery_rank';
+show status like 'Rpl_status';
+create table t1(n int);
+drop table t1;
+save_master_pos;
connection slave;
+sync_with_master;
show variables like 'rpl_recovery_rank';
+show status like 'Rpl_status';
+connection slave_sec;
+slave start;
+sync_with_master;
+show variables like 'rpl_recovery_rank';
+show status like 'Rpl_status';
+connection slave_ter;
+slave start;
+sync_with_master;
+show variables like 'rpl_recovery_rank';
+show status like 'Rpl_status';
diff --git a/mysys/mf_sort.c b/mysys/mf_sort.c
index 754a1deb1a7..383959d0de7 100644
--- a/mysys/mf_sort.c
+++ b/mysys/mf_sort.c
@@ -25,7 +25,7 @@ void my_string_ptr_sort(void *base, uint items, size_s size)
#if INT_MAX > 65536L
uchar **ptr=0;
- if (size <= 20 && items >= 1000 &&
+ if (size <= 20 && items >= 1000 && items < 100000 &&
(ptr= (uchar**) my_malloc(items*sizeof(char*),MYF(0))))
{
radixsort_for_str_ptr((uchar**) base,items,size,ptr);
diff --git a/scripts/make_binary_distribution.sh b/scripts/make_binary_distribution.sh
index 35be819cd2e..b743e228a0f 100644
--- a/scripts/make_binary_distribution.sh
+++ b/scripts/make_binary_distribution.sh
@@ -66,9 +66,12 @@ for i in extra/comp_err extra/replace extra/perror extra/resolveip \
myisam/myisampack sql/mysqld sql/mysqlbinlog \
client/mysql sql/mysqld client/mysqlshow client/mysqlcheck \
client/mysqladmin client/mysqldump client/mysqlimport client/mysqltest \
+ client/mysqlmanagerc client/mysqlmanager-pwgen tools/mysqlmanager \
client/.libs/mysql client/.libs/mysqlshow client/.libs/mysqladmin \
client/.libs/mysqldump client/.libs/mysqlimport client/.libs/mysqltest \
- client/.libs/mysqlcheck
+ client/.libs/mysqlcheck \
+ client/.libs/mysqlmanagerc client/.libs/mysqlmanager-pwgen \
+ tools/.libs/mysqlmanager
do
if [ -f $i ]
then
@@ -88,7 +91,7 @@ do
fi
done
-for i in libmysql/.libs/libmysqlclient.a libmysql/.libs/libmysqlclient.so* libmysql/libmysqlclient.* libmysql_r/.libs/libmysqlclient_r.a libmysql_r/.libs/libmysqlclient_r.so* libmysql_r/libmysqlclient_r.* mysys/libmysys.a strings/libmystrings.a dbug/libdbug.a libmysqld/.libs/libmysqld.a libmysqld/.libs/libmysqld.so* libmysqld/libmysqld.a libmysqld/libmysqld.a
+for i in libmysql/.libs/libmysqlclient.a libmysql/.libs/libmysqlclient.so* libmysql/libmysqlclient.* libmysql_r/.libs/libmysqlclient_r.a libmysql_r/.libs/libmysqlclient_r.so* libmysql_r/libmysqlclient_r.* mysys/libmysys.a strings/libmystrings.a dbug/libdbug.a libmysqld/.libs/libmysqld.a libmysqld/.libs/libmysqld.so* libmysqld/libmysqld.a
do
if [ -f $i ]
then
diff --git a/sql-bench/test-insert.sh b/sql-bench/test-insert.sh
index 655e38b1b0e..9dc3d9aa7c2 100644
--- a/sql-bench/test-insert.sh
+++ b/sql-bench/test-insert.sh
@@ -917,13 +917,19 @@ print "Time for update_with_key (" . ($opt_loop_count*3) . "): " .
timestr(timediff($end_time, $loop_time),"all") . "\n";
$loop_time=new Benchmark;
-for ($i=0 ; $i < $opt_loop_count*3 ; $i+=3)
+$count=0;
+for ($i=1 ; $i < $opt_loop_count*3 ; $i+=3)
{
$sth = $dbh->do("update bench1 set dummy1='updated' where id=$i") or die $DBI::errstr;
+ $end_time=new Benchmark;
+ last if ($estimated=predict_query_time($loop_time,$end_time,\$i,$tests,
+ $opt_loop_count));
}
-
-$end_time=new Benchmark;
-print "Time for update_with_key_prefix (" . ($opt_loop_count) . "): " .
+if ($estimated)
+{ print "Estimated time"; }
+else
+{ print "Time"; }
+print " for update_with_key_prefix (" . ($opt_loop_count) . "): " .
timestr(timediff($end_time, $loop_time),"all") . "\n";
print "\nTesting update of all rows\n";
diff --git a/sql/Makefile.am b/sql/Makefile.am
index cf762d9f6df..4621443f4d2 100644
--- a/sql/Makefile.am
+++ b/sql/Makefile.am
@@ -81,7 +81,7 @@ mysqld_SOURCES = sql_lex.cc sql_handler.cc \
sql_udf.cc sql_analyse.cc sql_analyse.h sql_cache.cc \
slave.cc sql_repl.cc sql_union.cc \
mini_client.cc mini_client_errors.c \
- stacktrace.c
+ stacktrace.c repl_failsafe.h repl_failsafe.cc
gen_lex_hash_SOURCES = gen_lex_hash.cc
gen_lex_hash_LDADD = $(LDADD) $(CXXLDFLAGS)
diff --git a/sql/ha_innobase.cc b/sql/ha_innobase.cc
index e7d98dbe406..7e11fbe46d1 100644
--- a/sql/ha_innobase.cc
+++ b/sql/ha_innobase.cc
@@ -161,7 +161,23 @@ convert_error_code_to_mysql(
} else if (error == (int) DB_DEADLOCK) {
- return(1000000);
+ return(HA_ERR_LOCK_DEADLOCK);
+
+ } else if (error == (int) DB_LOCK_WAIT_TIMEOUT) {
+
+ return(1000001);
+
+ } else if (error == (int) DB_NO_REFERENCED_ROW) {
+
+ return(1000010);
+
+ } else if (error == (int) DB_ROW_IS_REFERENCED) {
+
+ return(1000011);
+
+ } else if (error == (int) DB_CANNOT_ADD_CONSTRAINT) {
+
+ return(1000012);
} else if (error == (int) DB_OUT_OF_FILE_SPACE) {
@@ -178,7 +194,6 @@ convert_error_code_to_mysql(
} else if (error == (int) DB_TOO_BIG_RECORD) {
return(HA_ERR_TO_BIG_ROW);
-
} else {
dbug_assert(0);
@@ -220,7 +235,7 @@ innobase_mysql_print_thd(
}
if (thd->query) {
- printf(" %-.100s", thd->query);
+ printf("\n%-.100s", thd->query);
}
printf("\n");
@@ -526,9 +541,24 @@ innobase_init(void)
{
int err;
bool ret;
+ char current_lib[2], *default_path;
DBUG_ENTER("innobase_init");
+ /*
+ When using the embedded server, the datadirectory is not
+ in the current directory.
+ */
+ if (!mysql_embedded)
+ default_path=mysql_real_data_home;
+ else
+ {
+ /* It's better to use current lib, to keep path's short */
+ current_lib[0]=FN_CURLIB;
+ current_lib[1]=FN_LIBCHAR;
+ default_path=current_lib;
+ }
+
if (specialflag & SPECIAL_NO_PRIOR) {
srv_set_thread_priorities = FALSE;
} else {
@@ -544,10 +574,10 @@ innobase_init(void)
MYF(MY_WME));
srv_data_home = (innobase_data_home_dir ? innobase_data_home_dir :
- mysql_real_data_home);
+ default_path);
srv_logs_home = (char*) "";
srv_arch_dir = (innobase_log_arch_dir ? innobase_log_arch_dir :
- mysql_real_data_home);
+ default_path);
ret = innobase_parse_data_file_paths_and_sizes();
@@ -557,7 +587,7 @@ innobase_init(void)
}
if (!innobase_log_group_home_dir)
- innobase_log_group_home_dir= mysql_real_data_home;
+ innobase_log_group_home_dir= default_path;
ret = innobase_parse_log_group_home_dirs();
if (ret == FALSE) {
@@ -586,6 +616,15 @@ innobase_init(void)
srv_lock_wait_timeout = (ulint) innobase_lock_wait_timeout;
srv_print_verbose_log = mysql_embedded ? 0 : 1;
+ if (strcmp(default_charset_info->name, "latin1") == 0) {
+ /* Store the character ordering table to InnoDB.
+ For non-latin1 charsets we use the MySQL comparison
+ functions, and consequently we do not need to know
+ the ordering internally in InnoDB. */
+
+ memcpy(srv_latin1_ordering,
+ default_charset_info->sort_order, 256);
+ }
err = innobase_start_or_create_for_mysql();
@@ -636,7 +675,7 @@ innobase_flush_logs(void)
DBUG_ENTER("innobase_flush_logs");
- log_make_checkpoint_at(ut_dulint_max, TRUE);
+ log_flush_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP);
DBUG_RETURN(result);
}
@@ -869,10 +908,10 @@ ha_innobase::open(
if (NULL == (ib_table = dict_table_get(norm_name, NULL))) {
fprintf(stderr,
-"Cannot find table %s from the internal data dictionary\n"
-"of InnoDB though the .frm file for the table exists. Maybe you have deleted\n"
-"and created again an InnoDB database but forgotten to delete the\n"
-"corresponding .frm files of old InnoDB tables?\n",
+"InnoDB: Cannot find table %s from the internal data dictionary\n"
+"InnoDB: of InnoDB though the .frm file for the table exists. Maybe you\n"
+"InnoDB: have deleted and recreated InnoDB data files but have forgotten\n"
+"InnoDB: to delete the corresponding .frm files of InnoDB tables?\n",
norm_name);
free_share(share);
@@ -1392,8 +1431,36 @@ ha_innobase::write_row(
current value and the value supplied by the user, if
the auto_inc counter is already initialized
for the table */
+
+ /* We have to use the transactional lock mechanism
+ on the auto-inc counter of the table to ensure
+ that replication and roll-forward of the binlog
+ exactly imitates also the given auto-inc values.
+ The lock is released at each SQL statement's
+ end. */
+
+ error = row_lock_table_autoinc_for_mysql(prebuilt);
+
+ if (error != DB_SUCCESS) {
+
+ error = convert_error_code_to_mysql(error);
+ goto func_exit;
+ }
+
dict_table_autoinc_update(prebuilt->table, auto_inc);
} else {
+ if (!prebuilt->trx->auto_inc_lock) {
+
+ error = row_lock_table_autoinc_for_mysql(
+ prebuilt);
+ if (error != DB_SUCCESS) {
+
+ error = convert_error_code_to_mysql(
+ error);
+ goto func_exit;
+ }
+ }
+
auto_inc = dict_table_autoinc_get(prebuilt->table);
/* If auto_inc is now != 0 the autoinc counter
@@ -1451,7 +1518,7 @@ ha_innobase::write_row(
/* Tell InnoDB server that there might be work for
utility threads: */
-
+func_exit:
innobase_active_small();
DBUG_RETURN(error);
@@ -1728,7 +1795,7 @@ ha_innobase::index_init(
}
/**********************************************************************
-?????????????????????????????????? */
+Currently does nothing. */
int
ha_innobase::index_end(void)
@@ -2290,6 +2357,15 @@ ha_innobase::external_lock(
trx_search_latch_release_if_reserved(trx);
}
+ if (trx->auto_inc_lock) {
+
+ /* If we had reserved the auto-inc lock for
+ some table in this SQL statement, we release
+ it now */
+
+ row_unlock_table_autoinc_for_mysql(trx);
+ }
+
if (!(thd->options
& (OPTION_NOT_AUTO_COMMIT | OPTION_BEGIN))) {
innobase_commit(thd, trx);
@@ -2452,7 +2528,9 @@ ha_innobase::create(
const char* name, /* in: table name */
TABLE* form, /* in: information on table
columns and indexes */
- HA_CREATE_INFO* create_info) /* in: ??????? */
+ HA_CREATE_INFO* create_info) /* in: more information of the
+ created table, contains also the
+ create statement string */
{
int error;
dict_table_t* innobase_table;
@@ -2543,6 +2621,19 @@ ha_innobase::create(
}
}
+ error = row_table_add_foreign_constraints(trx,
+ create_info->create_statement, norm_name);
+
+ error = convert_error_code_to_mysql(error);
+
+ if (error) {
+ trx_commit_for_mysql(trx);
+
+ trx_free_for_mysql(trx);
+
+ DBUG_RETURN(error);
+ }
+
trx_commit_for_mysql(trx);
innobase_table = dict_table_get(norm_name, NULL);
@@ -2563,8 +2654,8 @@ ha_innobase::create(
Drops a table from an InnoDB database. Before calling this function,
MySQL calls innobase_commit to commit the transaction of the current user.
Then the current user cannot have locks set on the table. Drop table
-operation inside InnoDB will wait sleeping in a loop until no other
-user has locks on the table. */
+operation inside InnoDB will remove all locks any user has on the table
+inside InnoDB. */
int
ha_innobase::delete_table(
@@ -2606,6 +2697,53 @@ ha_innobase::delete_table(
DBUG_RETURN(error);
}
+/*********************************************************************
+Drops, inside InnoDB, the tables of the database whose name is the last
+directory component of the given path. The name handed to InnoDB is that
+component followed by a single '/'. */
+
+int
+innobase_drop_database(
+/*===================*/
+                        /* out: error number, converted with
+                        convert_error_code_to_mysql */
+        char*   path)   /* in: database path; the final character is
+                        skipped unconditionally, so the path is expected
+                        to end with a directory separator, for example
+                        'mysql/data/test/' yields the database name
+                        'test' -- NOTE(review): confirm that callers
+                        always pass a trailing separator */
+{
+        char    namebuf[10000];
+        char*   name_start;
+        ulint   name_len;
+        trx_t*  trx;
+        int     error;
+
+        /* Start on the last character of the path and walk backwards
+        until the preceding character is a separator or the beginning
+        of the path is reached; name_len counts the characters passed */
+
+        name_start = strend(path) - 1;
+
+        for (name_len = 0;
+             name_start > path
+             && name_start[-1] != '\\' && name_start[-1] != '/';
+             name_len++) {
+
+                name_start--;
+        }
+
+        /* Build "<name>/" in the local buffer */
+
+        memcpy(namebuf, name_start, name_len);
+        namebuf[name_len] = '/';
+        namebuf[name_len + 1] = '\0';
+
+        trx = trx_allocate_for_mysql();
+
+        error = row_drop_database_for_mysql(namebuf, trx);
+
+        /* Tell the InnoDB server that there might be work for
+        utility threads: */
+
+        srv_active_wake_master_thread();
+
+        trx_free_for_mysql(trx);
+
+        return(convert_error_code_to_mysql(error));
+}
+
/*************************************************************************
Renames an InnoDB table. */
@@ -2742,12 +2880,13 @@ improve the algorithm of filesort.cc. */
ha_rows
ha_innobase::estimate_number_of_rows(void)
/*======================================*/
- /* out: upper bound of rows, currently 32-bit int
- or uint */
+ /* out: upper bound of rows */
{
row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt;
- dict_table_t* ib_table;
-
+ dict_index_t* index;
+ ulonglong estimate;
+ ulonglong data_file_length;
+
if (prebuilt->trx) {
prebuilt->trx->op_info =
(char*) "estimating upper bound of table size";
@@ -2755,21 +2894,21 @@ ha_innobase::estimate_number_of_rows(void)
DBUG_ENTER("info");
- ib_table = prebuilt->table;
-
- dict_update_statistics(ib_table);
-
- data_file_length = ((ulonglong)
- ib_table->stat_clustered_index_size)
- * UNIV_PAGE_SIZE;
+ dict_update_statistics(prebuilt->table);
- /* The minimum clustered index record size is 20 bytes */
+ index = dict_table_get_first_index_noninline(prebuilt->table);
+
+ data_file_length = ((ulonglong) index->stat_n_leaf_pages)
+ * UNIV_PAGE_SIZE;
+ /* Calculate a minimum length for a clustered index record */
+ estimate = data_file_length / dict_index_calc_min_rec_len(index);
+
if (prebuilt->trx) {
prebuilt->trx->op_info = (char*) "";
}
-
- return((ha_rows) (1000 + data_file_length / 20));
+
+ return((ha_rows) estimate);
}
/*************************************************************************
@@ -2784,10 +2923,10 @@ ha_innobase::scan_time()
{
row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt;
- /* In the following formula we assume that scanning 5 pages
+ /* In the following formula we assume that scanning 10 pages
takes the same time as a disk seek: */
- return((double) (1 + prebuilt->table->stat_clustered_index_size / 5));
+ return((double) (prebuilt->table->stat_clustered_index_size / 10));
}
/*************************************************************************
@@ -2802,8 +2941,9 @@ ha_innobase::info(
row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt;
dict_table_t* ib_table;
dict_index_t* index;
- uint rec_per_key;
- uint i;
+ ulong rec_per_key;
+ ulong j;
+ ulong i;
DBUG_ENTER("info");
@@ -2821,7 +2961,7 @@ ha_innobase::info(
}
if (flag & HA_STATUS_VARIABLE) {
- records = ib_table->stat_n_rows;
+ records = (ha_rows)ib_table->stat_n_rows;
deleted = 0;
data_file_length = ((ulonglong)
ib_table->stat_clustered_index_size)
@@ -2847,16 +2987,24 @@ ha_innobase::info(
}
for (i = 0; i < table->keys; i++) {
- if (index->stat_n_diff_key_vals == 0) {
- rec_per_key = records;
- } else {
- rec_per_key = records /
- index->stat_n_diff_key_vals;
+ for (j = 0; j < table->key_info[i].key_parts; j++) {
+
+ if (index->stat_n_diff_key_vals[j + 1] == 0) {
+
+ rec_per_key = records;
+ } else {
+ rec_per_key = (ulong)(records /
+ index->stat_n_diff_key_vals[j + 1]);
+ }
+
+ if (rec_per_key == 0) {
+ rec_per_key = 1;
+ }
+
+ table->key_info[i].rec_per_key[j]
+ = rec_per_key;
}
-
- table->key_info[i].rec_per_key[
- table->key_info[i].key_parts - 1]
- = rec_per_key;
+
index = dict_table_get_next_index_noninline(index);
}
}
diff --git a/sql/ha_innobase.h b/sql/ha_innobase.h
index daa987dd757..3b0144a4fca 100644
--- a/sql/ha_innobase.h
+++ b/sql/ha_innobase.h
@@ -177,3 +177,5 @@ uint innobase_get_free_space(void);
int innobase_commit(THD *thd, void* trx_handle);
int innobase_rollback(THD *thd, void* trx_handle);
int innobase_close_connection(THD *thd);
+int innobase_drop_database(char *path);
+
diff --git a/sql/handler.cc b/sql/handler.cc
index 4876a73d2a9..742bcd3aa0a 100644
--- a/sql/handler.cc
+++ b/sql/handler.cc
@@ -181,6 +181,13 @@ int ha_panic(enum ha_panic_function flag)
return error;
} /* ha_panic */
+/*
+  Forward a database drop to InnoDB.
+
+  Does nothing unless InnoDB support is compiled in (HAVE_INNOBASE_DB)
+  and InnoDB has not been disabled (innodb_skip). The value returned by
+  innobase_drop_database() is ignored.
+*/
+void ha_drop_database(char* path)
+{
+#ifdef HAVE_INNOBASE_DB
+  if (innodb_skip)
+    return;
+  innobase_drop_database(path);
+#endif
+}
void ha_close_connection(THD* thd)
{
diff --git a/sql/handler.h b/sql/handler.h
index 2f54dafe82c..e36a81cf7c7 100644
--- a/sql/handler.h
+++ b/sql/handler.h
@@ -150,6 +150,7 @@ typedef struct st_ha_create_information
ulonglong auto_increment_value;
char *comment,*password;
char *data_file_name, *index_file_name;
+ char *create_statement;
uint options; /* OR of HA_CREATE_ options */
uint raid_type,raid_chunks;
ulong raid_chunksize;
@@ -347,6 +348,7 @@ enum db_type ha_checktype(enum db_type database_type);
int ha_create_table(const char *name, HA_CREATE_INFO *create_info,
bool update_create_info);
int ha_delete_table(enum db_type db_type, const char *path);
+void ha_drop_database(char* path);
void ha_key_cache(void);
int ha_start_stmt(THD *thd);
int ha_commit_trans(THD *thd, THD_TRANS *trans);
diff --git a/sql/mysqld.cc b/sql/mysqld.cc
index 1a84877df05..65b130bfd82 100644
--- a/sql/mysqld.cc
+++ b/sql/mysqld.cc
@@ -21,6 +21,7 @@
#include "sql_acl.h"
#include "slave.h"
#include "sql_repl.h"
+#include "repl_failsafe.h"
#include "stacktrace.h"
#ifdef HAVE_BERKELEY_DB
#include "ha_berkeley.h"
@@ -76,9 +77,7 @@ extern "C" { // Because of SCO 3.2V4.2
#include <sys/select.h>
#endif
#include <sys/utsname.h>
-#else
-#include <windows.h>
-#endif // __WIN__
+#endif /* __WIN__ */
#ifdef HAVE_LIBWRAP
#include <tcpd.h>
@@ -1684,6 +1683,7 @@ int main(int argc, char **argv)
(void) pthread_mutex_init(&LOCK_slave, MY_MUTEX_INIT_FAST);
(void) pthread_mutex_init(&LOCK_server_id, MY_MUTEX_INIT_FAST);
(void) pthread_mutex_init(&LOCK_user_conn, MY_MUTEX_INIT_FAST);
+ (void) pthread_mutex_init(&LOCK_rpl_status, MY_MUTEX_INIT_FAST);
(void) pthread_cond_init(&COND_thread_count,NULL);
(void) pthread_cond_init(&COND_refresh,NULL);
(void) pthread_cond_init(&COND_thread_cache,NULL);
@@ -1692,6 +1692,7 @@ int main(int argc, char **argv)
(void) pthread_cond_init(&COND_binlog_update, NULL);
(void) pthread_cond_init(&COND_slave_stopped, NULL);
(void) pthread_cond_init(&COND_slave_start, NULL);
+ (void) pthread_cond_init(&COND_rpl_status, NULL);
init_signals();
if (set_default_charset_by_name(default_charset, MYF(MY_WME)))
@@ -2595,7 +2596,7 @@ enum options {
OPT_REPORT_USER, OPT_REPORT_PASSWORD, OPT_REPORT_PORT,
OPT_SHOW_SLAVE_AUTH_INFO, OPT_OLD_RPL_COMPAT,
OPT_SLAVE_LOAD_TMPDIR, OPT_NO_MIX_TYPE,
- OPT_RPL_RECOVERY_RANK
+ OPT_RPL_RECOVERY_RANK,OPT_INIT_RPL_ROLE
};
static struct option long_options[] = {
@@ -2631,6 +2632,7 @@ static struct option long_options[] = {
{"enable-pstack", no_argument, 0, (int) OPT_DO_PSTACK},
{"exit-info", optional_argument, 0, 'T'},
{"flush", no_argument, 0, (int) OPT_FLUSH},
+ {"init-rpl-role", required_argument, 0, (int) OPT_INIT_RPL_ROLE},
/* We must always support this option to make scripts like mysqltest easier
to do */
{"innodb_data_file_path", required_argument, 0,
@@ -3052,6 +3054,8 @@ struct show_var_st status_vars[]= {
{"Open_streams", (char*) &my_stream_opened, SHOW_INT_CONST},
{"Opened_tables", (char*) &opened_tables, SHOW_LONG},
{"Questions", (char*) 0, SHOW_QUESTION},
+ {"Rpl_status", (char*) 0,
+ SHOW_RPL_STATUS},
{"Select_full_join", (char*) &select_full_join_count, SHOW_LONG},
{"Select_full_range_join", (char*) &select_full_range_join_count, SHOW_LONG},
{"Select_range", (char*) &select_range_count, SHOW_LONG},
@@ -3489,6 +3493,17 @@ static void get_options(int argc,char **argv)
opt_log_slave_updates = 1;
break;
+ case (int) OPT_INIT_RPL_ROLE:
+ {
+ int role;
+ if ((role=find_type(optarg, &rpl_role_typelib, 2)) <= 0)
+ {
+ fprintf(stderr, "Unknown replication role: %s\n", optarg);
+ exit(1);
+ }
+ rpl_status = (role == 1) ? RPL_AUTH_MASTER : RPL_IDLE_SLAVE;
+ break;
+ }
case (int)OPT_REPLICATE_IGNORE_DB:
{
i_string *db = new i_string(optarg);
diff --git a/sql/repl_failsafe.cc b/sql/repl_failsafe.cc
new file mode 100644
index 00000000000..40eb3b8bb7c
--- /dev/null
+++ b/sql/repl_failsafe.cc
@@ -0,0 +1,43 @@
+/* Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB & Sasha
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+// Sasha Pachev <sasha@mysql.com> is currently in charge of this file
+
+#include "mysql_priv.h"
+#include "repl_failsafe.h"
+
+/* Current fail-safe replication status of this server; starts as RPL_NULL */
+RPL_STATUS rpl_status=RPL_NULL;
+/* Mutex held by change_rpl_status() while it tests and updates rpl_status */
+pthread_mutex_t LOCK_rpl_status;
+/* Condition variable declared alongside rpl_status */
+pthread_cond_t COND_rpl_status;
+
+/*
+  Replication role names. NullS terminates the array and is excluded from
+  the TYPELIB count (array_elements - 1).
+*/
+const char *rpl_role_type[] = {"MASTER","SLAVE",NullS};
+TYPELIB rpl_role_typelib = {array_elements(rpl_role_type)-1,"",
+ rpl_role_type};
+
+/*
+  Printable names of the RPL_STATUS values, listed in the same order as the
+  enumerators in repl_failsafe.h. RPL_ANY has no entry here.
+*/
+const char* rpl_status_type[] = {"AUTH_MASTER","ACTIVE_SLAVE","IDLE_SLAVE",
+ "LOST_SOLDIER","TROOP_SOLDIER",
+ "RECOVERY_CAPTAIN","NULL",NullS};
+TYPELIB rpl_status_typelib= {array_elements(rpl_status_type)-1,"",
+ rpl_status_type};
+
+/*
+  Move the global replication status to to_status.
+
+  The change is applied when the current rpl_status equals from_status,
+  or when from_status is the RPL_ANY wild card, in which case the
+  transition is unconditional. A stored status of RPL_ANY is also
+  overwritten, as before. In every other case the call leaves rpl_status
+  untouched. The test and the assignment are both done while holding
+  LOCK_rpl_status.
+*/
+void change_rpl_status(RPL_STATUS from_status, RPL_STATUS to_status)
+{
+  pthread_mutex_lock(&LOCK_rpl_status);
+  if (from_status == RPL_ANY || rpl_status == from_status ||
+      rpl_status == RPL_ANY)
+    rpl_status = to_status;
+  pthread_mutex_unlock(&LOCK_rpl_status);
+}
+
diff --git a/sql/repl_failsafe.h b/sql/repl_failsafe.h
new file mode 100644
index 00000000000..42b386e6255
--- /dev/null
+++ b/sql/repl_failsafe.h
@@ -0,0 +1,16 @@
+#ifndef REPL_FAILSAFE_H
+#define REPL_FAILSAFE_H
+
+/*
+  Fail-safe replication status of a server.
+  The enumerators up to and including RPL_NULL must stay in the same order
+  as the names in rpl_status_type[] (repl_failsafe.cc).
+*/
+typedef enum
+{
+  RPL_AUTH_MASTER= 0,
+  RPL_ACTIVE_SLAVE,
+  RPL_IDLE_SLAVE,
+  RPL_LOST_SOLDIER,
+  RPL_TROOP_SOLDIER,
+  RPL_RECOVERY_CAPTAIN,
+  RPL_NULL,                     /* inactive */
+  RPL_ANY                       /* wild card used by change_rpl_status */
+} RPL_STATUS;
+
+/* Current status, with the mutex and condition variable that go with it */
+extern RPL_STATUS rpl_status;
+extern pthread_mutex_t LOCK_rpl_status;
+extern pthread_cond_t COND_rpl_status;
+
+/* Name tables and the typelibs built on them */
+extern const char *rpl_role_type[];
+extern const char *rpl_status_type[];
+extern TYPELIB rpl_role_typelib;
+extern TYPELIB rpl_status_typelib;
+
+void change_rpl_status(RPL_STATUS from_status, RPL_STATUS to_status);
+
+#endif /* REPL_FAILSAFE_H */
diff --git a/sql/slave.cc b/sql/slave.cc
index d2e038bef88..7c065a89c9d 100644
--- a/sql/slave.cc
+++ b/sql/slave.cc
@@ -21,6 +21,7 @@
#include "mini_client.h"
#include "slave.h"
#include "sql_repl.h"
+#include "repl_failsafe.h"
#include <thr_alarm.h>
#include <my_dir.h>
@@ -1220,6 +1221,7 @@ position %s",
thd->proc_info = "Waiting for slave mutex on exit";
pthread_mutex_lock(&LOCK_slave);
slave_running = 0;
+ change_rpl_status(RPL_ACTIVE_SLAVE,RPL_IDLE_SLAVE);
abort_slave = 0;
save_temporary_tables = thd->temporary_tables;
thd->temporary_tables = 0; // remove tempation from destructor to close them
@@ -1257,6 +1259,7 @@ static int safe_connect(THD* thd, MYSQL* mysql, MASTER_INFO* mi)
if(!slave_was_killed)
{
+ change_rpl_status(RPL_IDLE_SLAVE,RPL_ACTIVE_SLAVE);
mysql_log.write(thd, COM_CONNECT_OUT, "%s@%s:%d",
mi->user, mi->host, mi->port);
#ifdef SIGNAL_WITH_VIO_CLOSE
@@ -1298,9 +1301,15 @@ static int safe_reconnect(THD* thd, MYSQL* mysql, MASTER_INFO* mi)
mi->connect_retry);
safe_sleep(thd, mi->connect_retry);
}
- if (err_count++ == master_retry_count)
+ /* by default we try forever. The reason is that failure will trigger
+ master election, so if the user did not set master_retry_count we
+ do not want to have election triggered on the first failure to
+ connect
+ */
+ if (master_retry_count && err_count++ == master_retry_count)
{
slave_was_killed=1;
+ change_rpl_status(RPL_ACTIVE_SLAVE,RPL_LOST_SOLDIER);
break;
}
}
diff --git a/sql/sql_db.cc b/sql/sql_db.cc
index 05b380ebd77..64ad205803e 100644
--- a/sql/sql_db.cc
+++ b/sql/sql_db.cc
@@ -158,6 +158,7 @@ int mysql_rm_db(THD *thd,char *db,bool if_exists, bool silent)
error = -1;
if ((deleted=mysql_rm_known_files(thd, dirp, db, path,0)) >= 0 && thd)
{
+ ha_drop_database(path);
if (!silent)
{
if (!thd->query)
@@ -333,7 +334,7 @@ bool mysql_change_db(THD *thd,const char *name)
x_free(dbname);
DBUG_RETURN(1);
}
- DBUG_PRINT("general",("Use database: %s", dbname));
+ DBUG_PRINT("info",("Use database: %s", dbname));
if (test_all_bits(thd->master_access,DB_ACLS))
db_access=DB_ACLS;
else
diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc
index 7104e41fc46..b83a50aec16 100644
--- a/sql/sql_parse.cc
+++ b/sql/sql_parse.cc
@@ -120,7 +120,7 @@ static bool check_user(THD *thd,enum_server_command command, const char *user,
protocol_version == 9 ||
!(thd->client_capabilities &
CLIENT_LONG_PASSWORD));
- DBUG_PRINT("general",
+ DBUG_PRINT("info",
("Capabilities: %d packet_length: %d Host: '%s' User: '%s' Using password: %s Access: %u db: '%s'",
thd->client_capabilities, thd->max_packet_length,
thd->host_or_ip, thd->priv_user,
@@ -323,7 +323,7 @@ check_connections(THD *thd)
*/
DBUG_PRINT("info", (("check_connections called by thread %d"),
thd->thread_id));
- DBUG_PRINT("general",("New connection received on %s",
+ DBUG_PRINT("info",("New connection received on %s",
vio_description(net->vio)));
if (!thd->host) // If TCP/IP connection
{
@@ -347,15 +347,15 @@ check_connections(THD *thd)
if (connect_errors > max_connect_errors)
return(ER_HOST_IS_BLOCKED);
}
- DBUG_PRINT("general",("Host: %s ip: %s",
- thd->host ? thd->host : "unknown host",
- thd->ip ? thd->ip : "unknown ip"));
+ DBUG_PRINT("info",("Host: %s ip: %s",
+ thd->host ? thd->host : "unknown host",
+ thd->ip ? thd->ip : "unknown ip"));
if (acl_check_host(thd->host,thd->ip))
return(ER_HOST_NOT_PRIVILEGED);
}
else /* Hostname given means that the connection was on a socket */
{
- DBUG_PRINT("general",("Host: %s",thd->host));
+ DBUG_PRINT("info",("Host: %s",thd->host));
thd->host_or_ip=thd->host;
thd->ip=0;
bzero((char*) &thd->remote,sizeof(struct sockaddr));
@@ -731,17 +731,17 @@ bool do_command(THD *thd)
net_new_transaction(net);
if ((packet_length=my_net_read(net)) == packet_error)
{
- DBUG_PRINT("general",("Got error reading command from socket %s",
- vio_description(net->vio) ));
+ DBUG_PRINT("info",("Got error reading command from socket %s",
+ vio_description(net->vio) ));
return TRUE;
}
else
{
packet=(char*) net->read_pos;
command = (enum enum_server_command) (uchar) packet[0];
- DBUG_PRINT("general",("Command on %s = %d (%s)",
- vio_description(net->vio), command,
- command_name[command]));
+ DBUG_PRINT("info",("Command on %s = %d (%s)",
+ vio_description(net->vio), command,
+ command_name[command]));
}
net->timeout=old_timeout; // Timeout for writing
DBUG_RETURN(dispatch_command(command,thd, packet+1, packet_length));
diff --git a/sql/sql_show.cc b/sql/sql_show.cc
index 5a9777e24a4..19c3d89caaf 100644
--- a/sql/sql_show.cc
+++ b/sql/sql_show.cc
@@ -20,6 +20,7 @@
#include "mysql_priv.h"
#include "sql_select.h" // For select_describe
#include "sql_acl.h"
+#include "repl_failsafe.h"
#include <my_dir.h>
#ifdef HAVE_BERKELEY_DB
@@ -1164,6 +1165,9 @@ int mysqld_show(THD *thd, const char *wild, show_var_st *variables)
case SHOW_QUESTION:
net_store_data(&packet2,(uint32) thd->query_id);
break;
+ case SHOW_RPL_STATUS:
+ net_store_data(&packet2, rpl_status_type[(int)rpl_status]);
+ break;
case SHOW_OPENTABLES:
net_store_data(&packet2,(uint32) cached_tables());
break;
diff --git a/sql/sql_table.cc b/sql/sql_table.cc
index 9306a6d4d9a..d76c6bbd627 100644
--- a/sql/sql_table.cc
+++ b/sql/sql_table.cc
@@ -598,6 +598,7 @@ int mysql_create_table(THD *thd,const char *db, const char *table_name,
thd->proc_info="creating table";
+ create_info->create_statement = thd->query;
create_info->table_options=db_options;
if (rea_create_table(path, create_info, fields, key_count,
key_info_buffer))
diff --git a/sql/structs.h b/sql/structs.h
index 2f6f850bc9e..439384c7191 100644
--- a/sql/structs.h
+++ b/sql/structs.h
@@ -140,6 +140,7 @@ enum SHOW_TYPE { SHOW_LONG,SHOW_CHAR,SHOW_INT,SHOW_CHAR_PTR,SHOW_BOOL,
,SHOW_SSL_CTX_SESS_TIMEOUTS, SHOW_SSL_CTX_SESS_CACHE_FULL
,SHOW_SSL_GET_CIPHER_LIST
#endif /* HAVE_OPENSSL */
+ ,SHOW_RPL_STATUS
};
enum SHOW_COMP_OPTION { SHOW_OPTION_YES, SHOW_OPTION_NO, SHOW_OPTION_DISABLED};
diff --git a/sql/table.cc b/sql/table.cc
index 1ed856f7854..927119f45de 100644
--- a/sql/table.cc
+++ b/sql/table.cc
@@ -261,7 +261,7 @@ int openfrm(const char *name, const char *alias, uint db_stat, uint prgflag,
outparam->comment=strdup_root(&outparam->mem_root,
(char*) head+47);
- DBUG_PRINT("form",("i_count: %d i_parts: %d index: %d n_length: %d int_length: %d", interval_count,interval_parts, outparam->keys,n_length,int_length));
+ DBUG_PRINT("info",("i_count: %d i_parts: %d index: %d n_length: %d int_length: %d", interval_count,interval_parts, outparam->keys,n_length,int_length));
if (!(field_ptr = (Field **)
alloc_root(&outparam->mem_root,
diff --git a/support-files/mysql.spec.sh b/support-files/mysql.spec.sh
index b136fbf5bf6..95d42fdc24d 100644
--- a/support-files/mysql.spec.sh
+++ b/support-files/mysql.spec.sh
@@ -423,6 +423,9 @@ fi
%files bench
%attr(-, root, root) /usr/share/sql-bench
%attr(-, root, root) /usr/share/mysql-test
+%attr(755, root, root) /usr/bin/mysqlmanager
+%attr(755, root, root) /usr/bin/mysqlmanager-pwgen
+%attr(755, root, root) /usr/bin/mysqlmanagerc
%files Max
%attr(755, root, root) /usr/sbin/mysqld-max
diff --git a/tools/mysqlmanager.c b/tools/mysqlmanager.c
index 3a95b4e9a49..9cb04505e2f 100644
--- a/tools/mysqlmanager.c
+++ b/tools/mysqlmanager.c
@@ -88,6 +88,8 @@
#define MAX_LAUNCHER_MSG 256
#endif
+#define MAX_RETRY_COUNT 100
+
/* Variable naming convention - if starts with manager_, either is set
directly by the user, or used closely in ocnjunction with a variable
set by the user
@@ -297,9 +299,8 @@ static int authenticate(struct manager_thd* thd);
static char* read_line(struct manager_thd* thd); /* returns pointer to end of
line
*/
-static pthread_handler_decl(process_connection,arg);
-static pthread_handler_decl(process_launcher_messages,
- __attribute__((unused)) arg);
+static pthread_handler_decl(process_connection, arg);
+static pthread_handler_decl(process_launcher_messages, arg);
static int exec_line(struct manager_thd* thd,char* buf,char* buf_end);
#ifdef DO_STACKTRACE
@@ -1024,7 +1025,8 @@ static void log_msg(const char* fmt, int msg_type, va_list args)
pthread_mutex_unlock(&lock_log);
}
-#define LOG_MSG_FUNC(type,TYPE) inline static void type \
+/* No 'inline' here because functions with ... can't do that portably */
+#define LOG_MSG_FUNC(type,TYPE) static void type \
(const char* fmt,...) { \
va_list args; \
va_start(args,fmt); \
@@ -1038,7 +1040,7 @@ LOG_MSG_FUNC(log_info,LOG_INFO)
#ifndef DBUG_OFF
LOG_MSG_FUNC(log_debug,LOG_DEBUG)
#else
-inline void log_debug(const char* __attribute__((unused)) fmt,...) {}
+void log_debug(const char* __attribute__((unused)) fmt,...) {}
#endif
static pthread_handler_decl(process_launcher_messages,
@@ -1161,10 +1163,15 @@ static char* read_line(struct manager_thd* thd)
{
int len,read_len;
char *block_end,*p_back;
+ uint retry_count=0;
+
read_len = min(NET_BLOCK,(uint)(buf_end-p));
- if ((len=vio_read(thd->vio,p,read_len))<=0)
+ while ((len=vio_read(thd->vio,p,read_len))<=0)
{
- log_err("Error reading command from client");
+ if (vio_should_retry(thd->vio) && retry_count++ < MAX_RETRY_COUNT)
+ continue;
+ log_err("Error reading command from client (Error: %d)",
+ vio_errno(thd->vio));
thd->fatal=1;
return 0;
}
@@ -1367,6 +1374,12 @@ static int run_server_loop()
int client_sock;
uint len;
Vio* vio;
+ pthread_attr_t thr_attr;
+ (void) pthread_attr_init(&thr_attr);
+#if !defined(HAVE_DEC_3_2_THREADS)
+ pthread_attr_setscope(&thr_attr,PTHREAD_SCOPE_SYSTEM);
+ (void) pthread_attr_setdetachstate(&thr_attr,PTHREAD_CREATE_DETACHED);
+#endif
for (;!shutdown_requested;)
{
@@ -1412,7 +1425,7 @@ static int run_server_loop()
manager_thd_free(thd);
continue;
}
- else if (pthread_create(&th,0,process_connection,(void*)thd))
+ else if (pthread_create(&th,&thr_attr,process_connection,(void*)thd))
{
client_msg(vio,MANAGER_INTERNAL_ERR,"Could not create thread, errno=%d",
errno);
@@ -1420,6 +1433,7 @@ static int run_server_loop()
continue;
}
}
+ (void) pthread_attr_destroy(&thr_attr);
return 0;
}
@@ -1543,10 +1557,11 @@ static struct manager_exec* manager_exec_new(char* arg_start,char* arg_end)
tmp->error="Too few arguments";
return tmp;
}
- tmp->data_buf=(char*)tmp+sizeof(*tmp);
+ /* We have to allocate 'args' first as this must be aligned */
+ tmp->args=(char**)(tmp +1);
+ tmp->data_buf= (char*) (tmp->args + num_args);
memcpy(tmp->data_buf,arg_start,arg_len);
tmp->data_buf_size=arg_len;
- tmp->args=(char**)(tmp->data_buf+arg_len);
tmp->num_args=num_args;
tmp->ident=tmp->data_buf;
tmp->ident_len=strlen(tmp->ident);
@@ -1658,13 +1673,20 @@ static void init_user_hash()
static void init_globals()
{
+ pthread_attr_t thr_attr;
if (hash_init(&exec_hash,1024,0,0,get_exec_key,manager_exec_free,MYF(0)))
die("Exec hash initialization failed");
if (!one_thread)
{
+ (void) pthread_attr_init(&thr_attr);
+#if !defined(HAVE_DEC_3_2_THREADS)
+ pthread_attr_setscope(&thr_attr,PTHREAD_SCOPE_SYSTEM);
+ (void) pthread_attr_setdetachstate(&thr_attr,PTHREAD_CREATE_DETACHED);
+#endif
fork_launcher();
- if (pthread_create(&launch_msg_th,0,process_launcher_messages,0))
+ if (pthread_create(&launch_msg_th,&thr_attr,process_launcher_messages,0))
die("Could not start launcher message handler thread");
+ /* (void) pthread_attr_destroy(&thr_attr); */
}
init_user_hash();
loop_th=pthread_self();