Diffstat (limited to 'storage')
110 files changed, 9668 insertions, 875 deletions
diff --git a/storage/archive/CMakeLists.txt b/storage/archive/CMakeLists.txt index a631f194b1a..127942d4043 100644 --- a/storage/archive/CMakeLists.txt +++ b/storage/archive/CMakeLists.txt @@ -3,6 +3,7 @@ SET(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -DSAFEMALLOC -DSAFE_MUTEX") INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/include ${CMAKE_SOURCE_DIR}/zlib ${CMAKE_SOURCE_DIR}/sql + ${CMAKE_SOURCE_DIR}/regex ${CMAKE_SOURCE_DIR}/extra/yassl/include) ADD_LIBRARY(archive azio.c ha_archive.cc ha_archive.h) TARGET_LINK_LIBRARIES(archive zlib mysys dbug strings) diff --git a/storage/blackhole/CMakeLists.txt b/storage/blackhole/CMakeLists.txt index ea3a7eae38e..a90f8e14ca0 100644 --- a/storage/blackhole/CMakeLists.txt +++ b/storage/blackhole/CMakeLists.txt @@ -2,5 +2,6 @@ SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DSAFEMALLOC -DSAFE_MUTEX") SET(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -DSAFEMALLOC -DSAFE_MUTEX") INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/include ${CMAKE_SOURCE_DIR}/sql + ${CMAKE_SOURCE_DIR}/regex ${CMAKE_SOURCE_DIR}/extra/yassl/include) ADD_LIBRARY(blackhole ha_blackhole.cc ha_blackhole.h) diff --git a/storage/blackhole/plug.in b/storage/blackhole/plug.in new file mode 100644 index 00000000000..725db0facba --- /dev/null +++ b/storage/blackhole/plug.in @@ -0,0 +1,6 @@ +MYSQL_STORAGE_ENGINE(blackhole,,[Blackhole Storage Engine], + [Basic Write-only Read-never tables], [max,max-no-ndb]) +MYSQL_PLUGIN_DIRECTORY(blackhole, [storage/blackhole]) +MYSQL_PLUGIN_STATIC(blackhole, [libblackhole.a]) +MYSQL_PLUGIN_DYNAMIC(blackhole, [ha_blackhole.la]) + diff --git a/storage/csv/CMakeLists.txt b/storage/csv/CMakeLists.txt index 28748527cc3..55e9b50fbfc 100644 --- a/storage/csv/CMakeLists.txt +++ b/storage/csv/CMakeLists.txt @@ -2,5 +2,6 @@ SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DSAFEMALLOC -DSAFE_MUTEX") SET(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -DSAFEMALLOC -DSAFE_MUTEX") INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/include ${CMAKE_SOURCE_DIR}/sql + ${CMAKE_SOURCE_DIR}/regex ${CMAKE_SOURCE_DIR}/extra/yassl/include) ADD_LIBRARY(csv ha_tina.cc ha_tina.h) diff --git a/storage/csv/ha_tina.cc b/storage/csv/ha_tina.cc index bec236becd0..2fe2afeb470 100644 --- a/storage/csv/ha_tina.cc +++ b/storage/csv/ha_tina.cc @@ -157,6 +157,7 @@ static int tina_init_func() VOID(pthread_mutex_init(&tina_mutex,MY_MUTEX_INIT_FAST)); (void) hash_init(&tina_open_tables,system_charset_info,32,0,0, (hash_get_key) tina_get_key,0,0); + bzero(&tina_hton, sizeof(handlerton)); tina_hton.state= SHOW_OPTION_YES; tina_hton.db_type= DB_TYPE_CSV_DB; tina_hton.create= tina_create_handler; @@ -229,6 +230,11 @@ static TINA_SHARE *get_share(const char *table_name, TABLE *table) MY_REPLACE_EXT|MY_UNPACK_FILENAME); fn_format(meta_file_name, table_name, "", CSM_EXT, MY_REPLACE_EXT|MY_UNPACK_FILENAME); + + if (my_stat(share->data_file_name, &file_stat, MYF(MY_WME)) == NULL) + goto error; + share->saved_data_file_length= file_stat.st_size; + if (my_hash_insert(&tina_open_tables, (byte*) share)) goto error; thr_lock_init(&share->lock); @@ -250,10 +256,6 @@ static TINA_SHARE *get_share(const char *table_name, TABLE *table) */ if (read_meta_file(share->meta_file, &share->rows_recorded)) share->crashed= TRUE; - - if (my_stat(share->data_file_name, &file_stat, MYF(MY_WME)) == NULL) - goto error2; - share->saved_data_file_length= file_stat.st_size; } share->use_count++; pthread_mutex_unlock(&tina_mutex); diff --git a/storage/csv/plug.in b/storage/csv/plug.in new file mode 100644 index 00000000000..bbc69680fcd --- /dev/null 
+++ b/storage/csv/plug.in @@ -0,0 +1,5 @@ +MYSQL_STORAGE_ENGINE(csv,, [CSV Storage Engine], + [Stores tables in text CSV format]) +MYSQL_PLUGIN_DIRECTORY(csv, [storage/csv]) +MYSQL_PLUGIN_STATIC(csv, [libcsv.a]) +MYSQL_PLUGIN_MANDATORY(csv) dnl Used for logging diff --git a/storage/example/CMakeLists.txt b/storage/example/CMakeLists.txt index f4579aa0c66..384631a66c4 100644 --- a/storage/example/CMakeLists.txt +++ b/storage/example/CMakeLists.txt @@ -2,5 +2,6 @@ SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DSAFEMALLOC -DSAFE_MUTEX") SET(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -DSAFEMALLOC -DSAFE_MUTEX") INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/include ${CMAKE_SOURCE_DIR}/sql + ${CMAKE_SOURCE_DIR}/regex ${CMAKE_SOURCE_DIR}/extra/yassl/include) ADD_LIBRARY(example ha_example.cc) diff --git a/storage/example/ha_example.cc b/storage/example/ha_example.cc index feabad2e356..704ea757749 100644 --- a/storage/example/ha_example.cc +++ b/storage/example/ha_example.cc @@ -67,6 +67,7 @@ #pragma implementation // gcc: Class implementation #endif +#define MYSQL_SERVER 1 #include "mysql_priv.h" #include "ha_example.h" diff --git a/storage/federated/CMakeLists.txt b/storage/federated/CMakeLists.txt new file mode 100644 index 00000000000..97a4f318a11 --- /dev/null +++ b/storage/federated/CMakeLists.txt @@ -0,0 +1,7 @@ +SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DSAFEMALLOC -DSAFE_MUTEX") +SET(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -DSAFEMALLOC -DSAFE_MUTEX") + +INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/include ${CMAKE_SOURCE_DIR}/sql + ${CMAKE_SOURCE_DIR}/regex + ${CMAKE_SOURCE_DIR}/extra/yassl/include) +ADD_LIBRARY(federated ha_federated.cc) diff --git a/storage/federated/Makefile.am b/storage/federated/Makefile.am new file mode 100644 index 00000000000..813455ed5c7 --- /dev/null +++ b/storage/federated/Makefile.am @@ -0,0 +1,52 @@ +# Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +#called from the top level Makefile + +MYSQLDATAdir = $(localstatedir) +MYSQLSHAREdir = $(pkgdatadir) +MYSQLBASEdir= $(prefix) +MYSQLLIBdir= $(pkglibdir) +INCLUDES = -I$(top_srcdir)/include -I$(top_builddir)/include \ + -I$(top_srcdir)/regex \ + -I$(top_srcdir)/sql \ + -I$(srcdir) +WRAPLIBS= + +LDADD = + +DEFS = @DEFS@ + +noinst_HEADERS = ha_federated.h + +EXTRA_LTLIBRARIES = ha_federated.la +pkglib_LTLIBRARIES = @plugin_federated_shared_target@ +ha_federated_la_LDFLAGS = -module -rpath $(MYSQLLIBdir) +ha_federated_la_CXXFLAGS= $(AM_CFLAGS) -DMYSQL_DYNAMIC_PLUGIN +ha_federated_la_CFLAGS = $(AM_CFLAGS) -DMYSQL_DYNAMIC_PLUGIN +ha_federated_la_SOURCES = ha_federated.cc + + +EXTRA_LIBRARIES = libfederated.a +noinst_LIBRARIES = @plugin_federated_static_target@ +libfederated_a_CXXFLAGS = $(AM_CFLAGS) +libfederated_a_CFLAGS = $(AM_CFLAGS) +libfederated_a_SOURCES= ha_federated.cc + + +EXTRA_DIST = CMakeLists.txt +# Don't update the files from bitkeeper +%::SCCS/s.% diff --git a/storage/federated/ha_federated.cc b/storage/federated/ha_federated.cc new file mode 100644 index 00000000000..98f48b09ba6 --- /dev/null +++ b/storage/federated/ha_federated.cc @@ -0,0 +1,2902 @@ +/* Copyright (C) 2004 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + + MySQL Federated Storage Engine + + ha_federated.cc - MySQL Federated Storage Engine + Patrick Galbraith and Brian Aker, 2004 + + This is a handler which uses a foreign database as the data file, as + opposed to a handler like MyISAM, which uses .MYD files locally. + + How this handler works + ---------------------------------- + Normal database files are local and as such: You create a table called + 'users', a file such as 'users.MYD' is created. A handler reads, inserts, + deletes, updates data in this file. The data is stored in a particular + format, so to read, that data has to be parsed into fields; to write, + fields have to be stored in this format to write to this data file. + + With the MySQL Federated storage engine, there will be no local files + for each table's data (such as .MYD). A foreign database will store + the data that would normally be in this file. This will necessitate + the use of the MySQL client API to read, delete, update, insert this + data. The data will have to be retrieved via an SQL call "SELECT * + FROM users". Then, to read this data, it will have to be retrieved + via mysql_fetch_row one row at a time, then converted from the + column in this select into the format that the handler expects.
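For illustration only, a minimal standalone sketch of that retrieve-and-convert flow using the MySQL client API (the host, credentials, and table here are placeholder values, not anything defined by this engine; the real handler stores each column into a Field object rather than printing it):

    #include <mysql.h>
    #include <cstdio>

    int main()
    {
      MYSQL *mysql= mysql_init(NULL);
      if (!mysql || !mysql_real_connect(mysql, "remote-host", "user",
                                        "password", "database", 3306,
                                        NULL, 0))
        return 1;
      /* ask the foreign server for the table's rows */
      if (mysql_real_query(mysql, "SELECT * FROM users", 19))
        return 1;
      MYSQL_RES *result= mysql_store_result(mysql);
      if (!result)
        return 1;
      MYSQL_ROW row;
      while ((row= mysql_fetch_row(result)))   /* one row at a time */
      {
        unsigned long *lengths= mysql_fetch_lengths(result);
        /* ha_federated would store each column into its Field object
           here; this sketch just prints the first column */
        if (row[0])
          printf("%.*s\n", (int) lengths[0], row[0]);
        else
          printf("NULL\n");
      }
      mysql_free_result(result);
      mysql_close(mysql);
      return 0;
    }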
+ + The create table will simply create the .frm file, and within the + "CREATE TABLE" SQL, there SHALL be any of the following: + + comment=scheme://username:password@hostname:port/database/tablename + comment=scheme://username@hostname/database/tablename + comment=scheme://username:password@hostname/database/tablename + comment=scheme://username@hostname:port/database/tablename + + An example would be: + + comment=mysql://username:password@hostname:port/database/tablename + + ***IMPORTANT*** + + This is a first, conceptual release. + Only 'mysql://' is supported in this release. + + + This comment connection string is necessary for the handler to be + able to connect to the foreign server. + + + The basic flow is this: + + SQL calls issued locally -> + mysql handler API (data in handler format) -> + mysql client API (data converted to SQL calls) -> + foreign database -> mysql client API -> + convert result sets (if any) to handler format -> + handler API -> results or rows affected to local + + What this handler does and doesn't support + ------------------------------------------ + * Tables MUST be created on the foreign server prior to any action on those + tables via the handler, first version. IMPORTANT: IF you MUST use the + federated storage engine type on the REMOTE end, MAKE SURE [ :) ] That + the table you connect to IS NOT a table pointing BACK to your ORIGINAL + table! You know and have heard the screeching of audio feedback? You + know how, when you put two mirrors in front of each other, the reflection + continues for eternity? Well, need I say more?! + * There will not be support for transactions. + * There is no way for the handler to know if the foreign database or table + has changed. The reason for this is that this database has to work like a + data file that would never be written to by anything other than the + database. The integrity of the data in the local table could be breached + if there was any change to the foreign database. + * Support for SELECT, INSERT, UPDATE, DELETE, indexes. + * No ALTER TABLE, DROP TABLE or any other Data Definition Language calls. + * Prepared statements will not be used in the first implementation; it + remains to be seen whether the limited subset of the client API for the + server supports this. + * This uses SELECT, INSERT, UPDATE, DELETE and not HANDLER for its + implementation. + * This will not work with the query cache. + + Method calls + + A two column table, with one record: + + (SELECT) + + "SELECT * FROM foo" + ha_federated::info + ha_federated::scan_time: + ha_federated::rnd_init: share->select_query SELECT * FROM foo + ha_federated::extra + + <for every row of data retrieved> + ha_federated::rnd_next + ha_federated::convert_row_to_internal_format + ha_federated::rnd_next + </for every row of data retrieved> + + ha_federated::rnd_end + ha_federated::extra + ha_federated::reset + + (INSERT) + + "INSERT INTO foo (id, ts) VALUES (2, now());" + + ha_federated::write_row + + ha_federated::reset + + (UPDATE) + + "UPDATE foo SET ts = now() WHERE id = 1;" + + ha_federated::index_init + ha_federated::index_read + ha_federated::index_read_idx + ha_federated::rnd_next + ha_federated::convert_row_to_internal_format + ha_federated::update_row + + ha_federated::extra + ha_federated::extra + ha_federated::extra + ha_federated::external_lock + ha_federated::reset + + + How do I use this handler?
+ -------------------------- + First of all, you need to build this storage engine: + + ./configure --with-federated-storage-engine + make + + Next, to use this handler, it's very simple. You must + have two databases running, either both on the same host, or + on different hosts. + + On the server that will be connecting to the foreign + host (the client), you create your table as such: + + CREATE TABLE test_table ( + id int(20) NOT NULL auto_increment, + name varchar(32) NOT NULL default '', + other int(20) NOT NULL default '0', + PRIMARY KEY (id), + KEY name (name), + KEY other_key (other)) + ENGINE="FEDERATED" + DEFAULT CHARSET=latin1 + COMMENT='mysql://root@127.0.0.1:9306/federated/test_federated'; + + Notice the "COMMENT" and "ENGINE" fields? This is where you + respectively set the engine type, "FEDERATED", and the foreign + host information, this being the database your 'client' database + will connect to and use as the "data file". Obviously, the foreign + database is running on port 9306, so you want to start up your other + database so that it is indeed on port 9306, and your federated + database on a port other than that. In my setup, I use port 5554 + for federated, and port 5555 for the foreign database. + + Then, on the foreign database: + + CREATE TABLE test_table ( + id int(20) NOT NULL auto_increment, + name varchar(32) NOT NULL default '', + other int(20) NOT NULL default '0', + PRIMARY KEY (id), + KEY name (name), + KEY other_key (other)) + ENGINE="<NAME>" <-- whatever you want, or leave unspecified + DEFAULT CHARSET=latin1 ; + + This table is exactly the same (and must be exactly the same), + except that it is not using the federated handler and does + not need the URL. + + + How to see the handler in action + -------------------------------- + + When developing this handler, I compiled the federated database with + debugging: + + ./configure --with-federated-storage-engine + --prefix=/home/mysql/mysql-build/federated/ --with-debug + + Once compiled, I did a 'make install' (not for the purpose of installing + the binary, but to install all the files the binary expects to see in the + directory I specified in the build with --prefix, + "/home/mysql/mysql-build/federated"). + + Then, I started the foreign server: + + /usr/local/mysql/bin/mysqld_safe + --user=mysql --log=/tmp/mysqld.5555.log -P 5555 + + Then, I went back to the directory containing the newly compiled mysqld, + <builddir>/sql/, started up gdb: + + gdb ./mysqld + + Then, within the (gdb) prompt: + (gdb) run --gdb --port=5554 --socket=/tmp/mysqld.5554 --skip-innodb --debug + + Next, I opened several windows: + + 1. Tail the debug trace: tail -f /tmp/mysqld.trace|grep ha_fed + 2. Tail the SQL calls to the foreign database: tail -f /tmp/mysqld.5555.log + 3. A window with a client open to the federated server on port 5554 + 4. A window with a client open to the foreign server on port 5555 + + I would create a table on the client to the foreign server on port + 5555, and then to the federated server on port 5554. At this point, + I would run whatever queries I wanted to on the federated server, + just always remembering that whatever changes I wanted to make on + the table, or any new tables I created, I would have to do on + the foreign server. + + Another thing to check is 'show variables', to confirm that you have + federated handler support: + + show variables like '%federat%' + + and: + + show storage engines; + + Both should display the federated storage handler.
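To see the flow end to end, an illustrative session (the values are arbitrary examples, using the test_table definition above):

    -- On the federated server (port 5554):
    INSERT INTO test_table (name, other) VALUES ('first row', 1);
    SELECT * FROM test_table;

    -- On the foreign server (port 5555), where the data actually
    -- lives, the same row is visible:
    SELECT * FROM test_table;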
+ + + Testing + ------- + + There is a test for the MySQL Federated Storage Handler in ./mysql-test/t, + federated.test. It starts both a slave and master database using + the same setup that the replication tests use, with the exception that + it turns off replication, and sets replication to ignore the test tables. + After ensuring that you actually do have support for the federated storage + handler, numerous queries/inserts/updates/deletes are run, many derived + from the MyISAM tests, plus some other tests which were meant to reveal + any issues that would be most likely to affect this handler. All tests + should work! ;) + + To run these tests, go into ./mysql-test (based in the directory you + built the server in) + + ./mysql-test-run federated + + If you want to run the test and have debug info: + + ./mysql-test-run --debug federated + + This will run the test in debug mode, and you can view the trace and + log files in the ./mysql-test/var/log directory + + ls -l mysql-test/var/log/ + -rw-r--r-- 1 patg patg 17 4 Dec 12:27 current_test + -rw-r--r-- 1 patg patg 692 4 Dec 12:52 manager.log + -rw-rw---- 1 patg patg 21246 4 Dec 12:51 master-bin.000001 + -rw-rw---- 1 patg patg 68 4 Dec 12:28 master-bin.index + -rw-r--r-- 1 patg patg 1620 4 Dec 12:51 master.err + -rw-rw---- 1 patg patg 23179 4 Dec 12:51 master.log + -rw-rw---- 1 patg patg 16696550 4 Dec 12:51 master.trace + -rw-r--r-- 1 patg patg 0 4 Dec 12:28 mysqltest-time + -rw-r--r-- 1 patg patg 2024051 4 Dec 12:51 mysqltest.trace + -rw-rw---- 1 patg patg 94992 4 Dec 12:51 slave-bin.000001 + -rw-rw---- 1 patg patg 67 4 Dec 12:28 slave-bin.index + -rw-rw---- 1 patg patg 249 4 Dec 12:52 slave-relay-bin.000003 + -rw-rw---- 1 patg patg 73 4 Dec 12:28 slave-relay-bin.index + -rw-r--r-- 1 patg patg 1349 4 Dec 12:51 slave.err + -rw-rw---- 1 patg patg 96206 4 Dec 12:52 slave.log + -rw-rw---- 1 patg patg 15706355 4 Dec 12:51 slave.trace + -rw-r--r-- 1 patg patg 0 4 Dec 12:51 warnings + + Of course, again, you can tail the trace log: + + tail -f mysql-test/var/log/master.trace |grep ha_fed + + As well as the slave query log: + + tail -f mysql-test/var/log/slave.log + + Files that comprise the test suite + ---------------------------------- + mysql-test/t/federated.test + mysql-test/r/federated.result + mysql-test/r/have_federated_db.require + mysql-test/include/have_federated_db.inc + + + Other tidbits + ------------- + + These were the files that were modified or created for this + Federated handler to work: + + ./configure.in + ./sql/Makefile.am + ./config/ac_macros/ha_federated.m4 + ./sql/handler.cc + ./sql/mysqld.cc + ./sql/set_var.cc + ./sql/field.h + ./sql/sql_string.h + ./mysql-test/mysql-test-run(.sh) + ./mysql-test/t/federated.test + ./mysql-test/r/federated.result + ./mysql-test/r/have_federated_db.require + ./mysql-test/include/have_federated_db.inc + ./sql/ha_federated.cc + ./sql/ha_federated.h + +*/ + + +#define MYSQL_SERVER 1 +#include "mysql_priv.h" +#include <mysql/plugin.h> + +#ifdef USE_PRAGMA_IMPLEMENTATION +#pragma implementation // gcc: Class implementation +#endif + +#include "ha_federated.h" + +#include "m_string.h" + +#include <mysql/plugin.h> + +/* Variables for federated share methods */ +static HASH federated_open_tables; // To track open tables +pthread_mutex_t federated_mutex; // To init the hash +static int federated_init= FALSE; // Checking the state of hash + +/* Variables used when chopping off trailing characters */ +static const uint sizeof_trailing_comma= sizeof(", ") - 1; +static const uint
sizeof_trailing_closeparen= sizeof(") ") - 1; +static const uint sizeof_trailing_and= sizeof(" AND ") - 1; +static const uint sizeof_trailing_where= sizeof(" WHERE ") - 1; + +/* Static declaration for handlerton */ +static handler *federated_create_handler(TABLE_SHARE *table, + MEM_ROOT *mem_root); +static int federated_commit(THD *thd, bool all); +static int federated_rollback(THD *thd, bool all); + +/* Federated storage engine handlerton */ + +handlerton federated_hton; + +static handler *federated_create_handler(TABLE_SHARE *table, + MEM_ROOT *mem_root) +{ + return new (mem_root) ha_federated(table); +} + + +/* Function we use in the creation of our hash to get key */ + +static byte *federated_get_key(FEDERATED_SHARE *share, uint *length, + my_bool not_used __attribute__ ((unused))) +{ + *length= share->connect_string_length; + return (byte*) share->scheme; +} + +/* + Initialize the federated handler. + + SYNOPSIS + federated_db_init() + void + + RETURN + FALSE OK + TRUE Error +*/ + +int federated_db_init() +{ + DBUG_ENTER("federated_db_init"); + + federated_hton.state= SHOW_OPTION_YES; + federated_hton.db_type= DB_TYPE_FEDERATED_DB; + federated_hton.commit= federated_commit; + federated_hton.rollback= federated_rollback; + federated_hton.create= federated_create_handler; + federated_hton.panic= federated_db_end; + federated_hton.flags= HTON_ALTER_NOT_SUPPORTED; + + if (pthread_mutex_init(&federated_mutex, MY_MUTEX_INIT_FAST)) + goto error; + if (!hash_init(&federated_open_tables, &my_charset_bin, 32, 0, 0, + (hash_get_key) federated_get_key, 0, 0)) + { + federated_init= TRUE; + DBUG_RETURN(FALSE); + } + + VOID(pthread_mutex_destroy(&federated_mutex)); +error: + have_federated_db= SHOW_OPTION_DISABLED; // If we couldn't use handler + DBUG_RETURN(TRUE); +} + + +/* + Release the federated handler. + + SYNOPSIS + federated_db_end() + + RETURN + FALSE OK +*/ + +int federated_db_end(ha_panic_function type) +{ + if (federated_init) + { + hash_free(&federated_open_tables); + VOID(pthread_mutex_destroy(&federated_mutex)); + } + federated_init= 0; + return 0; +} + + +/* + Check (in create) whether the table exists, and that it can be connected to + + SYNOPSIS + check_foreign_data_source() + share pointer to FEDERATED share + table_create_flag tells us that ::create is the caller, + therefore, return CANT_CREATE_FEDERATED_TABLE + + DESCRIPTION + This method first checks that the connection information that parse url + has populated into the share will be sufficient to connect to the foreign + table, and if so, does the foreign table exist. +*/ + +static int check_foreign_data_source(FEDERATED_SHARE *share, + bool table_create_flag) +{ + char escaped_table_name[NAME_LEN*2]; + char query_buffer[FEDERATED_QUERY_BUFFER_SIZE]; + char error_buffer[FEDERATED_QUERY_BUFFER_SIZE]; + uint error_code; + String query(query_buffer, sizeof(query_buffer), &my_charset_bin); + MYSQL *mysql; + DBUG_ENTER("ha_federated::check_foreign_data_source"); + + /* Zero the length, otherwise the string will have misc chars */ + query.length(0); + + /* error out if we can't alloc memory for mysql_init(NULL) (per Georg) */ + if (!(mysql= mysql_init(NULL))) + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + /* check if we can connect */ + if (!mysql_real_connect(mysql, + share->hostname, + share->username, + share->password, + share->database, + share->port, + share->socket, 0)) + { + /* + we want the correct error message, but we want it to return + ER_CANT_CREATE_FEDERATED_TABLE if called by ::create + */ + error_code= (table_create_flag ?
+ ER_CANT_CREATE_FEDERATED_TABLE : + ER_CONNECT_TO_FOREIGN_DATA_SOURCE); + + my_sprintf(error_buffer, + (error_buffer, + "database: '%s' username: '%s' hostname: '%s'", + share->database, share->username, share->hostname)); + + my_error(ER_CONNECT_TO_FOREIGN_DATA_SOURCE, MYF(0), error_buffer); + goto error; + } + else + { + int escaped_table_name_length= 0; + /* + Since we do not support transactions at this version, we can let the + client API silently reconnect. For future versions, we will need more + logic to deal with transactions + */ + mysql->reconnect= 1; + /* + Note: I am not using INFORMATION_SCHEMA because this needs to work with + versions prior to 5.0 + + if we can connect, then make sure the table exists + + the query will be: SELECT * FROM `tablename` WHERE 1=0 + */ + query.append(STRING_WITH_LEN("SELECT * FROM `")); + escaped_table_name_length= + escape_string_for_mysql(&my_charset_bin, (char*)escaped_table_name, + sizeof(escaped_table_name), + share->table_name, + share->table_name_length); + query.append(escaped_table_name, escaped_table_name_length); + query.append(STRING_WITH_LEN("` WHERE 1=0")); + + if (mysql_real_query(mysql, query.ptr(), query.length())) + { + error_code= table_create_flag ? + ER_CANT_CREATE_FEDERATED_TABLE : ER_FOREIGN_DATA_SOURCE_DOESNT_EXIST; + my_sprintf(error_buffer, (error_buffer, "error: %d '%s'", + mysql_errno(mysql), mysql_error(mysql))); + + my_error(error_code, MYF(0), error_buffer); + goto error; + } + } + error_code=0; + +error: + mysql_close(mysql); + DBUG_RETURN(error_code); +} + + +static int parse_url_error(FEDERATED_SHARE *share, TABLE *table, int error_num) +{ + char buf[FEDERATED_QUERY_BUFFER_SIZE]; + int buf_len; + DBUG_ENTER("ha_federated parse_url_error"); + + if (share->scheme) + { + DBUG_PRINT("info", + ("error: parse_url. Returning error code %d \ + freeing share->scheme %lx", error_num, share->scheme)); + my_free((gptr) share->scheme, MYF(0)); + share->scheme= 0; + } + buf_len= min(table->s->connect_string.length, + FEDERATED_QUERY_BUFFER_SIZE-1); + strmake(buf, table->s->connect_string.str, buf_len); + my_error(error_num, MYF(0), buf); + DBUG_RETURN(error_num); +} + +/* + Parse connection info from table->s->connect_string + + SYNOPSIS + parse_url() + share pointer to FEDERATED share + table pointer to current TABLE class + table_create_flag determines what error to throw + + DESCRIPTION + Populates the share with information about the connection + to the foreign database that will serve as the data source. + This string must be specified (currently) in the "comment" field, + listed in the CREATE TABLE statement. + + This string MUST be in the format of any of these: + + scheme://username:password@hostname:port/database/table + scheme://username@hostname/database/table + scheme://username@hostname:port/database/table + scheme://username:password@hostname/database/table + + An example: + + mysql://joe:joespass@192.168.1.111:9308/federated/testtable + + ***IMPORTANT*** + Currently, only "mysql://" is supported. + + 'password' and 'port' are both optional. + + RETURN VALUE + 0 success + error_num particular error code + +*/ + +static int parse_url(FEDERATED_SHARE *share, TABLE *table, + uint table_create_flag) +{ + uint error_num= (table_create_flag ?
+ ER_FOREIGN_DATA_STRING_INVALID_CANT_CREATE : + ER_FOREIGN_DATA_STRING_INVALID); + DBUG_ENTER("ha_federated::parse_url"); + + share->port= 0; + share->socket= 0; + DBUG_PRINT("info", ("Length: %d", table->s->connect_string.length)); + DBUG_PRINT("info", ("String: '%.*s'", table->s->connect_string.length, + table->s->connect_string.str)); + share->scheme= my_strndup(table->s->connect_string.str, + table->s->connect_string.length, + MYF(0)); + + share->connect_string_length= table->s->connect_string.length; + DBUG_PRINT("info",("parse_url alloced share->scheme %lx", share->scheme)); + + /* + remove addition of null terminator and store length + for each string in share + */ + if (!(share->username= strstr(share->scheme, "://"))) + goto error; + share->scheme[share->username - share->scheme]= '\0'; + + if (strcmp(share->scheme, "mysql") != 0) + goto error; + + share->username+= 3; + + if (!(share->hostname= strchr(share->username, '@'))) + goto error; + + share->username[share->hostname - share->username]= '\0'; + share->hostname++; + + if ((share->password= strchr(share->username, ':'))) + { + share->username[share->password - share->username]= '\0'; + share->password++; + share->username= share->username; + /* make sure there isn't an extra / or @ */ + if ((strchr(share->password, '/') || strchr(share->hostname, '@'))) + goto error; + /* + Found that if the string is: + user:@hostname:port/database/table + Then password is a null string, so set to NULL + */ + if ((share->password[0] == '\0')) + share->password= NULL; + } + else + share->username= share->username; + + /* make sure there isn't an extra / or @ */ + if ((strchr(share->username, '/')) || (strchr(share->hostname, '@'))) + goto error; + + if (!(share->database= strchr(share->hostname, '/'))) + goto error; + share->hostname[share->database - share->hostname]= '\0'; + share->database++; + + if ((share->sport= strchr(share->hostname, ':'))) + { + share->hostname[share->sport - share->hostname]= '\0'; + share->sport++; + if (share->sport[0] == '\0') + share->sport= NULL; + else + share->port= atoi(share->sport); + } + + if (!(share->table_name= strchr(share->database, '/'))) + goto error; + share->database[share->table_name - share->database]= '\0'; + share->table_name++; + + share->table_name_length= strlen(share->table_name); + + /* make sure there's not an extra / */ + if ((strchr(share->table_name, '/'))) + goto error; + + if (share->hostname[0] == '\0') + share->hostname= NULL; + + if (!share->port) + { + if (strcmp(share->hostname, my_localhost) == 0) + share->socket= my_strdup(MYSQL_UNIX_ADDR, MYF(0)); + else + share->port= MYSQL_PORT; + } + + DBUG_PRINT("info", + ("scheme: %s username: %s password: %s \ + hostname: %s port: %d database: %s tablename: %s", + share->scheme, share->username, share->password, + share->hostname, share->port, share->database, + share->table_name)); + + DBUG_RETURN(0); + +error: + DBUG_RETURN(parse_url_error(share, table, error_num)); +} + + +/***************************************************************************** +** FEDERATED tables +*****************************************************************************/ + +ha_federated::ha_federated(TABLE_SHARE *table_arg) + :handler(&federated_hton, table_arg), + mysql(0), stored_result(0) +{ + trx_next= 0; +} + + +/* + Convert MySQL result set row to handler internal format + + SYNOPSIS + convert_row_to_internal_format() + record Byte pointer to record + row MySQL result set row from fetchrow() + result Result set to use + + DESCRIPTION + This 
method simply iterates through a row returned via fetchrow with + values from a successful SELECT , and then stores each column's value + in the field object via the field object pointer (pointing to the table's + array of field object pointers). This is how the handler needs the data + to be stored to then return results back to the user + + RETURN VALUE + 0 After fields have had field values stored from record +*/ + +uint ha_federated::convert_row_to_internal_format(byte *record, + MYSQL_ROW row, + MYSQL_RES *result) +{ + ulong *lengths; + Field **field; + my_bitmap_map *old_map= dbug_tmp_use_all_columns(table, table->write_set); + DBUG_ENTER("ha_federated::convert_row_to_internal_format"); + + lengths= mysql_fetch_lengths(result); + + for (field= table->field; *field; field++, row++, lengths++) + { + /* + index variable to move us through the row at the + same iterative step as the field + */ + my_ptrdiff_t old_ptr; + old_ptr= (my_ptrdiff_t) (record - table->record[0]); + (*field)->move_field_offset(old_ptr); + if (!*row) + (*field)->set_null(); + else + { + if (bitmap_is_set(table->read_set, (*field)->field_index)) + { + (*field)->set_notnull(); + (*field)->store(*row, *lengths, &my_charset_bin); + } + } + (*field)->move_field_offset(-old_ptr); + } + dbug_tmp_restore_column_map(table->write_set, old_map); + DBUG_RETURN(0); +} + +static bool emit_key_part_name(String *to, KEY_PART_INFO *part) +{ + DBUG_ENTER("emit_key_part_name"); + if (to->append(STRING_WITH_LEN("`")) || + to->append(part->field->field_name) || + to->append(STRING_WITH_LEN("`"))) + DBUG_RETURN(1); // Out of memory + DBUG_RETURN(0); +} + +static bool emit_key_part_element(String *to, KEY_PART_INFO *part, + bool needs_quotes, bool is_like, + const byte *ptr, uint len) +{ + Field *field= part->field; + DBUG_ENTER("emit_key_part_element"); + + if (needs_quotes && to->append(STRING_WITH_LEN("'"))) + DBUG_RETURN(1); + + if (part->type == HA_KEYTYPE_BIT) + { + char buff[STRING_BUFFER_USUAL_SIZE], *buf= buff; + + *buf++= '0'; + *buf++= 'x'; + buf= octet2hex(buf, (char*) ptr, len); + if (to->append((char*) buff, (uint)(buf - buff))) + DBUG_RETURN(1); + } + else if (part->key_part_flag & HA_BLOB_PART) + { + String blob; + uint blob_length= uint2korr(ptr); + blob.set_quick((char*) ptr+HA_KEY_BLOB_LENGTH, + blob_length, &my_charset_bin); + if (append_escaped(to, &blob)) + DBUG_RETURN(1); + } + else if (part->key_part_flag & HA_VAR_LENGTH_PART) + { + String varchar; + uint var_length= uint2korr(ptr); + varchar.set_quick((char*) ptr+HA_KEY_BLOB_LENGTH, + var_length, &my_charset_bin); + if (append_escaped(to, &varchar)) + DBUG_RETURN(1); + } + else + { + char strbuff[MAX_FIELD_WIDTH]; + String str(strbuff, sizeof(strbuff), part->field->charset()), *res; + + res= field->val_str(&str, (char *)ptr); + + if (field->result_type() == STRING_RESULT) + { + if (append_escaped(to, res)) + DBUG_RETURN(1); + } + else if (to->append(res->ptr(), res->length())) + DBUG_RETURN(1); + } + + if (is_like && to->append(STRING_WITH_LEN("%"))) + DBUG_RETURN(1); + + if (needs_quotes && to->append(STRING_WITH_LEN("'"))) + DBUG_RETURN(1); + + DBUG_RETURN(0); +} + +/* + Create a WHERE clause based off of values in keys + Note: This code was inspired by key_copy from key.cc + + SYNOPSIS + create_where_from_key () + to String object to store WHERE clause + key_info KEY struct pointer + key byte pointer containing key + key_length length of key + range_type 0 - no range, 1 - min range, 2 - max range + (see enum range_operation) + + DESCRIPTION + Using iteration 
through all the keys via a KEY_PART_INFO pointer, this method + 'extracts' the value of each key in the byte pointer + *key, and for each key found, constructs an appropriate WHERE clause + + RETURN VALUE + 0 After all keys have been accounted for to create the WHERE clause + 1 No keys found + + Range flags Table per Timour: + + ----------------- + - start_key: + * ">" -> HA_READ_AFTER_KEY + * ">=" -> HA_READ_KEY_OR_NEXT + * "=" -> HA_READ_KEY_EXACT + + - end_key: + * "<" -> HA_READ_BEFORE_KEY + * "<=" -> HA_READ_AFTER_KEY + + records_in_range: + ----------------- + - start_key: + * ">" -> HA_READ_AFTER_KEY + * ">=" -> HA_READ_KEY_EXACT + * "=" -> HA_READ_KEY_EXACT + + - end_key: + * "<" -> HA_READ_BEFORE_KEY + * "<=" -> HA_READ_AFTER_KEY + * "=" -> HA_READ_AFTER_KEY + +0 HA_READ_KEY_EXACT, Find first record else error +1 HA_READ_KEY_OR_NEXT, Record or next record +2 HA_READ_KEY_OR_PREV, Record or previous +3 HA_READ_AFTER_KEY, Find next rec. after key-record +4 HA_READ_BEFORE_KEY, Find next rec. before key-record +5 HA_READ_PREFIX, Key which has the same prefix +6 HA_READ_PREFIX_LAST, Last key with the same prefix +7 HA_READ_PREFIX_LAST_OR_PREV, Last or prev key with the same prefix + +Flags that I've found: + +id, primary key, varchar + +id = 'ccccc' +records_in_range: start_key 0 end_key 3 +read_range_first: start_key 0 end_key NULL + +id > 'ccccc' +records_in_range: start_key 3 end_key NULL +read_range_first: start_key 3 end_key NULL + +id < 'ccccc' +records_in_range: start_key NULL end_key 4 +read_range_first: start_key NULL end_key 4 + +id <= 'ccccc' +records_in_range: start_key NULL end_key 3 +read_range_first: start_key NULL end_key 3 + +id >= 'ccccc' +records_in_range: start_key 0 end_key NULL +read_range_first: start_key 1 end_key NULL + +id like 'cc%cc' +records_in_range: start_key 0 end_key 3 +read_range_first: start_key 1 end_key 3 + +id > 'aaaaa' and id < 'ccccc' +records_in_range: start_key 3 end_key 4 +read_range_first: start_key 3 end_key 4 + +id >= 'aaaaa' and id < 'ccccc'; +records_in_range: start_key 0 end_key 4 +read_range_first: start_key 1 end_key 4 + +id >= 'aaaaa' and id <= 'ccccc'; +records_in_range: start_key 0 end_key 3 +read_range_first: start_key 1 end_key 3 + +id > 'aaaaa' and id <= 'ccccc'; +records_in_range: start_key 3 end_key 3 +read_range_first: start_key 3 end_key 3 + +numeric keys: + +id = 4 +index_read_idx: start_key 0 end_key NULL + +id > 4 +records_in_range: start_key 3 end_key NULL +read_range_first: start_key 3 end_key NULL + +id >= 4 +records_in_range: start_key 0 end_key NULL +read_range_first: start_key 1 end_key NULL + +id < 4 +records_in_range: start_key NULL end_key 4 +read_range_first: start_key NULL end_key 4 + +id <= 4 +records_in_range: start_key NULL end_key 3 +read_range_first: start_key NULL end_key 3 + +id like 4 +full table scan, select * from + +id > 2 and id < 8 +records_in_range: start_key 3 end_key 4 +read_range_first: start_key 3 end_key 4 + +id >= 2 and id < 8 +records_in_range: start_key 0 end_key 4 +read_range_first: start_key 1 end_key 4 + +id >= 2 and id <= 8 +records_in_range: start_key 0 end_key 3 +read_range_first: start_key 1 end_key 3 + +id > 2 and id <= 8 +records_in_range: start_key 3 end_key 3 +read_range_first: start_key 3 end_key 3 + +multi keys (id int, name varchar, other varchar) + +id = 1; +records_in_range: start_key 0 end_key 3 +read_range_first: start_key 0 end_key NULL + +id > 4; +id > 2 and name = '333'; remote: id > 2 +id > 2 and name > '333'; remote: id > 2 +id > 2 and name > '333' and other < 'ddd';
remote: id > 2 no results +id > 2 and name >= '333' and other < 'ddd'; remote: id > 2 1 result +id >= 4 and name = 'eric was here' and other > 'eeee'; +records_in_range: start_key 3 end_key NULL +read_range_first: start_key 3 end_key NULL + +id >= 4; +id >= 2 and name = '333' and other < 'ddd'; +remote: `id` >= 2 AND `name` >= '333'; +records_in_range: start_key 0 end_key NULL +read_range_first: start_key 1 end_key NULL + +id < 4; +id < 3 and name = '222' and other <= 'ccc'; remote: id < 3 +records_in_range: start_key NULL end_key 4 +read_range_first: start_key NULL end_key 4 + +id <= 4; +records_in_range: start_key NULL end_key 3 +read_range_first: start_key NULL end_key 3 + +id like 4; +full table scan + +id > 2 and id < 4; +records_in_range: start_key 3 end_key 4 +read_range_first: start_key 3 end_key 4 + +id >= 2 and id < 4; +records_in_range: start_key 0 end_key 4 +read_range_first: start_key 1 end_key 4 + +id >= 2 and id <= 4; +records_in_range: start_key 0 end_key 3 +read_range_first: start_key 1 end_key 3 + +id > 2 and id <= 4; +id = 6 and name = 'eric was here' and other > 'eeee'; +remote: (`id` > 6 AND `name` > 'eric was here' AND `other` > 'eeee') +AND (`id` <= 6) AND ( AND `name` <= 'eric was here') +no results +records_in_range: start_key 3 end_key 3 +read_range_first: start_key 3 end_key 3 + +Summary: + +* If the start key flag is 0 the max key flag shouldn't even be set, + and if it is, the query produced would be invalid. +* Multipart keys, even if containing some or all numeric columns, + are treated the same as non-numeric keys + + If the query is " = " (quotes or not): + - records in range start key flag HA_READ_KEY_EXACT, + end key flag HA_READ_AFTER_KEY (incorrect) + - any other: start key flag HA_READ_KEY_OR_NEXT, + end key flag HA_READ_AFTER_KEY (correct) + +* 'like' queries (of key) + - Numeric, full table scan + - Non-numeric + records_in_range: start_key 0 end_key 3 + other : start_key 1 end_key 3 + +* If the key flag is HA_READ_AFTER_KEY: + if start_key, append > + if end_key, append <= + +* If create_where_key was called by records_in_range: + + - if the key is numeric: + start key flag is 0 when end key is NULL, end key flag is 3 or 4 + - if create_where_key was called by any other function: + start key flag is 1 when end key is NULL, end key flag is 3 or 4 + - if the key is non-numeric, or multipart + When the query is an exact match, the start key flag is 0, + end key flag is 3 for what should be a no-range condition where + you should have 0 and max key NULL, which it is if called by + read_range_first + +Conclusion: + +1. Need logic to determine if a key is min or max when the flag is +HA_READ_AFTER_KEY, and handle appending the correct operator accordingly + +2. Need a boolean flag to pass to create_where_from_key, used in the +switch statement. Add 1 to the flag if: + - start key flag is HA_READ_KEY_EXACT and the end key is NULL + +*/ + +bool ha_federated::create_where_from_key(String *to, + KEY *key_info, + const key_range *start_key, + const key_range *end_key, + bool records_in_range, + bool eq_range) +{ + bool both_not_null= + (start_key != NULL && end_key != NULL) ?
TRUE : FALSE; + const byte *ptr; + uint remainder, length; + char tmpbuff[FEDERATED_QUERY_BUFFER_SIZE]; + String tmp(tmpbuff, sizeof(tmpbuff), system_charset_info); + const key_range *ranges[2]= { start_key, end_key }; + my_bitmap_map *old_map; + DBUG_ENTER("ha_federated::create_where_from_key"); + + tmp.length(0); + if (start_key == NULL && end_key == NULL) + DBUG_RETURN(1); + + old_map= dbug_tmp_use_all_columns(table, table->write_set); + for (uint i= 0; i <= 1; i++) + { + bool needs_quotes; + KEY_PART_INFO *key_part; + if (ranges[i] == NULL) + continue; + + if (both_not_null) + { + if (i > 0) + tmp.append(STRING_WITH_LEN(") AND (")); + else + tmp.append(STRING_WITH_LEN(" (")); + } + + for (key_part= key_info->key_part, + remainder= key_info->key_parts, + length= ranges[i]->length, + ptr= ranges[i]->key; ; + remainder--, + key_part++) + { + Field *field= key_part->field; + uint store_length= key_part->store_length; + uint part_length= min(store_length, length); + needs_quotes= field->str_needs_quotes(); + DBUG_DUMP("key, start of loop", (char *) ptr, length); + + if (key_part->null_bit) + { + if (*ptr++) + { + if (emit_key_part_name(&tmp, key_part) || + tmp.append(STRING_WITH_LEN(" IS NULL "))) + goto err; + continue; + } + } + + if (tmp.append(STRING_WITH_LEN(" ("))) + goto err; + + switch (ranges[i]->flag) { + case HA_READ_KEY_EXACT: + DBUG_PRINT("info", ("federated HA_READ_KEY_EXACT %d", i)); + if (store_length >= length || + !needs_quotes || + key_part->type == HA_KEYTYPE_BIT || + field->result_type() != STRING_RESULT) + { + if (emit_key_part_name(&tmp, key_part)) + goto err; + + if (records_in_range) + { + if (tmp.append(STRING_WITH_LEN(" >= "))) + goto err; + } + else + { + if (tmp.append(STRING_WITH_LEN(" = "))) + goto err; + } + + if (emit_key_part_element(&tmp, key_part, needs_quotes, 0, ptr, + part_length)) + goto err; + } + else + { + /* LIKE */ + if (emit_key_part_name(&tmp, key_part) || + tmp.append(STRING_WITH_LEN(" LIKE ")) || + emit_key_part_element(&tmp, key_part, needs_quotes, 1, ptr, + part_length)) + goto err; + } + break; + case HA_READ_AFTER_KEY: + if (eq_range) + { + if (tmp.append("1=1")) // Dummy + goto err; + break; + } + DBUG_PRINT("info", ("federated HA_READ_AFTER_KEY %d", i)); + if (store_length >= length) /* end key */ + { + if (emit_key_part_name(&tmp, key_part)) + goto err; + + if (i > 0) /* end key */ + { + if (tmp.append(STRING_WITH_LEN(" <= "))) + goto err; + } + else /* start key */ + { + if (tmp.append(STRING_WITH_LEN(" > "))) + goto err; + } + + if (emit_key_part_element(&tmp, key_part, needs_quotes, 0, ptr, + part_length)) + { + goto err; + } + break; + } + case HA_READ_KEY_OR_NEXT: + DBUG_PRINT("info", ("federated HA_READ_KEY_OR_NEXT %d", i)); + if (emit_key_part_name(&tmp, key_part) || + tmp.append(STRING_WITH_LEN(" >= ")) || + emit_key_part_element(&tmp, key_part, needs_quotes, 0, ptr, + part_length)) + goto err; + break; + case HA_READ_BEFORE_KEY: + DBUG_PRINT("info", ("federated HA_READ_BEFORE_KEY %d", i)); + if (store_length >= length) + { + if (emit_key_part_name(&tmp, key_part) || + tmp.append(STRING_WITH_LEN(" < ")) || + emit_key_part_element(&tmp, key_part, needs_quotes, 0, ptr, + part_length)) + goto err; + break; + } + case HA_READ_KEY_OR_PREV: + DBUG_PRINT("info", ("federated HA_READ_KEY_OR_PREV %d", i)); + if (emit_key_part_name(&tmp, key_part) || + tmp.append(STRING_WITH_LEN(" <= ")) || + emit_key_part_element(&tmp, key_part, needs_quotes, 0, ptr, + part_length)) + goto err; + break; + default: + DBUG_PRINT("info",("cannot handle flag 
%d", ranges[i]->flag)); + goto err; + } + if (tmp.append(STRING_WITH_LEN(") "))) + goto err; + +next_loop: + if (store_length >= length) + break; + DBUG_PRINT("info", ("remainder %d", remainder)); + DBUG_ASSERT(remainder > 1); + length-= store_length; + ptr+= store_length; + if (tmp.append(STRING_WITH_LEN(" AND "))) + goto err; + + DBUG_PRINT("info", + ("create_where_from_key WHERE clause: %s", + tmp.c_ptr_quick())); + } + } + dbug_tmp_restore_column_map(table->write_set, old_map); + + if (both_not_null) + if (tmp.append(STRING_WITH_LEN(") "))) + DBUG_RETURN(1); + + if (to->append(STRING_WITH_LEN(" WHERE "))) + DBUG_RETURN(1); + + if (to->append(tmp)) + DBUG_RETURN(1); + + DBUG_RETURN(0); + +err: + dbug_tmp_restore_column_map(table->write_set, old_map); + DBUG_RETURN(1); +} + +/* + Example of simple lock controls. The "share" it creates is structure we will + pass to each federated handler. Do you have to have one of these? Well, you + have pieces that are used for locking, and they are needed to function. +*/ + +static FEDERATED_SHARE *get_share(const char *table_name, TABLE *table) +{ + char *select_query; + char query_buffer[FEDERATED_QUERY_BUFFER_SIZE]; + Field **field; + String query(query_buffer, sizeof(query_buffer), &my_charset_bin); + FEDERATED_SHARE *share= NULL, tmp_share; + /* + In order to use this string, we must first zero it's length, + or it will contain garbage + */ + query.length(0); + + pthread_mutex_lock(&federated_mutex); + + if (parse_url(&tmp_share, table, 0)) + goto error; + + /* TODO: change tmp_share.scheme to LEX_STRING object */ + if (!(share= (FEDERATED_SHARE *) hash_search(&federated_open_tables, + (byte*) tmp_share.scheme, + tmp_share. + connect_string_length))) + { + query.set_charset(system_charset_info); + query.append(STRING_WITH_LEN("SELECT ")); + for (field= table->field; *field; field++) + { + query.append(STRING_WITH_LEN("`")); + query.append((*field)->field_name); + query.append(STRING_WITH_LEN("`, ")); + } + /* chops off trailing comma */ + query.length(query.length() - sizeof_trailing_comma); + + query.append(STRING_WITH_LEN(" FROM `")); + + if (!(share= (FEDERATED_SHARE *) + my_multi_malloc(MYF(MY_WME), + &share, sizeof(*share), + &select_query, + query.length()+table->s->connect_string.length+1, + NullS))) + goto error; + + memcpy(share, &tmp_share, sizeof(tmp_share)); + + share->table_name_length= strlen(share->table_name); + /* TODO: share->table_name to LEX_STRING object */ + query.append(share->table_name, share->table_name_length); + query.append(STRING_WITH_LEN("`")); + share->select_query= select_query; + strmov(share->select_query, query.ptr()); + share->use_count= 0; + DBUG_PRINT("info", + ("share->select_query %s", share->select_query)); + + if (my_hash_insert(&federated_open_tables, (byte*) share)) + goto error; + thr_lock_init(&share->lock); + pthread_mutex_init(&share->mutex, MY_MUTEX_INIT_FAST); + } + share->use_count++; + pthread_mutex_unlock(&federated_mutex); + + return share; + +error: + pthread_mutex_unlock(&federated_mutex); + my_free((gptr) tmp_share.scheme, MYF(MY_ALLOW_ZERO_PTR)); + my_free((gptr) share, MYF(MY_ALLOW_ZERO_PTR)); + return NULL; +} + + +/* + Free lock controls. We call this whenever we close a table. + If the table had the last reference to the share then we + free memory associated with it. 
+*/ + +static int free_share(FEDERATED_SHARE *share) +{ + DBUG_ENTER("free_share"); + + pthread_mutex_lock(&federated_mutex); + if (!--share->use_count) + { + hash_delete(&federated_open_tables, (byte*) share); + my_free((gptr) share->scheme, MYF(MY_ALLOW_ZERO_PTR)); + my_free((gptr) share->socket, MYF(MY_ALLOW_ZERO_PTR)); + thr_lock_delete(&share->lock); + VOID(pthread_mutex_destroy(&share->mutex)); + my_free((gptr) share, MYF(0)); + } + pthread_mutex_unlock(&federated_mutex); + + DBUG_RETURN(0); +} + + +ha_rows ha_federated::records_in_range(uint inx, key_range *start_key, + key_range *end_key) +{ + /* + We really want indexes to be used as often as possible, therefore + we just need to hard-code the return value to a very low number to + force the issue + */ + DBUG_ENTER("ha_federated::records_in_range"); + DBUG_RETURN(FEDERATED_RECORDS_IN_RANGE); +} +/* + If frm_error() is called then we will use this to find out + what file extensions exist for the storage engine. This is + also used by the default rename_table and delete_table method + in handler.cc. +*/ + +const char **ha_federated::bas_ext() const +{ + static const char *ext[]= + { + NullS + }; + return ext; +} + + +/* + Used for opening tables. The name will be the name of the file. + A table is opened when it needs to be opened. For instance + when a request comes in for a select on the table (tables are not + open and closed for each request, they are cached). + + Called from handler.cc by handler::ha_open(). The server opens + all tables by calling ha_open() which then calls the handler + specific open(). +*/ + +int ha_federated::open(const char *name, int mode, uint test_if_locked) +{ + DBUG_ENTER("ha_federated::open"); + + if (!(share= get_share(name, table))) + DBUG_RETURN(1); + thr_lock_data_init(&share->lock, &lock, NULL); + + /* Connect to foreign database mysql_real_connect() */ + mysql= mysql_init(0); + if (!mysql || !mysql_real_connect(mysql, + share->hostname, + share->username, + share->password, + share->database, + share->port, + share->socket, 0)) + { + free_share(share); + DBUG_RETURN(stash_remote_error()); + } + /* + Since we do not support transactions at this version, we can let the client + API silently reconnect. For future versions, we will need more logic to + deal with transactions + */ + mysql->reconnect= 1; + + ref_length= (table->s->primary_key != MAX_KEY ? + table->key_info[table->s->primary_key].key_length : + table->s->reclength); + DBUG_PRINT("info", ("ref_length: %u", ref_length)); + + DBUG_RETURN(0); +} + + +/* + Closes a table. We call the free_share() function to free any resources + that we have allocated in the "shared" structure. + + Called from sql_base.cc, sql_select.cc, and table.cc. + In sql_select.cc it is only used to close up temporary tables or during + the process where a temporary table is converted over to being a + myisam table. + For sql_base.cc look at close_data_tables(). +*/ + +int ha_federated::close(void) +{ + int retval; + DBUG_ENTER("ha_federated::close"); + + /* free the result set */ + if (stored_result) + { + mysql_free_result(stored_result); + stored_result= 0; + } + /* Disconnect from mysql */ + if (mysql) // QQ is this really needed + mysql_close(mysql); + retval= free_share(share); + DBUG_RETURN(retval); + +} + +/* + + Checks if a field in a record is SQL NULL.
+ + SYNOPSIS + field_in_record_is_null() + table TABLE pointer, MySQL table object + field Field pointer, MySQL field object + record char pointer, contains record + + DESCRIPTION + This method uses the record format information in table to track + the null bit in record. + + RETURN VALUE + 1 if NULL + 0 otherwise +*/ + +inline uint field_in_record_is_null(TABLE *table, + Field *field, + char *record) +{ + int null_offset; + DBUG_ENTER("ha_federated::field_in_record_is_null"); + + if (!field->null_ptr) + DBUG_RETURN(0); + + null_offset= (uint) ((char*)field->null_ptr - (char*)table->record[0]); + + if (record[null_offset] & field->null_bit) + DBUG_RETURN(1); + + DBUG_RETURN(0); +} + + +/* + write_row() inserts a row. No extra() hint is given currently if a bulk load + is happening. buf is a byte array of data. You can use the field + information to extract the data from the native byte array type. + Example of this would be: + for (Field **field=table->field ; *field ; field++) + { + ... + } + + Called from item_sum.cc, item_sum.cc, sql_acl.cc, sql_insert.cc, + sql_insert.cc, sql_select.cc, sql_table.cc, sql_udf.cc, and sql_update.cc. +*/ + +int ha_federated::write_row(byte *buf) +{ + /* + I need a bool again; in 5.0, I used table->s->fields to accomplish this. + This worked as a flag that says there are fields with values or not. + In 5.1, this value doesn't work the same, and I end up with the code + truncating the open parenthesis: + + the statement "INSERT INTO t1 VALUES ()" ends up being first built + in two strings + "INSERT INTO t1 (" + and + " VALUES (" + + If there are fields with values, they get appended, with commas, and on + the last loop, a trailing comma is there + + "INSERT INTO t1 ( col1, col2, colN, " + + " VALUES ( 'val1', 'val2', 'valN', " + + Then, if there are fields, it should decrement the string by ", " length. + + "INSERT INTO t1 ( col1, col2, colN" + " VALUES ( 'val1', 'val2', 'valN'" + + Then it adds a close paren to both - if there are fields + + "INSERT INTO t1 ( col1, col2, colN)" + " VALUES ( 'val1', 'val2', 'valN')" + + Then appends both together + "INSERT INTO t1 ( col1, col2, colN) VALUES ( 'val1', 'val2', 'valN')" + + So...
the problem, is if you have the original statement: + + "INSERT INTO t1 VALUES ()" + + Which is legitimate, but if the code thinks there are fields + + "INSERT INTO t1 (" + " VALUES ( " + + If the field flag is set, but there are no commas, reduces the + string by strlen(", ") + + "INSERT INTO t1 " + " VALUES " + + Then adds the close parenthesis + + "INSERT INTO t1 )" + " VALUES )" + + So, I have to use a bool as before, set in the loop where fields and commas + are appended to the string + */ + my_bool commas_added= FALSE; + char insert_buffer[FEDERATED_QUERY_BUFFER_SIZE]; + char values_buffer[FEDERATED_QUERY_BUFFER_SIZE]; + char insert_field_value_buffer[STRING_BUFFER_USUAL_SIZE]; + Field **field; + + /* The main insert query string */ + String insert_string(insert_buffer, sizeof(insert_buffer), &my_charset_bin); + /* The string containing the values to be added to the insert */ + String values_string(values_buffer, sizeof(values_buffer), &my_charset_bin); + /* The actual value of the field, to be added to the values_string */ + String insert_field_value_string(insert_field_value_buffer, + sizeof(insert_field_value_buffer), + &my_charset_bin); + my_bitmap_map *old_map= dbug_tmp_use_all_columns(table, table->read_set); + DBUG_ENTER("ha_federated::write_row"); + + values_string.length(0); + insert_string.length(0); + insert_field_value_string.length(0); + statistic_increment(table->in_use->status_var.ha_write_count, &LOCK_status); + if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_INSERT) + table->timestamp_field->set_time(); + + /* + start both our field and field values strings + */ + insert_string.append(STRING_WITH_LEN("INSERT INTO `")); + insert_string.append(share->table_name, share->table_name_length); + insert_string.append('`'); + insert_string.append(STRING_WITH_LEN(" (")); + + values_string.append(STRING_WITH_LEN(" VALUES ")); + values_string.append(STRING_WITH_LEN(" (")); + + /* + loop through the field pointer array, add any fields to both the values + list and the fields list that is part of the write set + */ + for (field= table->field; *field; field++) + { + if (bitmap_is_set(table->write_set, (*field)->field_index)) + { + commas_added= TRUE; + if ((*field)->is_null()) + values_string.append(STRING_WITH_LEN(" NULL ")); + else + { + bool needs_quote= (*field)->str_needs_quotes(); + (*field)->val_str(&insert_field_value_string); + if (needs_quote) + values_string.append('\''); + insert_field_value_string.print(&values_string); + if (needs_quote) + values_string.append('\''); + + insert_field_value_string.length(0); + } + /* append the field name */ + insert_string.append((*field)->field_name); + + /* append commas between both fields and fieldnames */ + /* + unfortunately, we can't use the logic if *(fields + 1) to + make the following appends conditional as we don't know if the + next field is in the write set + */ + insert_string.append(STRING_WITH_LEN(", ")); + values_string.append(STRING_WITH_LEN(", ")); + } + } + dbug_tmp_restore_column_map(table->read_set, old_map); + + /* + if there were no fields, we don't want to add a closing paren + AND, we don't want to chop off the last char '(' + insert will be "INSERT INTO t1 VALUES ();" + */ + if (commas_added) + { + insert_string.length(insert_string.length() - sizeof_trailing_comma); + /* chops off leading commas */ + values_string.length(values_string.length() - sizeof_trailing_comma); + insert_string.append(STRING_WITH_LEN(") ")); + } + else + { + /* chops off trailing ) */ + 
insert_string.length(insert_string.length() - sizeof_trailing_closeparen); + } + + /* we always want to append this, even if there aren't any fields */ + values_string.append(STRING_WITH_LEN(") ")); + + /* add the values */ + insert_string.append(values_string); + + if (mysql_real_query(mysql, insert_string.ptr(), insert_string.length())) + { + DBUG_RETURN(stash_remote_error()); + } + /* + If the table we've just written a record to contains an auto_increment + field, then store the last_insert_id() value from the foreign server + */ + if (table->next_number_field) + update_auto_increment(); + + DBUG_RETURN(0); +} + +/* + ha_federated::update_auto_increment + + This method ensures that last_insert_id() works properly. What it simply + does is call last_insert_id() on the foreign database immediately after + insert (if the table has an auto_increment field) and set the insert id + via thd->insert_id(ID). +*/ +void ha_federated::update_auto_increment(void) +{ + THD *thd= current_thd; + DBUG_ENTER("ha_federated::update_auto_increment"); + + thd->first_successful_insert_id_in_cur_stmt= + mysql->last_used_con->insert_id; + DBUG_PRINT("info",("last_insert_id %d", stats.auto_increment_value)); + + DBUG_VOID_RETURN; +} + +int ha_federated::optimize(THD* thd, HA_CHECK_OPT* check_opt) +{ + char query_buffer[STRING_BUFFER_USUAL_SIZE]; + String query(query_buffer, sizeof(query_buffer), &my_charset_bin); + DBUG_ENTER("ha_federated::optimize"); + + query.length(0); + + query.set_charset(system_charset_info); + query.append(STRING_WITH_LEN("OPTIMIZE TABLE `")); + query.append(share->table_name, share->table_name_length); + query.append(STRING_WITH_LEN("`")); + + if (mysql_real_query(mysql, query.ptr(), query.length())) + { + DBUG_RETURN(stash_remote_error()); + } + + DBUG_RETURN(0); +} + + +int ha_federated::repair(THD* thd, HA_CHECK_OPT* check_opt) +{ + char query_buffer[STRING_BUFFER_USUAL_SIZE]; + String query(query_buffer, sizeof(query_buffer), &my_charset_bin); + DBUG_ENTER("ha_federated::repair"); + + query.length(0); + + query.set_charset(system_charset_info); + query.append(STRING_WITH_LEN("REPAIR TABLE `")); + query.append(share->table_name, share->table_name_length); + query.append(STRING_WITH_LEN("`")); + if (check_opt->flags & T_QUICK) + query.append(STRING_WITH_LEN(" QUICK")); + if (check_opt->flags & T_EXTEND) + query.append(STRING_WITH_LEN(" EXTENDED")); + if (check_opt->sql_flags & TT_USEFRM) + query.append(STRING_WITH_LEN(" USE_FRM")); + + if (mysql_real_query(mysql, query.ptr(), query.length())) + { + DBUG_RETURN(stash_remote_error()); + } + + DBUG_RETURN(0); +} + + +/* + Yes, update_row() does what you expect, it updates a row. old_data will have + the previous row record in it, while new_data will have the newest data in + it. + + Keep in mind that the server can do updates based on ordering if an ORDER BY + clause was used. Consecutive ordering is not guaranteed. + Currently new_data will not have an updated auto_increment record, or + an updated timestamp field. You can do these for federated by doing: + if (table->timestamp_on_update_now) + update_timestamp(new_row+table->timestamp_on_update_now-1); + if (table->next_number_field && record == table->record[0]) + update_auto_increment(); + + Called from sql_select.cc, sql_acl.cc, sql_update.cc, and sql_insert.cc. +*/ + +int ha_federated::update_row(const byte *old_data, byte *new_data) +{ + /* + This used to control how the query was built.
+
+/*
+  Yes, update_row() does what you expect: it updates a row. old_data will
+  have the previous row record in it, while new_data will have the newest
+  data in it.
+
+  Keep in mind that the server can do updates based on ordering if an
+  ORDER BY clause was used. Consecutive ordering is not guaranteed.
+  Currently new_data will not have an updated auto_increment record or an
+  updated timestamp field. You can add these for federated by doing:
+    if (table->timestamp_on_update_now)
+      update_timestamp(new_row+table->timestamp_on_update_now-1);
+    if (table->next_number_field && record == table->record[0])
+      update_auto_increment();
+
+  Called from sql_select.cc, sql_acl.cc, sql_update.cc, and sql_insert.cc.
+*/
+
+int ha_federated::update_row(const byte *old_data, byte *new_data)
+{
+  /*
+    This used to control how the query was built. If there was a
+    primary key, the query would be built such that there was a WHERE
+    clause with only that column as the condition. This is flawed,
+    because if we have a multi-part primary key, it would only use the
+    first part! We don't need to do this anyway, because
+    read_range_first will retrieve the correct record, which is what
+    is used to build the WHERE clause. We can, however, use this to
+    append a LIMIT to the end if there is NOT a primary key. Why do
+    this? Because we are only updating one record, and LIMIT enforces
+    this.
+  */
+  bool has_a_primary_key= test(table->s->primary_key != MAX_KEY);
+
+  /*
+    Buffers for the following strings.
+  */
+  char field_value_buffer[STRING_BUFFER_USUAL_SIZE];
+  char update_buffer[FEDERATED_QUERY_BUFFER_SIZE];
+  char where_buffer[FEDERATED_QUERY_BUFFER_SIZE];
+
+  /* Work area for field values */
+  String field_value(field_value_buffer, sizeof(field_value_buffer),
+                     &my_charset_bin);
+  /* stores the update query */
+  String update_string(update_buffer,
+                       sizeof(update_buffer),
+                       &my_charset_bin);
+  /* stores the WHERE clause */
+  String where_string(where_buffer,
+                      sizeof(where_buffer),
+                      &my_charset_bin);
+  DBUG_ENTER("ha_federated::update_row");
+  /*
+    Set string lengths to 0 to avoid stray chars in the strings.
+  */
+  field_value.length(0);
+  update_string.length(0);
+  where_string.length(0);
+
+  update_string.append(STRING_WITH_LEN("UPDATE `"));
+  update_string.append(share->table_name);
+  update_string.append(STRING_WITH_LEN("` SET "));
+
+  /*
+    In this loop, we want to match column names to the values being
+    inserted (as when building an INSERT statement).
+
+    Iterate through table->field (new data) and old_data using the same
+    index to create the SQL UPDATE statement. New data is used to create
+    SET field=value and old data is used to create WHERE field=oldvalue.
+  */
+
+  for (Field **field= table->field; *field; field++)
+  {
+    if (bitmap_is_set(table->write_set, (*field)->field_index))
+    {
+      update_string.append((*field)->field_name);
+      update_string.append(STRING_WITH_LEN(" = "));
+
+      if ((*field)->is_null())
+        update_string.append(STRING_WITH_LEN(" NULL "));
+      else
+      {
+        /* otherwise = */
+        my_bitmap_map *old_map= tmp_use_all_columns(table, table->read_set);
+        bool needs_quote= (*field)->str_needs_quotes();
+        (*field)->val_str(&field_value);
+        if (needs_quote)
+          update_string.append('\'');
+        field_value.print(&update_string);
+        if (needs_quote)
+          update_string.append('\'');
+        field_value.length(0);
+        tmp_restore_column_map(table->read_set, old_map);
+      }
+      update_string.append(STRING_WITH_LEN(", "));
+    }
+
+    if (bitmap_is_set(table->read_set, (*field)->field_index))
+    {
+      where_string.append((*field)->field_name);
+      if (field_in_record_is_null(table, *field, (char*) old_data))
+        where_string.append(STRING_WITH_LEN(" IS NULL "));
+      else
+      {
+        bool needs_quote= (*field)->str_needs_quotes();
+        where_string.append(STRING_WITH_LEN(" = "));
+        (*field)->val_str(&field_value,
+                          (char*) (old_data + (*field)->offset()));
+        if (needs_quote)
+          where_string.append('\'');
+        field_value.print(&where_string);
+        if (needs_quote)
+          where_string.append('\'');
+        field_value.length(0);
+      }
+      where_string.append(STRING_WITH_LEN(" AND "));
+    }
+  }
+
+  /*
+    Remove the last ', '. This works as there must be at least one
+    updated field.
+  */
+  update_string.length(update_string.length() - sizeof_trailing_comma);
+
+  if (where_string.length())
+  {
+    /* chop off trailing AND */
+    where_string.length(where_string.length() - sizeof_trailing_and);
+    update_string.append(STRING_WITH_LEN(" WHERE "));
+    update_string.append(where_string);
+  }
+
+  /*
+    If this table does not have a primary key, then we could possibly
+    update multiple rows. We want to make sure to update only one!
+  */
+  if (!has_a_primary_key)
+    update_string.append(STRING_WITH_LEN(" LIMIT 1"));
+
+  if (mysql_real_query(mysql, update_string.ptr(), update_string.length()))
+  {
+    DBUG_RETURN(stash_remote_error());
+  }
+  DBUG_RETURN(0);
+}
+
+/*
+  This will delete a row. 'buf' will contain a copy of the row to be
+  deleted. The server will call this right after the current row has been
+  read (from either a previous rnd_next() or index call).
+  If you keep a pointer to the last row or can access a primary key, it
+  will make doing the deletion quite a bit easier.
+  Keep in mind that the server does not guarantee consecutive deletions;
+  ORDER BY clauses can be used.
+
+  Called in sql_acl.cc and sql_udf.cc to manage internal table information.
+  Called in sql_delete.cc, sql_insert.cc, and sql_select.cc. In sql_select
+  it is used for removing duplicates, while in insert it is used for
+  REPLACE calls.
+*/
+
+int ha_federated::delete_row(const byte *buf)
+{
+  char delete_buffer[FEDERATED_QUERY_BUFFER_SIZE];
+  char data_buffer[FEDERATED_QUERY_BUFFER_SIZE];
+  String delete_string(delete_buffer, sizeof(delete_buffer), &my_charset_bin);
+  String data_string(data_buffer, sizeof(data_buffer), &my_charset_bin);
+  uint found= 0;
+  DBUG_ENTER("ha_federated::delete_row");
+
+  delete_string.length(0);
+  delete_string.append(STRING_WITH_LEN("DELETE FROM `"));
+  delete_string.append(share->table_name);
+  delete_string.append(STRING_WITH_LEN("` WHERE "));
+
+  for (Field **field= table->field; *field; field++)
+  {
+    Field *cur_field= *field;
+    found++;
+    if (bitmap_is_set(table->read_set, cur_field->field_index))
+    {
+      data_string.length(0);
+      delete_string.append(cur_field->field_name);
+      if (cur_field->is_null())
+      {
+        delete_string.append(STRING_WITH_LEN(" IS NULL "));
+      }
+      else
+      {
+        bool needs_quote= cur_field->str_needs_quotes();
+        delete_string.append(STRING_WITH_LEN(" = "));
+        cur_field->val_str(&data_string);
+        if (needs_quote)
+          delete_string.append('\'');
+        data_string.print(&delete_string);
+        if (needs_quote)
+          delete_string.append('\'');
+      }
+      delete_string.append(STRING_WITH_LEN(" AND "));
+    }
+  }
+
+  // Remove trailing AND
+  delete_string.length(delete_string.length() - sizeof_trailing_and);
+  if (!found)
+    delete_string.length(delete_string.length() - sizeof_trailing_where);
+
+  delete_string.append(STRING_WITH_LEN(" LIMIT 1"));
+  DBUG_PRINT("info",
+             ("Delete sql: %s", delete_string.c_ptr_quick()));
+  if (mysql_real_query(mysql, delete_string.ptr(), delete_string.length()))
+  {
+    DBUG_RETURN(stash_remote_error());
+  }
+  stats.deleted+= mysql->affected_rows;
+  stats.records-= mysql->affected_rows;
+  DBUG_PRINT("info",
+             ("rows deleted %d, rows deleted for all time %d",
+              int(mysql->affected_rows), stats.deleted));
+
+  DBUG_RETURN(0);
+}
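
update_row and delete_row share the same WHERE-building shape: every readable column of the old row contributes one predicate (IS NULL for SQL NULLs, = 'value' otherwise), joined with AND, and LIMIT 1 caps the damage when no primary key guarantees a unique match. A condensed sketch of that shape under the same assumptions as the sketch above (std::string, a hypothetical OldColumn record, values pre-escaped):

    #include <string>
    #include <vector>

    struct OldColumn { std::string name; bool is_null; std::string value; };

    static std::string build_delete(const std::string &table,
                                    const std::vector<OldColumn> &row)
    {
      std::string q= "DELETE FROM `" + table + "` WHERE ";
      for (size_t i= 0; i < row.size(); i++)
      {
        q+= "`" + row[i].name + "`";
        q+= row[i].is_null ? std::string(" IS NULL")
                           : (" = '" + row[i].value + "'");
        q+= " AND ";                     // trimmed below, as in the engine
      }
      if (row.empty())
        q.erase(q.size() - 7);           // no predicates: drop " WHERE " too
      else
        q.erase(q.size() - 5);           // drop the trailing " AND "
      return q + " LIMIT 1";             // delete_row always limits to one row
    }

The NULL split matters because 'col = NULL' never matches in SQL; only IS NULL does.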
+
+
+/*
+  Positions an index cursor to the index specified in the handle. Fetches
+  the row if available. If the key value is null, begin at the first key
+  of the index. This method, which is called in the case of an SQL
+  statement having a WHERE clause on a non-primary key index, simply
+  calls index_read_idx_with_result_set.
+*/
+
+int ha_federated::index_read(byte *buf, const byte *key,
+                             uint key_len, ha_rkey_function find_flag)
+{
+  DBUG_ENTER("ha_federated::index_read");
+
+  if (stored_result)
+    mysql_free_result(stored_result);
+  DBUG_RETURN(index_read_idx_with_result_set(buf, active_index, key,
+                                             key_len, find_flag,
+                                             &stored_result));
+}
+
+
+/*
+  Positions an index cursor to the index specified in key. Fetches the
+  row if any. This is only used to read whole keys.
+
+  This method is called via index_read in the case of a WHERE clause
+  using a primary key index, OR is called DIRECTLY when the WHERE clause
+  uses a PRIMARY KEY index.
+
+  NOTES
+    This uses an internal result set that is deleted before the function
+    returns. We need to be callable from ha_rnd_pos().
+*/
+
+int ha_federated::index_read_idx(byte *buf, uint index, const byte *key,
+                                 uint key_len, enum ha_rkey_function find_flag)
+{
+  int retval;
+  MYSQL_RES *mysql_result;
+  DBUG_ENTER("ha_federated::index_read_idx");
+
+  if ((retval= index_read_idx_with_result_set(buf, index, key,
+                                              key_len, find_flag,
+                                              &mysql_result)))
+    DBUG_RETURN(retval);
+  mysql_free_result(mysql_result);
+  DBUG_RETURN(retval);
+}
+
+
+/*
+  Create a result set for rows matching the query and return the first row.
+
+  RESULT
+    0   ok     In this case *result will contain the result set;
+               table->status == 0
+    #   error  In this case *result will contain 0;
+               table->status == STATUS_NOT_FOUND
+*/
+
+int ha_federated::index_read_idx_with_result_set(byte *buf, uint index,
+                                                 const byte *key,
+                                                 uint key_len,
+                                                 ha_rkey_function find_flag,
+                                                 MYSQL_RES **result)
+{
+  int retval;
+  char error_buffer[FEDERATED_QUERY_BUFFER_SIZE];
+  char index_value[STRING_BUFFER_USUAL_SIZE];
+  char sql_query_buffer[FEDERATED_QUERY_BUFFER_SIZE];
+  String index_string(index_value,
+                      sizeof(index_value),
+                      &my_charset_bin);
+  String sql_query(sql_query_buffer,
+                   sizeof(sql_query_buffer),
+                   &my_charset_bin);
+  key_range range;
+  DBUG_ENTER("ha_federated::index_read_idx_with_result_set");
+
+  *result= 0;                                   // In case of errors
+  index_string.length(0);
+  sql_query.length(0);
+  statistic_increment(table->in_use->status_var.ha_read_key_count,
+                      &LOCK_status);
+
+  sql_query.append(share->select_query);
+
+  range.key= key;
+  range.length= key_len;
+  range.flag= find_flag;
+  create_where_from_key(&index_string,
+                        &table->key_info[index],
+                        &range,
+                        NULL, 0, 0);
+  sql_query.append(index_string);
+
+  if (mysql_real_query(mysql, sql_query.ptr(), sql_query.length()))
+  {
+    my_sprintf(error_buffer, (error_buffer, "error: %d '%s'",
+                              mysql_errno(mysql), mysql_error(mysql)));
+    retval= ER_QUERY_ON_FOREIGN_DATA_SOURCE;
+    goto error;
+  }
+  if (!(*result= mysql_store_result(mysql)))
+  {
+    retval= HA_ERR_END_OF_FILE;
+    goto error;
+  }
+  if (!(retval= read_next(buf, *result)))
+    DBUG_RETURN(retval);
+
+  mysql_free_result(*result);
+  *result= 0;
+  table->status= STATUS_NOT_FOUND;
+  DBUG_RETURN(retval);
+
+error:
+  table->status= STATUS_NOT_FOUND;
+  my_error(retval, MYF(0), error_buffer);
+  DBUG_RETURN(retval);
+}
+
+
+/* Initialized at each key walk (called multiple times, unlike rnd_init()) */
+
+int ha_federated::index_init(uint keynr, bool sorted)
+{
+  DBUG_ENTER("ha_federated::index_init");
+  DBUG_PRINT("info", ("table: '%s'  key: %u", table->s->table_name, keynr));
+  active_index= keynr;
+  DBUG_RETURN(0);
+}
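
Every index path above funnels through create_where_from_key, which turns a key_range (raw key bytes plus a comparison flag) back into SQL text. The flavor of that translation, reduced to a single integer column (hypothetical enum and function names; the real code walks KEY_PART_INFO segments and handles NULLs and multi-part keys):

    #include <sstream>
    #include <string>

    // Stand-ins for the server's ha_rkey_function values.
    enum RangeFlag { KEY_EXACT, KEY_OR_NEXT, AFTER_KEY };

    static std::string where_from_range(const std::string &col,
                                        long start, RangeFlag flag,
                                        const long *end)  // NULL = open-ended
    {
      std::ostringstream w;
      w << " WHERE `" << col << "`";
      switch (flag)
      {
      case KEY_EXACT:   w << " = "  << start; break;
      case KEY_OR_NEXT: w << " >= " << start; break;
      case AFTER_KEY:   w << " > "  << start; break;
      }
      if (end)
        w << " AND `" << col << "` <= " << *end;
      return w.str();
    }

So a read_range_first over id between 5 and 10 would, under these assumptions, produce something like " WHERE `id` >= 5 AND `id` <= 10" appended to the share's SELECT.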
+
+
+/*
+  Read first range
+*/
+
+int ha_federated::read_range_first(const key_range *start_key,
+                                   const key_range *end_key,
+                                   bool eq_range, bool sorted)
+{
+  char sql_query_buffer[FEDERATED_QUERY_BUFFER_SIZE];
+  int retval;
+  String sql_query(sql_query_buffer,
+                   sizeof(sql_query_buffer),
+                   &my_charset_bin);
+  DBUG_ENTER("ha_federated::read_range_first");
+
+  DBUG_ASSERT(!(start_key == NULL && end_key == NULL));
+
+  sql_query.length(0);
+  sql_query.append(share->select_query);
+  create_where_from_key(&sql_query,
+                        &table->key_info[active_index],
+                        start_key, end_key, 0, eq_range);
+
+  if (stored_result)
+  {
+    mysql_free_result(stored_result);
+    stored_result= 0;
+  }
+  if (mysql_real_query(mysql, sql_query.ptr(), sql_query.length()))
+  {
+    retval= ER_QUERY_ON_FOREIGN_DATA_SOURCE;
+    goto error;
+  }
+  sql_query.length(0);
+
+  if (!(stored_result= mysql_store_result(mysql)))
+  {
+    retval= HA_ERR_END_OF_FILE;
+    goto error;
+  }
+
+  retval= read_next(table->record[0], stored_result);
+  DBUG_RETURN(retval);
+
+error:
+  table->status= STATUS_NOT_FOUND;
+  DBUG_RETURN(retval);
+}
+
+
+int ha_federated::read_range_next()
+{
+  int retval;
+  DBUG_ENTER("ha_federated::read_range_next");
+  retval= rnd_next(table->record[0]);
+  DBUG_RETURN(retval);
+}
+
+
+/* Used to read forward through the index. */
+int ha_federated::index_next(byte *buf)
+{
+  DBUG_ENTER("ha_federated::index_next");
+  statistic_increment(table->in_use->status_var.ha_read_next_count,
+                      &LOCK_status);
+  DBUG_RETURN(read_next(buf, stored_result));
+}
+
+
+/*
+  rnd_init() is called when the system wants the storage engine to do a
+  table scan.
+
+  This is the method that gets data for the SELECT calls.
+
+  See the description in the introduction at the top of this file to see
+  when rnd_init() is called.
+
+  Called from filesort.cc, records.cc, sql_handler.cc, sql_select.cc,
+  sql_table.cc, and sql_update.cc.
+*/
+
+int ha_federated::rnd_init(bool scan)
+{
+  DBUG_ENTER("ha_federated::rnd_init");
+  /*
+    The use of the 'scan' flag is incredibly important for this handler
+    to work properly, especially with updates containing WHERE clauses
+    using indexed columns.
+
+    When the initial query contains a WHERE clause using an indexed
+    column, it's index_read_idx that selects the exact record from the
+    foreign database.
+
+    When there is NO index in the query, either due to not having a WHERE
+    clause, or the WHERE clause using columns that are not indexed, a
+    'full table scan' is done by rnd_init, which in this situation simply
+    means a 'SELECT * FROM ...' on the foreign table.
+
+    In other words, this 'scan' flag gives us the means to ensure that if
+    there is an index involved in the query, we want index_read_idx to
+    retrieve the exact record (scan flag is 0), and we do not want
+    rnd_init to do a 'full table scan' and wipe out that result set.
+
+    Prior to using this flag, the problem was most apparent with updates.
+
+    For an initial query like 'UPDATE tablename SET anything = whatever
+    WHERE indexedcol = someval', index_read_idx would get called, using a
+    query constructed with a WHERE clause built from the values of the
+    index ('indexedcol' in this case, having a value of 'someval').
+    mysql_store_result would then get called (this would be the result
+    set we want to use).
+
+    After this, rnd_init (from sql_update.cc) would be called. It would
+    then unnecessarily call "SELECT * FROM table" on the foreign table,
+    then call mysql_store_result, which would wipe out the previous
+    result set from the call to index_read_idx that contained the correct
+    record, and hence update the wrong row!
+  */
+
+  if (scan)
+  {
+    if (stored_result)
+    {
+      mysql_free_result(stored_result);
+      stored_result= 0;
+    }
+
+    if (mysql_real_query(mysql,
+                         share->select_query,
+                         strlen(share->select_query)))
+      goto error;
+
+    stored_result= mysql_store_result(mysql);
+    if (!stored_result)
+      goto error;
+  }
+  DBUG_RETURN(0);
+
+error:
+  DBUG_RETURN(stash_remote_error());
+}
+
+
+int ha_federated::rnd_end()
+{
+  DBUG_ENTER("ha_federated::rnd_end");
+  DBUG_RETURN(index_end());
+}
+
+
+int ha_federated::index_end(void)
+{
+  DBUG_ENTER("ha_federated::index_end");
+  if (stored_result)
+  {
+    mysql_free_result(stored_result);
+    stored_result= 0;
+  }
+  active_index= MAX_KEY;
+  DBUG_RETURN(0);
+}
+
+
+/*
+  This is called for each row of the table scan. When you run out of
+  records you should return HA_ERR_END_OF_FILE. Fill buf with the row
+  information. The Field structure for the table is the key to getting
+  data into buf in a manner that will allow the server to understand it.
+
+  Called from filesort.cc, records.cc, sql_handler.cc, sql_select.cc,
+  sql_table.cc, and sql_update.cc.
+*/
+
+int ha_federated::rnd_next(byte *buf)
+{
+  DBUG_ENTER("ha_federated::rnd_next");
+
+  if (stored_result == 0)
+  {
+    /*
+      The return value of rnd_init is not always checked (see records.cc),
+      so we can get here _even_ if there is _no_ pre-fetched result set!
+      TODO: fix it. We can delete this in 5.1 when rnd_init() is checked.
+    */
+    DBUG_RETURN(1);
+  }
+  DBUG_RETURN(read_next(buf, stored_result));
+}
+
+
+/*
+  ha_federated::read_next
+
+  Reads from a result set and converts to the MySQL internal format.
+
+  SYNOPSIS
+    read_next()
+      buf       byte pointer to record
+      result    mysql result set
+
+  DESCRIPTION
+    This method is a wrapper method that reads one record from a result
+    set and converts it to the internal table format.
+
+  RETURN VALUE
+    1    error
+    0    no error
+*/
+
+int ha_federated::read_next(byte *buf, MYSQL_RES *result)
+{
+  int retval;
+  my_ulonglong num_rows;
+  MYSQL_ROW row;
+  DBUG_ENTER("ha_federated::read_next");
+
+  table->status= STATUS_NOT_FOUND;              // For easier return
+
+  /* Fetch a row, insert it back in a row format. */
+  if (!(row= mysql_fetch_row(result)))
+    DBUG_RETURN(HA_ERR_END_OF_FILE);
+
+  if (!(retval= convert_row_to_internal_format(buf, row, result)))
+    table->status= 0;
+
+  DBUG_RETURN(retval);
+}
+
+
+/*
+  Store a reference to the current row so that we can later find it for
+  a re-read, update or delete.
+
+  In the case of federated, a reference is either a primary key or
+  the whole record.
+
+  Called from filesort.cc, sql_select.cc, sql_delete.cc and sql_update.cc.
+*/
+
+void ha_federated::position(const byte *record)
+{
+  DBUG_ENTER("ha_federated::position");
+  if (table->s->primary_key != MAX_KEY)
+    key_copy(ref, (byte *)record, table->key_info + table->s->primary_key,
+             ref_length);
+  else
+    memcpy(ref, record, ref_length);
+  DBUG_VOID_RETURN;
+}
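
rnd_init, read_next and index_end above are a thin veneer over the MySQL C API's buffered-result cycle. The underlying client-library pattern they wrap, in isolation (standard libmysqlclient calls; connection setup and error details trimmed):

    #include <mysql.h>
    #include <stdio.h>
    #include <string.h>

    // One remote query, then row-at-a-time fetch -- the shape of a
    // federated full table scan. 'conn' is assumed already connected.
    static int scan_remote_table(MYSQL *conn, const char *select_query)
    {
      if (mysql_real_query(conn, select_query, strlen(select_query)))
        return 1;                                 // rnd_init: goto error
      MYSQL_RES *result= mysql_store_result(conn); // the stored_result member
      if (!result)
        return 1;
      MYSQL_ROW row;
      while ((row= mysql_fetch_row(result)))      // read_next's fetch
        printf("first column: %s\n", row[0] ? row[0] : "NULL");
      mysql_free_result(result);                  // index_end/rnd_end cleanup
      return 0;
    }

mysql_store_result buffers the whole result set on the client side, which is why the handler keeps it in the stored_result member and must be careful not to overwrite it mid-scan (the rnd_init pitfall described above).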
+
+
+/*
+  This is like rnd_next, but you are given a position to use to determine
+  the row. The position will be of the type that you stored in ref.
+
+  This method is required for an ORDER BY.
+
+  Called from filesort.cc, records.cc, sql_insert.cc, sql_select.cc and
+  sql_update.cc.
+*/
+
+int ha_federated::rnd_pos(byte *buf, byte *pos)
+{
+  int result;
+  DBUG_ENTER("ha_federated::rnd_pos");
+  statistic_increment(table->in_use->status_var.ha_read_rnd_count,
+                      &LOCK_status);
+  if (table->s->primary_key != MAX_KEY)
+  {
+    /* We have a primary key, so use index_read_idx to find the row */
+    result= index_read_idx(buf, table->s->primary_key, pos,
+                           ref_length, HA_READ_KEY_EXACT);
+  }
+  else
+  {
+    /* otherwise, get the old record ref as obtained in ::position */
+    memcpy(buf, pos, ref_length);
+    result= 0;
+  }
+  table->status= result ? STATUS_NOT_FOUND : 0;
+  DBUG_RETURN(result);
+}
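
position() and rnd_pos() above form a save/restore pair around an opaque 'ref' buffer: a primary-key image when the table has one, otherwise the whole row. A schematic of that contract (toy struct, not the server's handler class; the re-fetch branch is stubbed):

    #include <string.h>

    struct MiniHandler
    {
      unsigned char ref[64];    // opaque to the server, handed back verbatim
      unsigned ref_length;
      bool has_primary_key;

      // Save: remember how to find this row again later.
      void position(const unsigned char *record, unsigned key_offset)
      {
        if (has_primary_key)
          memcpy(ref, record + key_offset, ref_length);  // PK image
        else
          memcpy(ref, record, ref_length);               // whole row
      }

      // Restore: turn the saved ref back into a row in 'buf'.
      void rnd_pos(unsigned char *buf, const unsigned char *pos)
      {
        if (has_primary_key)
        {
          // Re-fetch by key; ha_federated calls index_read_idx(buf,
          // primary_key, pos, ref_length, HA_READ_KEY_EXACT) here.
        }
        else
          memcpy(buf, pos, ref_length);   // the row itself was the ref
      }
    };

Storing the whole row as the ref is what keeps ORDER BY working on remote tables with no usable key, at the cost of a wider sort buffer.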
+
+
+/*
+  ::info() is used to return information to the optimizer.
+  Currently this table handler doesn't implement most of the fields
+  really needed. SHOW also makes use of this data.
+
+  Another note: you will probably want to have the following in your
+  code:
+    if (records < 2)
+      records= 2;
+  The reason is that the server will optimize for cases of only a single
+  record. If in a table scan you don't know the number of records, it
+  will probably be better to set records to two so you can return as
+  many records as you need.
+
+  Along with records, a few more variables you may wish to set are:
+    records
+    deleted
+    data_file_length
+    index_file_length
+    delete_length
+    check_time
+  Take a look at the public variables in handler.h for more information.
+
+  Called in filesort.cc, ha_heap.cc, item_sum.cc, opt_sum.cc,
+  sql_delete.cc, sql_derived.cc, sql_select.cc, sql_show.cc,
+  sql_table.cc, sql_union.cc and sql_update.cc.
+*/
+
+void ha_federated::info(uint flag)
+{
+  char error_buffer[FEDERATED_QUERY_BUFFER_SIZE];
+  char status_buf[FEDERATED_QUERY_BUFFER_SIZE];
+  char escaped_table_name[FEDERATED_QUERY_BUFFER_SIZE];
+  int error;
+  uint error_code;
+  MYSQL_RES *result= 0;
+  MYSQL_ROW row;
+  String status_query_string(status_buf, sizeof(status_buf), &my_charset_bin);
+  DBUG_ENTER("ha_federated::info");
+
+  error_code= ER_QUERY_ON_FOREIGN_DATA_SOURCE;
+  /* we don't want to show table status if it is not needed */
+  if (flag & (HA_STATUS_VARIABLE | HA_STATUS_CONST))
+  {
+    status_query_string.length(0);
+    status_query_string.append(STRING_WITH_LEN("SHOW TABLE STATUS LIKE '"));
+    escape_string_for_mysql(&my_charset_bin, (char *)escaped_table_name,
+                            sizeof(escaped_table_name),
+                            share->table_name,
+                            share->table_name_length);
+    status_query_string.append(escaped_table_name);
+    status_query_string.append(STRING_WITH_LEN("'"));
+
+    if (mysql_real_query(mysql, status_query_string.ptr(),
+                         status_query_string.length()))
+      goto error;
+
+    status_query_string.length(0);
+
+    result= mysql_store_result(mysql);
+    if (!result)
+      goto error;
+
+    if (!mysql_num_rows(result))
+      goto error;
+
+    if (!(row= mysql_fetch_row(result)))
+      goto error;
+
+    if (flag & (HA_STATUS_VARIABLE | HA_STATUS_CONST))
+    {
+      /*
+        deleted is set in ha_federated::info.
+      */
+      /*
+        We need to figure out what this means as far as federated is
+        concerned, since we don't have a "file":
+
+          data_file_length  = ?
+          index_file_length = ?
+          delete_length     = ?
+      */
+      if (row[4] != NULL)
+        stats.records= (ha_rows) my_strtoll10(row[4], (char**) 0,
+                                              &error);
+      if (row[5] != NULL)
+        stats.mean_rec_length= (ha_rows) my_strtoll10(row[5], (char**) 0,
+                                                      &error);
+
+      stats.data_file_length= stats.records * stats.mean_rec_length;
+
+      if (row[12] != NULL)
+        stats.update_time= (ha_rows) my_strtoll10(row[12], (char**) 0,
+                                                  &error);
+      if (row[13] != NULL)
+        stats.check_time= (ha_rows) my_strtoll10(row[13], (char**) 0,
+                                                 &error);
+    }
+    /*
+      Size of IO operations (this is based on a good guess, no high
+      science involved).
+    */
+    if (flag & HA_STATUS_CONST)
+      stats.block_size= 4096;
+
+  }
+
+  if (result)
+    mysql_free_result(result);
+
+  DBUG_VOID_RETURN;
+
+error:
+  if (result)
+    mysql_free_result(result);
+
+  my_sprintf(error_buffer, (error_buffer, ": %d : %s",
+                            mysql_errno(mysql), mysql_error(mysql)));
+  my_error(error_code, MYF(0), error_buffer);
+  DBUG_VOID_RETURN;
+}
+
+
+/*
+  Used to delete all rows in a table. Used both for cases of truncate and
+  for cases where the optimizer realizes that all rows will be removed as
+  a result of an SQL statement.
+
+  Called from item_sum.cc by Item_func_group_concat::clear(),
+  Item_sum_count_distinct::clear(), and Item_func_group_concat::clear().
+  Called from sql_delete.cc by mysql_delete().
+  Called from sql_select.cc by JOIN::reinit().
+  Called from sql_union.cc by st_select_lex_unit::exec().
+*/
+
+int ha_federated::delete_all_rows()
+{
+  char query_buffer[FEDERATED_QUERY_BUFFER_SIZE];
+  String query(query_buffer, sizeof(query_buffer), &my_charset_bin);
+  DBUG_ENTER("ha_federated::delete_all_rows");
+
+  query.length(0);
+
+  query.set_charset(system_charset_info);
+  query.append(STRING_WITH_LEN("TRUNCATE `"));
+  query.append(share->table_name);
+  query.append(STRING_WITH_LEN("`"));
+
+  /*
+    TRUNCATE won't return anything in mysql_affected_rows.
+  */
+  if (mysql_real_query(mysql, query.ptr(), query.length()))
+  {
+    DBUG_RETURN(stash_remote_error());
+  }
+  stats.deleted+= stats.records;
+  stats.records= 0;
+  DBUG_RETURN(0);
+}
+
+
+/*
+  The idea with handler::store_lock() is the following:
+
+  The statement decides which locks we should need for the table.
+  For updates/deletes/inserts we get WRITE locks; for SELECT... we get
+  read locks.
+
+  Before adding the lock into the table lock handler (see thr_lock.c)
+  mysqld calls store_lock with the requested locks. store_lock can then
+  modify a write lock to a read lock (or some other lock), ignore the
+  lock (if we don't want to use MySQL table locks at all), or add locks
+  for many tables (like we do when we are using a MERGE handler).
+
+  Federated changes all WRITE locks to TL_WRITE_ALLOW_WRITE (which
+  signals that we are doing WRITES, but that we are still allowing other
+  readers and writers).
+
+  When releasing locks, store_lock() is also called. In this case one
+  usually doesn't have to do anything.
+
+  In some exceptional cases MySQL may send a request for a TL_IGNORE;
+  this means that we are requesting the same lock as last time and this
+  should also be ignored. (This may happen when someone does a flush
+  table when we have opened a part of the tables, in which case mysqld
+  closes and reopens the tables and tries to get the same locks as last
+  time.) In the future we will probably try to remove this.
+
+  Called from lock.cc by get_lock_data().
+*/ + +THR_LOCK_DATA **ha_federated::store_lock(THD *thd, + THR_LOCK_DATA **to, + enum thr_lock_type lock_type) +{ + DBUG_ENTER("ha_federated::store_lock"); + if (lock_type != TL_IGNORE && lock.type == TL_UNLOCK) + { + /* + Here is where we get into the guts of a row level lock. + If TL_UNLOCK is set + If we are not doing a LOCK TABLE or DISCARD/IMPORT + TABLESPACE, then allow multiple writers + */ + + if ((lock_type >= TL_WRITE_CONCURRENT_INSERT && + lock_type <= TL_WRITE) && !thd->in_lock_tables) + lock_type= TL_WRITE_ALLOW_WRITE; + + /* + In queries of type INSERT INTO t1 SELECT ... FROM t2 ... + MySQL would use the lock TL_READ_NO_INSERT on t2, and that + would conflict with TL_WRITE_ALLOW_WRITE, blocking all inserts + to t2. Convert the lock to a normal read lock to allow + concurrent inserts to t2. + */ + + if (lock_type == TL_READ_NO_INSERT && !thd->in_lock_tables) + lock_type= TL_READ; + + lock.type= lock_type; + } + + *to++= &lock; + + DBUG_RETURN(to); +} + +/* + create() does nothing, since we have no local setup of our own. + FUTURE: We should potentially connect to the foreign database and +*/ + +int ha_federated::create(const char *name, TABLE *table_arg, + HA_CREATE_INFO *create_info) +{ + int retval; + FEDERATED_SHARE tmp_share; // Only a temporary share, to test the url + DBUG_ENTER("ha_federated::create"); + + if (!(retval= parse_url(&tmp_share, table_arg, 1))) + retval= check_foreign_data_source(&tmp_share, 1); + + my_free((gptr) tmp_share.scheme, MYF(MY_ALLOW_ZERO_PTR)); + DBUG_RETURN(retval); + +} + + +int ha_federated::stash_remote_error() +{ + DBUG_ENTER("ha_federated::stash_remote_error()"); + remote_error_number= mysql_errno(mysql); + strmake(remote_error_buf, mysql_error(mysql), sizeof(remote_error_buf)-1); + DBUG_RETURN(HA_FEDERATED_ERROR_WITH_REMOTE_SYSTEM); +} + + +bool ha_federated::get_error_message(int error, String* buf) +{ + DBUG_ENTER("ha_federated::get_error_message"); + DBUG_PRINT("enter", ("error: %d", error)); + if (error == HA_FEDERATED_ERROR_WITH_REMOTE_SYSTEM) + { + buf->append(STRING_WITH_LEN("Error on remote system: ")); + buf->qs_append(remote_error_number); + buf->append(STRING_WITH_LEN(": ")); + buf->append(remote_error_buf); + + remote_error_number= 0; + remote_error_buf[0]= '\0'; + } + DBUG_PRINT("exit", ("message: %s", buf->ptr())); + DBUG_RETURN(FALSE); +} + +int ha_federated::external_lock(THD *thd, int lock_type) +{ + int error= 0; + ha_federated *trx= (ha_federated *)thd->ha_data[federated_hton.slot]; + DBUG_ENTER("ha_federated::external_lock"); + + if (lock_type != F_UNLCK) + { + DBUG_PRINT("info",("federated not lock F_UNLCK")); + if (!(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) + { + DBUG_PRINT("info",("federated autocommit")); + /* + This means we are doing an autocommit + */ + error= connection_autocommit(TRUE); + if (error) + { + DBUG_PRINT("info", ("error setting autocommit TRUE: %d", error)); + DBUG_RETURN(error); + } + trans_register_ha(thd, FALSE, &federated_hton); + } + else + { + DBUG_PRINT("info",("not autocommit")); + if (!trx) + { + /* + This is where a transaction gets its start + */ + error= connection_autocommit(FALSE); + if (error) + { + DBUG_PRINT("info", ("error setting autocommit FALSE: %d", error)); + DBUG_RETURN(error); + } + thd->ha_data[federated_hton.slot]= this; + trans_register_ha(thd, TRUE, &federated_hton); + /* + Send a lock table to the remote end. 
+          We do not support this at the moment.
+        */
+        if (thd->options & (OPTION_TABLE_LOCK))
+        {
+          DBUG_PRINT("info", ("We do not support lock table yet"));
+        }
+      }
+      else
+      {
+        ha_federated *ptr;
+        for (ptr= trx; ptr; ptr= ptr->trx_next)
+          if (ptr == this)
+            break;
+          else if (!ptr->trx_next)
+            ptr->trx_next= this;
+      }
+    }
+  }
+  DBUG_RETURN(0);
+}
+
+
+static int federated_commit(THD *thd, bool all)
+{
+  int return_val= 0;
+  ha_federated *trx= (ha_federated *)thd->ha_data[federated_hton.slot];
+  DBUG_ENTER("federated_commit");
+
+  if (all)
+  {
+    int error= 0;
+    ha_federated *ptr, *old= NULL;
+    for (ptr= trx; ptr; old= ptr, ptr= ptr->trx_next)
+    {
+      if (old)
+        old->trx_next= NULL;
+      error= ptr->connection_commit();
+      if (error && !return_val)
+        return_val= error;
+    }
+    thd->ha_data[federated_hton.slot]= NULL;
+  }
+
+  DBUG_PRINT("info", ("error val: %d", return_val));
+  DBUG_RETURN(return_val);
+}
+
+
+static int federated_rollback(THD *thd, bool all)
+{
+  int return_val= 0;
+  ha_federated *trx= (ha_federated *)thd->ha_data[federated_hton.slot];
+  DBUG_ENTER("federated_rollback");
+
+  if (all)
+  {
+    int error= 0;
+    ha_federated *ptr, *old= NULL;
+    for (ptr= trx; ptr; old= ptr, ptr= ptr->trx_next)
+    {
+      if (old)
+        old->trx_next= NULL;
+      error= ptr->connection_rollback();
+      if (error && !return_val)
+        return_val= error;
+    }
+    thd->ha_data[federated_hton.slot]= NULL;
+  }
+
+  DBUG_PRINT("info", ("error val: %d", return_val));
+  DBUG_RETURN(return_val);
+}
+
+int ha_federated::connection_commit()
+{
+  DBUG_ENTER("ha_federated::connection_commit");
+  DBUG_RETURN(execute_simple_query("COMMIT", 6));
+}
+
+
+int ha_federated::connection_rollback()
+{
+  DBUG_ENTER("ha_federated::connection_rollback");
+  DBUG_RETURN(execute_simple_query("ROLLBACK", 8));
+}
+
+
+int ha_federated::connection_autocommit(bool state)
+{
+  const char *text;
+  DBUG_ENTER("ha_federated::connection_autocommit");
+  text= (state == true) ? "SET AUTOCOMMIT=1" : "SET AUTOCOMMIT=0";
+  DBUG_RETURN(execute_simple_query(text, 16));
+}
+
+
+int ha_federated::execute_simple_query(const char *query, int len)
+{
+  DBUG_ENTER("ha_federated::execute_simple_query");
+
+  if (mysql_real_query(mysql, query, len))
+  {
+    DBUG_RETURN(stash_remote_error());
+  }
+  DBUG_RETURN(0);
+}
+
+struct st_mysql_storage_engine federated_storage_engine=
+{ MYSQL_HANDLERTON_INTERFACE_VERSION, &federated_hton };
+
+mysql_declare_plugin(federated)
+{
+  MYSQL_STORAGE_ENGINE_PLUGIN,
+  &federated_storage_engine,
+  "FEDERATED",
+  "Patrick Galbraith and Brian Aker, MySQL AB",
+  "Federated MySQL storage engine",
+  federated_db_init, /* Plugin Init */
+  NULL, /* Plugin Deinit */
+  0x0100 /* 1.0 */,
+  0
+}
+mysql_declare_plugin_end;
+
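
external_lock, federated_commit and federated_rollback above cooperate through a singly linked list of handlers hung off thd->ha_data: the first handler in a transaction becomes the head, later ones append themselves, and commit/rollback walks the chain once. The list discipline in isolation (toy types, illustrative only):

    #include <stddef.h>

    struct Handler
    {
      Handler *trx_next;
      int (*finish)(Handler *h);  // stands in for connection_commit/rollback
    };

    // Register a handler with the current transaction exactly once.
    static void trx_register(Handler **ha_data, Handler *h)
    {
      if (!*ha_data)
      {
        h->trx_next= NULL;
        *ha_data= h;              // first participant becomes the head
        return;
      }
      for (Handler *p= *ha_data; p; p= p->trx_next)
      {
        if (p == h)
          return;                 // already on the chain
        if (!p->trx_next)
        {
          h->trx_next= NULL;
          p->trx_next= h;         // append at the tail
          return;
        }
      }
    }

    // Finish every participant; report the first error but visit them all.
    static int trx_finish_all(Handler **ha_data)
    {
      int first_error= 0;
      for (Handler *p= *ha_data; p; p= p->trx_next)
      {
        int err= p->finish(p);
        if (err && !first_error)
          first_error= err;
      }
      *ha_data= NULL;             // transaction is over; drop the chain
      return first_error;
    }

Visiting every handler even after an error mirrors the engine's behavior: each remote connection needs its COMMIT or ROLLBACK regardless of what its siblings returned.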
diff --git a/storage/federated/ha_federated.h b/storage/federated/ha_federated.h
new file mode 100644
index 00000000000..ebdc775d3bf
--- /dev/null
+++ b/storage/federated/ha_federated.h
@@ -0,0 +1,241 @@
+/* Copyright (C) 2003 MySQL AB
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation; either version 2 of the License, or
+  (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, write to the Free Software
+  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/*
+  Please read ha_example.cc before reading this file.
+  Please keep in mind that the federated storage engine implements all
+  methods that are required to be implemented. handler.h has a full list
+  of methods that you can implement.
+*/
+
+#ifdef USE_PRAGMA_INTERFACE
+#pragma interface                       /* gcc class implementation */
+#endif
+
+#include <mysql.h>
+
+/*
+  handler::print_error has a case statement for error numbers.
+  This value (10000) is far out of range and will invoke the
+  default: case.
+  (The current error range is 120-159 from include/my_base.h.)
+*/
+#define HA_FEDERATED_ERROR_WITH_REMOTE_SYSTEM 10000
+
+#define FEDERATED_QUERY_BUFFER_SIZE STRING_BUFFER_USUAL_SIZE * 5
+#define FEDERATED_RECORDS_IN_RANGE 2
+
+/*
+  FEDERATED_SHARE is a structure that will be shared among all open
+  handlers. This implements the minimum of what you will probably need.
+*/
+typedef struct st_federated_share {
+  /*
+    the primary select query to be used in rnd_init
+  */
+  char *select_query;
+  /*
+    remote host info, parse_url supplies
+  */
+  char *scheme;
+  char *connect_string;
+  char *hostname;
+  char *username;
+  char *password;
+  char *database;
+  char *table_name;
+  char *table;
+  char *socket;
+  char *sport;
+  ushort port;
+  uint table_name_length, connect_string_length, use_count;
+  pthread_mutex_t mutex;
+  THR_LOCK lock;
+} FEDERATED_SHARE;
+
+/*
+  Class definition for the storage engine
+*/
+class ha_federated: public handler
+{
+  THR_LOCK_DATA lock;      /* MySQL lock */
+  FEDERATED_SHARE *share;  /* Shared lock info */
+  MYSQL *mysql;            /* MySQL connection */
+  MYSQL_RES *stored_result;
+  uint fetch_num;                     // stores the fetch num
+  MYSQL_ROW_OFFSET current_position;  // Current position used by ::position()
+  int remote_error_number;
+  char remote_error_buf[FEDERATED_QUERY_BUFFER_SIZE];
+
+private:
+  /*
+    return 0 on success
+    return errorcode otherwise
+  */
+  uint convert_row_to_internal_format(byte *buf, MYSQL_ROW row,
+                                      MYSQL_RES *result);
+  bool create_where_from_key(String *to, KEY *key_info,
+                             const key_range *start_key,
+                             const key_range *end_key,
+                             bool records_in_range, bool eq_range);
+  int stash_remote_error();
+
+public:
+  ha_federated(TABLE_SHARE *table_arg);
+  ~ha_federated() {}
+  /* The name that will be used for display purposes */
+  const char *table_type() const { return "FEDERATED"; }
+  /*
+    Next pointer used in transaction
+  */
+  ha_federated *trx_next;
+  /*
+    The name of the index type that will be used for display.
+    Don't implement this method unless you really have indexes.
+  */
+  // perhaps get index type
+  const char *index_type(uint inx) { return "REMOTE"; }
+  const char **bas_ext() const;
+  /*
+    This is a list of flags that says what the storage engine
+    implements. The current table flags are documented in
+    handler.h
+  */
+  ulonglong table_flags() const
+  {
+    /* fix server to be able to get remote server table flags */
+    return (HA_PRIMARY_KEY_IN_READ_INDEX | HA_FILE_BASED
+            | HA_REC_NOT_IN_SEQ | HA_AUTO_PART_KEY | HA_CAN_INDEX_BLOBS |
+            HA_NO_PREFIX_CHAR_KEYS | HA_PRIMARY_KEY_REQUIRED_FOR_DELETE |
+            HA_PARTIAL_COLUMN_READ | HA_NULL_IN_KEY);
+  }
+  /*
+    This is a bitmap of flags that says how the storage engine
+    implements indexes. The current index flags are documented in
+    handler.h. If you do not implement indexes, just return zero here.
+
+    part is the key part to check. The first key part is 0.
+    If all_parts is set, MySQL wants to know the flags for the combined
+    index, up to and including 'part'.
+  */
+  /* fix server to be able to get remote server index flags */
+  ulong index_flags(uint inx, uint part, bool all_parts) const
+  {
+    return (HA_READ_NEXT | HA_READ_RANGE | HA_READ_AFTER_KEY);
+  }
+  uint max_supported_record_length() const { return HA_MAX_REC_LENGTH; }
+  uint max_supported_keys() const { return MAX_KEY; }
+  uint max_supported_key_parts() const { return MAX_REF_PARTS; }
+  uint max_supported_key_length() const { return MAX_KEY_LENGTH; }
+  /*
+    Called in test_quick_select to determine if indexes should be used.
+    Normally, we need to know the number of blocks. For federated we need
+    to know the number of blocks on the remote side, and the number of
+    packets and blocks on the network side (?).
+    Talk to Kostja about this - how to get the
+      number of rows * ...
+      disk scan time on the other side (block size, size of the row)
+      network time ...
+    The reason for "records * 1000" is that such a large number forces
+    this to use indexes.
+  */
+  double scan_time()
+  {
+    DBUG_PRINT("info", ("records %lu", (ulong) stats.records));
+    return (double)(stats.records*1000);
+  }
+  /*
+    The next method will never be called if you do not implement indexes.
+  */
+  double read_time(uint index, uint ranges, ha_rows rows)
+  {
+    /*
+      Per Brian, this number is bogus, but this method must be
+      implemented, and at a later date he intends to document this issue
+      for handler code.
+    */
+    return (double) rows / 20.0+1;
+  }
+
+  const key_map *keys_to_use_for_scanning() { return &key_map_full; }
+  /*
+    Everything below are methods that we implement in ha_federated.cc.
+
+    Most of these methods are not obligatory; skip them and
+    MySQL will treat them as not implemented.
+  */
+  int open(const char *name, int mode, uint test_if_locked);    // required
+  int close(void);                                              // required
+
+  int write_row(byte *buf);
+  int update_row(const byte *old_data, byte *new_data);
+  int delete_row(const byte *buf);
+  int index_init(uint keynr, bool sorted);
+  int index_read(byte *buf, const byte *key,
+                 uint key_len, enum ha_rkey_function find_flag);
+  int index_read_idx(byte *buf, uint idx, const byte *key,
+                     uint key_len, enum ha_rkey_function find_flag);
+  int index_next(byte *buf);
+  int index_end();
+  int read_range_first(const key_range *start_key,
+                       const key_range *end_key,
+                       bool eq_range, bool sorted);
+  int read_range_next();
+  /*
+    Unlike index_init(), rnd_init() can be called two times
+    without rnd_end() in between (it only makes sense if scan=1).
+ then the second call should prepare for the new table scan + (e.g if rnd_init allocates the cursor, second call should + position it to the start of the table, no need to deallocate + and allocate it again + */ + int rnd_init(bool scan); //required + int rnd_end(); + int rnd_next(byte *buf); //required + int rnd_pos(byte *buf, byte *pos); //required + void position(const byte *record); //required + void info(uint); //required + + void update_auto_increment(void); + int repair(THD* thd, HA_CHECK_OPT* check_opt); + int optimize(THD* thd, HA_CHECK_OPT* check_opt); + + int delete_all_rows(void); + int create(const char *name, TABLE *form, + HA_CREATE_INFO *create_info); //required + ha_rows records_in_range(uint inx, key_range *start_key, + key_range *end_key); + uint8 table_cache_type() { return HA_CACHE_TBL_NOCACHE; } + + THR_LOCK_DATA **store_lock(THD *thd, THR_LOCK_DATA **to, + enum thr_lock_type lock_type); //required + virtual bool get_error_message(int error, String *buf); + int external_lock(THD *thd, int lock_type); + int connection_commit(); + int connection_rollback(); + int connection_autocommit(bool state); + int execute_simple_query(const char *query, int len); + + int read_next(byte *buf, MYSQL_RES *result); + int index_read_idx_with_result_set(byte *buf, uint index, + const byte *key, + uint key_len, + ha_rkey_function find_flag, + MYSQL_RES **result); +}; + +int federated_db_init(void); +int federated_db_end(ha_panic_function type); + diff --git a/storage/federated/plug.in b/storage/federated/plug.in new file mode 100644 index 00000000000..81c56cb672f --- /dev/null +++ b/storage/federated/plug.in @@ -0,0 +1,4 @@ +MYSQL_STORAGE_ENGINE(federated,,[Federated Storage Engine], + [Connects to tables on remote MySQL servers], [max,max-no-ndb]) +MYSQL_PLUGIN_STATIC(federated, [libfederated.a]) +MYSQL_PLUGIN_DYNAMIC(federated, [ha_federated.la]) diff --git a/storage/heap/CMakeLists.txt b/storage/heap/CMakeLists.txt index db5fb8b2981..720bd7228f5 100644 --- a/storage/heap/CMakeLists.txt +++ b/storage/heap/CMakeLists.txt @@ -1,8 +1,12 @@ SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DSAFEMALLOC -DSAFE_MUTEX") SET(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -DSAFEMALLOC -DSAFE_MUTEX") -INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/include) +INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/include ${CMAKE_SOURCE_DIR}/zlib + ${CMAKE_SOURCE_DIR}/sql + ${CMAKE_SOURCE_DIR}/regex + ${CMAKE_SOURCE_DIR}/extra/yassl/include) ADD_LIBRARY(heap _check.c _rectest.c hp_block.c hp_clear.c hp_close.c hp_create.c + ha_heap.cc hp_delete.c hp_extra.c hp_hash.c hp_info.c hp_open.c hp_panic.c hp_rename.c hp_rfirst.c hp_rkey.c hp_rlast.c hp_rnext.c hp_rprev.c hp_rrnd.c hp_rsame.c hp_scan.c hp_static.c hp_update.c hp_write.c) diff --git a/storage/heap/Makefile.am b/storage/heap/Makefile.am index 68dce9bca5f..46565126b65 100644 --- a/storage/heap/Makefile.am +++ b/storage/heap/Makefile.am @@ -14,22 +14,42 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -INCLUDES = -I$(top_builddir)/include -I$(top_srcdir)/include -LDADD = libheap.a \ - $(top_builddir)/mysys/libmysys.a \ - $(top_builddir)/dbug/libdbug.a \ - $(top_builddir)/strings/libmystrings.a +MYSQLDATAdir = $(localstatedir) +MYSQLSHAREdir = $(pkgdatadir) +MYSQLBASEdir= $(prefix) +MYSQLLIBdir= $(pkglibdir) +INCLUDES = -I$(top_srcdir)/include -I$(top_builddir)/include \ + -I$(top_srcdir)/regex \ + -I$(top_srcdir)/sql \ + -I$(srcdir) +WRAPLIBS= + +LDADD = + +DEFS = @DEFS@ 
pkglib_LIBRARIES = libheap.a noinst_PROGRAMS = hp_test1 hp_test2 +noinst_LIBRARIES = libheap.a hp_test1_LDFLAGS = @NOINST_LDFLAGS@ +hp_test1_LDADD = libheap.a \ + $(top_builddir)/mysys/libmysys.a \ + $(top_builddir)/dbug/libdbug.a \ + $(top_builddir)/strings/libmystrings.a hp_test2_LDFLAGS = @NOINST_LDFLAGS@ -noinst_HEADERS = heapdef.h +hp_test2_LDADD = libheap.a \ + $(top_builddir)/mysys/libmysys.a \ + $(top_builddir)/dbug/libdbug.a \ + $(top_builddir)/strings/libmystrings.a +noinst_HEADERS = heapdef.h ha_heap.h libheap_a_SOURCES = hp_open.c hp_extra.c hp_close.c hp_panic.c hp_info.c \ hp_rrnd.c hp_scan.c hp_update.c hp_write.c hp_delete.c \ hp_rsame.c hp_create.c hp_rename.c hp_rfirst.c \ hp_rnext.c hp_rlast.c hp_rprev.c hp_clear.c \ hp_rkey.c hp_block.c \ + ha_heap.cc \ hp_hash.c _check.c _rectest.c hp_static.c + + EXTRA_DIST = CMakeLists.txt # Don't update the files from bitkeeper diff --git a/storage/heap/ha_heap.cc b/storage/heap/ha_heap.cc new file mode 100644 index 00000000000..317f85d26f2 --- /dev/null +++ b/storage/heap/ha_heap.cc @@ -0,0 +1,713 @@ +/* Copyright (C) 2000,2004 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + + +#ifdef USE_PRAGMA_IMPLEMENTATION +#pragma implementation // gcc: Class implementation +#endif + +#define MYSQL_SERVER 1 +#include "mysql_priv.h" +#include <mysql/plugin.h> +#include "ha_heap.h" + + +static handler *heap_create_handler(TABLE_SHARE *table, MEM_ROOT *mem_root); + +handlerton heap_hton; + +int heap_init() +{ + heap_hton.state= SHOW_OPTION_YES; + heap_hton.db_type= DB_TYPE_HEAP; + heap_hton.create= heap_create_handler; + heap_hton.panic= heap_panic; + heap_hton.flags= HTON_CAN_RECREATE; + return 0; +} + +static handler *heap_create_handler(TABLE_SHARE *table, MEM_ROOT *mem_root) +{ + return new (mem_root) ha_heap(table); +} + + +/***************************************************************************** +** HEAP tables +*****************************************************************************/ + +ha_heap::ha_heap(TABLE_SHARE *table_arg) + :handler(&heap_hton, table_arg), file(0), records_changed(0), + key_stat_version(0) +{} + + +static const char *ha_heap_exts[] = { + NullS +}; + +const char **ha_heap::bas_ext() const +{ + return ha_heap_exts; +} + +/* + Hash index statistics is updated (copied from HP_KEYDEF::hash_buckets to + rec_per_key) after 1/HEAP_STATS_UPDATE_THRESHOLD fraction of table records + have been inserted/updated/deleted. delete_all_rows() and table flush cause + immediate update. + + NOTE + hash index statistics must be updated when number of table records changes + from 0 to non-zero value and vice versa. Otherwise records_in_range may + erroneously return 0 and 'range' may miss records. 
+*/
+#define HEAP_STATS_UPDATE_THRESHOLD 10
+
+int ha_heap::open(const char *name, int mode, uint test_if_locked)
+{
+  if (!(file= heap_open(name, mode)) && my_errno == ENOENT)
+  {
+    HA_CREATE_INFO create_info;
+    bzero(&create_info, sizeof(create_info));
+    if (!create(name, table, &create_info))
+    {
+      file= heap_open(name, mode);
+      implicit_emptied= 1;
+    }
+  }
+  ref_length= sizeof(HEAP_PTR);
+  if (file)
+  {
+    /* Initialize variables for the opened table */
+    set_keys_for_scanning();
+    /*
+      We cannot run update_key_stats() here because we do not have a
+      lock on the table. The 'records' count might just be changed
+      temporarily at this moment and we might get wrong statistics (Bug
+      #10178). Instead we request an update. This will be done in
+      ha_heap::info(), which is always called before key statistics are
+      used.
+    */
+    key_stat_version= file->s->key_stat_version-1;
+  }
+  return (file ? 0 : 1);
+}
+
+int ha_heap::close(void)
+{
+  return heap_close(file);
+}
+
+
+/*
+  Compute which keys to use for scanning
+
+  SYNOPSIS
+    set_keys_for_scanning()
+    no parameter
+
+  DESCRIPTION
+    Set the bitmap btree_keys, which is used when the upper layers ask
+    which keys to use for scanning. For each btree index the
+    corresponding bit is set.
+
+  RETURN
+    void
+*/
+
+void ha_heap::set_keys_for_scanning(void)
+{
+  btree_keys.clear_all();
+  for (uint i= 0 ; i < table->s->keys ; i++)
+  {
+    if (table->key_info[i].algorithm == HA_KEY_ALG_BTREE)
+      btree_keys.set_bit(i);
+  }
+}
+
+
+void ha_heap::update_key_stats()
+{
+  for (uint i= 0; i < table->s->keys; i++)
+  {
+    KEY *key=table->key_info+i;
+    if (!key->rec_per_key)
+      continue;
+    if (key->algorithm != HA_KEY_ALG_BTREE)
+    {
+      if (key->flags & HA_NOSAME)
+        key->rec_per_key[key->key_parts-1]= 1;
+      else
+      {
+        ha_rows hash_buckets= file->s->keydef[i].hash_buckets;
+        uint no_records= hash_buckets ? file->s->records/hash_buckets : 2;
+        if (no_records < 2)
+          no_records= 2;
+        key->rec_per_key[key->key_parts-1]= no_records;
+      }
+    }
+  }
+  records_changed= 0;
+  /* At the end of update_key_stats() we can proudly claim they are OK. */
+  key_stat_version= file->s->key_stat_version;
+}
+
+
+int ha_heap::write_row(byte * buf)
+{
+  int res;
+  statistic_increment(table->in_use->status_var.ha_write_count,&LOCK_status);
+  if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_INSERT)
+    table->timestamp_field->set_time();
+  if (table->next_number_field && buf == table->record[0])
+    update_auto_increment();
+  res= heap_write(file,buf);
+  if (!res && (++records_changed*HEAP_STATS_UPDATE_THRESHOLD >
+               file->s->records))
+  {
+    /*
+      We can perform this safely since only one writer at a time is
+      allowed on the table.
+    */
+    file->s->key_stat_version++;
+  }
+  return res;
+}
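
The threshold test in write_row above, ++records_changed*HEAP_STATS_UPDATE_THRESHOLD > file->s->records, fires once the changes since the last statistics refresh exceed about a tenth of the table. The same arithmetic as a free-standing predicate (plain C++, mirroring the expression only):

    // True when accumulated changes pass 1/HEAP_STATS_UPDATE_THRESHOLD
    // (i.e. 10%) of the current table size.
    static bool stats_need_refresh(unsigned long &records_changed,
                                   unsigned long records_in_table)
    {
      const unsigned long HEAP_STATS_UPDATE_THRESHOLD= 10;
      return ++records_changed * HEAP_STATS_UPDATE_THRESHOLD > records_in_table;
    }

With 1000 rows it is the 101st change that tips it: 101 * 10 > 1000. Even then the engine only bumps key_stat_version; the actual rec_per_key recalculation is deferred to update_key_stats(), called from info() when a table lock is guaranteed.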
+
+int ha_heap::update_row(const byte * old_data, byte * new_data)
+{
+  int res;
+  statistic_increment(table->in_use->status_var.ha_update_count,&LOCK_status);
+  if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_UPDATE)
+    table->timestamp_field->set_time();
+  res= heap_update(file,old_data,new_data);
+  if (!res && ++records_changed*HEAP_STATS_UPDATE_THRESHOLD >
+              file->s->records)
+  {
+    /*
+      We can perform this safely since only one writer at a time is
+      allowed on the table.
+    */
+    file->s->key_stat_version++;
+  }
+  return res;
+}
+
+int ha_heap::delete_row(const byte * buf)
+{
+  int res;
+  statistic_increment(table->in_use->status_var.ha_delete_count,&LOCK_status);
+  res= heap_delete(file,buf);
+  if (!res && table->s->tmp_table == NO_TMP_TABLE &&
+      ++records_changed*HEAP_STATS_UPDATE_THRESHOLD > file->s->records)
+  {
+    /*
+      We can perform this safely since only one writer at a time is
+      allowed on the table.
+    */
+    file->s->key_stat_version++;
+  }
+  return res;
+}
+
+int ha_heap::index_read(byte * buf, const byte * key, uint key_len,
+                        enum ha_rkey_function find_flag)
+{
+  DBUG_ASSERT(inited==INDEX);
+  statistic_increment(table->in_use->status_var.ha_read_key_count,
+                      &LOCK_status);
+  int error = heap_rkey(file,buf,active_index, key, key_len, find_flag);
+  table->status = error ? STATUS_NOT_FOUND : 0;
+  return error;
+}
+
+int ha_heap::index_read_last(byte *buf, const byte *key, uint key_len)
+{
+  DBUG_ASSERT(inited==INDEX);
+  statistic_increment(table->in_use->status_var.ha_read_key_count,
+                      &LOCK_status);
+  int error= heap_rkey(file, buf, active_index, key, key_len,
+                       HA_READ_PREFIX_LAST);
+  table->status= error ? STATUS_NOT_FOUND : 0;
+  return error;
+}
+
+int ha_heap::index_read_idx(byte * buf, uint index, const byte * key,
+                            uint key_len, enum ha_rkey_function find_flag)
+{
+  statistic_increment(table->in_use->status_var.ha_read_key_count,
+                      &LOCK_status);
+  int error = heap_rkey(file, buf, index, key, key_len, find_flag);
+  table->status = error ? STATUS_NOT_FOUND : 0;
+  return error;
+}
+
+int ha_heap::index_next(byte * buf)
+{
+  DBUG_ASSERT(inited==INDEX);
+  statistic_increment(table->in_use->status_var.ha_read_next_count,
+                      &LOCK_status);
+  int error=heap_rnext(file,buf);
+  table->status=error ? STATUS_NOT_FOUND: 0;
+  return error;
+}
+
+int ha_heap::index_prev(byte * buf)
+{
+  DBUG_ASSERT(inited==INDEX);
+  statistic_increment(table->in_use->status_var.ha_read_prev_count,
+                      &LOCK_status);
+  int error=heap_rprev(file,buf);
+  table->status=error ? STATUS_NOT_FOUND: 0;
+  return error;
+}
+
+int ha_heap::index_first(byte * buf)
+{
+  DBUG_ASSERT(inited==INDEX);
+  statistic_increment(table->in_use->status_var.ha_read_first_count,
+                      &LOCK_status);
+  int error=heap_rfirst(file, buf, active_index);
+  table->status=error ? STATUS_NOT_FOUND: 0;
+  return error;
+}
+
+int ha_heap::index_last(byte * buf)
+{
+  DBUG_ASSERT(inited==INDEX);
+  statistic_increment(table->in_use->status_var.ha_read_last_count,
+                      &LOCK_status);
+  int error=heap_rlast(file, buf, active_index);
+  table->status=error ? STATUS_NOT_FOUND: 0;
+  return error;
+}
+
+int ha_heap::rnd_init(bool scan)
+{
+  return scan ? heap_scan_init(file) : 0;
+}
+
+int ha_heap::rnd_next(byte *buf)
+{
+  statistic_increment(table->in_use->status_var.ha_read_rnd_next_count,
+                      &LOCK_status);
+  int error=heap_scan(file, buf);
+  table->status=error ? STATUS_NOT_FOUND: 0;
+  return error;
+}
+
+int ha_heap::rnd_pos(byte * buf, byte *pos)
+{
+  int error;
+  HEAP_PTR position;
+  statistic_increment(table->in_use->status_var.ha_read_rnd_count,
+                      &LOCK_status);
+  memcpy_fixed((char*) &position,pos,sizeof(HEAP_PTR));
+  error=heap_rrnd(file, buf, position);
+  table->status=error ?
STATUS_NOT_FOUND: 0; + return error; +} + +void ha_heap::position(const byte *record) +{ + *(HEAP_PTR*) ref= heap_position(file); // Ref is aligned +} + +void ha_heap::info(uint flag) +{ + HEAPINFO info; + (void) heap_info(file,&info,flag); + + errkey= info.errkey; + stats.records = info.records; + stats.deleted = info.deleted; + stats.mean_rec_length=info.reclength; + stats.data_file_length=info.data_length; + stats.index_file_length=info.index_length; + stats.max_data_file_length= info.max_records* info.reclength; + stats.delete_length= info.deleted * info.reclength; + if (flag & HA_STATUS_AUTO) + stats.auto_increment_value= info.auto_increment; + /* + If info() is called for the first time after open(), we will still + have to update the key statistics. Hoping that a table lock is now + in place. + */ + if (key_stat_version != file->s->key_stat_version) + update_key_stats(); +} + + +int ha_heap::extra(enum ha_extra_function operation) +{ + return heap_extra(file,operation); +} + + +int ha_heap::reset() +{ + return heap_reset(file); +} + + +int ha_heap::delete_all_rows() +{ + heap_clear(file); + if (table->s->tmp_table == NO_TMP_TABLE) + { + /* + We can perform this safely since only one writer at the time is + allowed on the table. + */ + file->s->key_stat_version++; + } + return 0; +} + +int ha_heap::external_lock(THD *thd, int lock_type) +{ + return 0; // No external locking +} + + +/* + Disable indexes. + + SYNOPSIS + disable_indexes() + mode mode of operation: + HA_KEY_SWITCH_NONUNIQ disable all non-unique keys + HA_KEY_SWITCH_ALL disable all keys + HA_KEY_SWITCH_NONUNIQ_SAVE dis. non-uni. and make persistent + HA_KEY_SWITCH_ALL_SAVE dis. all keys and make persistent + + DESCRIPTION + Disable indexes and clear keys to use for scanning. + + IMPLEMENTATION + HA_KEY_SWITCH_NONUNIQ is not implemented. + HA_KEY_SWITCH_NONUNIQ_SAVE is not implemented with HEAP. + HA_KEY_SWITCH_ALL_SAVE is not implemented with HEAP. + + RETURN + 0 ok + HA_ERR_WRONG_COMMAND mode not implemented. +*/ + +int ha_heap::disable_indexes(uint mode) +{ + int error; + + if (mode == HA_KEY_SWITCH_ALL) + { + if (!(error= heap_disable_indexes(file))) + set_keys_for_scanning(); + } + else + { + /* mode not implemented */ + error= HA_ERR_WRONG_COMMAND; + } + return error; +} + + +/* + Enable indexes. + + SYNOPSIS + enable_indexes() + mode mode of operation: + HA_KEY_SWITCH_NONUNIQ enable all non-unique keys + HA_KEY_SWITCH_ALL enable all keys + HA_KEY_SWITCH_NONUNIQ_SAVE en. non-uni. and make persistent + HA_KEY_SWITCH_ALL_SAVE en. all keys and make persistent + + DESCRIPTION + Enable indexes and set keys to use for scanning. + The indexes might have been disabled by disable_index() before. + The function works only if both data and indexes are empty, + since the heap storage engine cannot repair the indexes. + To be sure, call handler::delete_all_rows() before. + + IMPLEMENTATION + HA_KEY_SWITCH_NONUNIQ is not implemented. + HA_KEY_SWITCH_NONUNIQ_SAVE is not implemented with HEAP. + HA_KEY_SWITCH_ALL_SAVE is not implemented with HEAP. + + RETURN + 0 ok + HA_ERR_CRASHED data or index is non-empty. Delete all rows and retry. + HA_ERR_WRONG_COMMAND mode not implemented. +*/ + +int ha_heap::enable_indexes(uint mode) +{ + int error; + + if (mode == HA_KEY_SWITCH_ALL) + { + if (!(error= heap_enable_indexes(file))) + set_keys_for_scanning(); + } + else + { + /* mode not implemented */ + error= HA_ERR_WRONG_COMMAND; + } + return error; +} + + +/* + Test if indexes are disabled. 
+ + SYNOPSIS + indexes_are_disabled() + no parameters + + RETURN + 0 indexes are not disabled + 1 all indexes are disabled + [2 non-unique indexes are disabled - NOT YET IMPLEMENTED] +*/ + +int ha_heap::indexes_are_disabled(void) +{ + return heap_indexes_are_disabled(file); +} + +THR_LOCK_DATA **ha_heap::store_lock(THD *thd, + THR_LOCK_DATA **to, + enum thr_lock_type lock_type) +{ + if (lock_type != TL_IGNORE && file->lock.type == TL_UNLOCK) + file->lock.type=lock_type; + *to++= &file->lock; + return to; +} + +/* + We have to ignore ENOENT entries as the HEAP table is created on open and + not when doing a CREATE on the table. +*/ + +int ha_heap::delete_table(const char *name) +{ + int error= heap_delete_table(name); + return error == ENOENT ? 0 : error; +} + + +void ha_heap::drop_table(const char *name) +{ + heap_drop_table(file); + close(); +} + + +int ha_heap::rename_table(const char * from, const char * to) +{ + return heap_rename(from,to); +} + + +ha_rows ha_heap::records_in_range(uint inx, key_range *min_key, + key_range *max_key) +{ + KEY *key=table->key_info+inx; + if (key->algorithm == HA_KEY_ALG_BTREE) + return hp_rb_records_in_range(file, inx, min_key, max_key); + + if (!min_key || !max_key || + min_key->length != max_key->length || + min_key->length != key->key_length || + min_key->flag != HA_READ_KEY_EXACT || + max_key->flag != HA_READ_AFTER_KEY) + return HA_POS_ERROR; // Can only use exact keys + + if (stats.records <= 1) + return stats.records; + + /* Assert that info() did run. We need current statistics here. */ + DBUG_ASSERT(key_stat_version == file->s->key_stat_version); + return key->rec_per_key[key->key_parts-1]; +} + + +int ha_heap::create(const char *name, TABLE *table_arg, + HA_CREATE_INFO *create_info) +{ + uint key, parts, mem_per_row= 0, keys= table_arg->s->keys; + uint auto_key= 0, auto_key_type= 0; + ha_rows max_rows; + HP_KEYDEF *keydef; + HA_KEYSEG *seg; + int error; + TABLE_SHARE *share= table_arg->s; + bool found_real_auto_increment= 0; + + for (key= parts= 0; key < keys; key++) + parts+= table_arg->key_info[key].key_parts; + + if (!(keydef= (HP_KEYDEF*) my_malloc(keys * sizeof(HP_KEYDEF) + + parts * sizeof(HA_KEYSEG), + MYF(MY_WME)))) + return my_errno; + seg= my_reinterpret_cast(HA_KEYSEG*) (keydef + keys); + for (key= 0; key < keys; key++) + { + KEY *pos= table_arg->key_info+key; + KEY_PART_INFO *key_part= pos->key_part; + KEY_PART_INFO *key_part_end= key_part + pos->key_parts; + + keydef[key].keysegs= (uint) pos->key_parts; + keydef[key].flag= (pos->flags & (HA_NOSAME | HA_NULL_ARE_EQUAL)); + keydef[key].seg= seg; + + switch (pos->algorithm) { + case HA_KEY_ALG_UNDEF: + case HA_KEY_ALG_HASH: + keydef[key].algorithm= HA_KEY_ALG_HASH; + mem_per_row+= sizeof(char*) * 2; // = sizeof(HASH_INFO) + break; + case HA_KEY_ALG_BTREE: + keydef[key].algorithm= HA_KEY_ALG_BTREE; + mem_per_row+=sizeof(TREE_ELEMENT)+pos->key_length+sizeof(char*); + break; + default: + DBUG_ASSERT(0); // cannot happen + } + + for (; key_part != key_part_end; key_part++, seg++) + { + Field *field= key_part->field; + + if (pos->algorithm == HA_KEY_ALG_BTREE) + seg->type= field->key_type(); + else + { + if ((seg->type = field->key_type()) != (int) HA_KEYTYPE_TEXT && + seg->type != HA_KEYTYPE_VARTEXT1 && + seg->type != HA_KEYTYPE_VARTEXT2 && + seg->type != HA_KEYTYPE_VARBINARY1 && + seg->type != HA_KEYTYPE_VARBINARY2) + seg->type= HA_KEYTYPE_BINARY; + } + seg->start= (uint) key_part->offset; + seg->length= (uint) key_part->length; + seg->flag= key_part->key_part_flag; + + seg->charset= 
field->charset(); + if (field->null_ptr) + { + seg->null_bit= field->null_bit; + seg->null_pos= (uint) (field->null_ptr - (uchar*) table_arg->record[0]); + } + else + { + seg->null_bit= 0; + seg->null_pos= 0; + } + if (field->flags & AUTO_INCREMENT_FLAG && + table_arg->found_next_number_field && + key == share->next_number_index) + { + /* + Store key number and type for found auto_increment key + We have to store type as seg->type can differ from it + */ + auto_key= key+ 1; + auto_key_type= field->key_type(); + } + } + } + mem_per_row+= MY_ALIGN(share->reclength + 1, sizeof(char*)); + max_rows = (ha_rows) (table_arg->in_use->variables.max_heap_table_size / + mem_per_row); + if (table_arg->found_next_number_field) + { + keydef[share->next_number_index].flag|= HA_AUTO_KEY; + found_real_auto_increment= share->next_number_key_offset == 0; + } + HP_CREATE_INFO hp_create_info; + hp_create_info.auto_key= auto_key; + hp_create_info.auto_key_type= auto_key_type; + hp_create_info.auto_increment= (create_info->auto_increment_value ? + create_info->auto_increment_value - 1 : 0); + hp_create_info.max_table_size=current_thd->variables.max_heap_table_size; + hp_create_info.with_auto_increment= found_real_auto_increment; + max_rows = (ha_rows) (hp_create_info.max_table_size / mem_per_row); + error= heap_create(name, + keys, keydef, share->reclength, + (ulong) ((share->max_rows < max_rows && + share->max_rows) ? + share->max_rows : max_rows), + (ulong) share->min_rows, &hp_create_info); + my_free((gptr) keydef, MYF(0)); + if (file) + info(HA_STATUS_NO_LOCK | HA_STATUS_CONST | HA_STATUS_VARIABLE); + return (error); +} + + +void ha_heap::update_create_info(HA_CREATE_INFO *create_info) +{ + table->file->info(HA_STATUS_AUTO); + if (!(create_info->used_fields & HA_CREATE_USED_AUTO)) + create_info->auto_increment_value= stats.auto_increment_value; +} + +void ha_heap::get_auto_increment(ulonglong offset, ulonglong increment, + ulonglong nb_desired_values, + ulonglong *first_value, + ulonglong *nb_reserved_values) +{ + ha_heap::info(HA_STATUS_AUTO); + *first_value= stats.auto_increment_value; + /* such table has only table-level locking so reserves up to +inf */ + *nb_reserved_values= ULONGLONG_MAX; +} + + +bool ha_heap::check_if_incompatible_data(HA_CREATE_INFO *info, + uint table_changes) +{ + /* Check that auto_increment value was not changed */ + if ((table_changes != IS_EQUAL_YES && + info->used_fields & HA_CREATE_USED_AUTO) && + info->auto_increment_value != 0) + return COMPATIBLE_DATA_NO; + return COMPATIBLE_DATA_YES; +} + +struct st_mysql_storage_engine heap_storage_engine= +{ MYSQL_HANDLERTON_INTERFACE_VERSION, &heap_hton}; + +mysql_declare_plugin(heap) +{ + MYSQL_STORAGE_ENGINE_PLUGIN, + &heap_storage_engine, + "MEMORY", + "MySQL AB", + "Hash based, stored in memory, useful for temporary tables", + heap_init, + NULL, + 0x0100, /* 1.0 */ + 0 +} +mysql_declare_plugin_end; diff --git a/storage/heap/ha_heap.h b/storage/heap/ha_heap.h new file mode 100644 index 00000000000..00e59856f26 --- /dev/null +++ b/storage/heap/ha_heap.h @@ -0,0 +1,118 @@ +/* Copyright (C) 2000,2004 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + + +#ifdef USE_PRAGMA_INTERFACE +#pragma interface /* gcc class implementation */ +#endif + +/* class for the heap handler */ + +#include <heap.h> + +class ha_heap: public handler +{ + HP_INFO *file; + key_map btree_keys; + /* number of records changed since last statistics update */ + uint records_changed; + uint key_stat_version; +public: + ha_heap(TABLE_SHARE *table); + ~ha_heap() {} + const char *table_type() const + { + return (table->in_use->variables.sql_mode & MODE_MYSQL323) ? + "HEAP" : "MEMORY"; + } + const char *index_type(uint inx) + { + return ((table_share->key_info[inx].algorithm == HA_KEY_ALG_BTREE) ? + "BTREE" : "HASH"); + } + /* Rows also use a fixed-size format */ + enum row_type get_row_type() const { return ROW_TYPE_FIXED; } + const char **bas_ext() const; + ulonglong table_flags() const + { + return (HA_FAST_KEY_READ | HA_NO_BLOBS | HA_NULL_IN_KEY | + HA_REC_NOT_IN_SEQ | HA_CAN_INSERT_DELAYED | HA_NO_TRANSACTIONS | + HA_HAS_RECORDS | HA_STATS_RECORDS_IS_EXACT); + } + ulong index_flags(uint inx, uint part, bool all_parts) const + { + return ((table_share->key_info[inx].algorithm == HA_KEY_ALG_BTREE) ? + HA_READ_NEXT | HA_READ_PREV | HA_READ_ORDER | HA_READ_RANGE : + HA_ONLY_WHOLE_INDEX); + } + const key_map *keys_to_use_for_scanning() { return &btree_keys; } + uint max_supported_keys() const { return MAX_KEY; } + uint max_supported_key_part_length() const { return MAX_KEY_LENGTH; } + double scan_time() + { return (double) (stats.records+stats.deleted) / 20.0+10; } + double read_time(uint index, uint ranges, ha_rows rows) + { return (double) rows / 20.0+1; } + + int open(const char *name, int mode, uint test_if_locked); + int close(void); + void set_keys_for_scanning(void); + int write_row(byte * buf); + int update_row(const byte * old_data, byte * new_data); + int delete_row(const byte * buf); + virtual void get_auto_increment(ulonglong offset, ulonglong increment, + ulonglong nb_desired_values, + ulonglong *first_value, + ulonglong *nb_reserved_values); + int index_read(byte * buf, const byte * key, + uint key_len, enum ha_rkey_function find_flag); + int index_read_idx(byte * buf, uint idx, const byte * key, + uint key_len, enum ha_rkey_function find_flag); + int index_read_last(byte * buf, const byte * key, uint key_len); + int index_next(byte * buf); + int index_prev(byte * buf); + int index_first(byte * buf); + int index_last(byte * buf); + int rnd_init(bool scan); + int rnd_next(byte *buf); + int rnd_pos(byte * buf, byte *pos); + void position(const byte *record); + void info(uint); + int extra(enum ha_extra_function operation); + int reset(); + int external_lock(THD *thd, int lock_type); + int delete_all_rows(void); + int disable_indexes(uint mode); + int enable_indexes(uint mode); + int indexes_are_disabled(void); + ha_rows records_in_range(uint inx, key_range *min_key, key_range *max_key); + int delete_table(const char *from); + void drop_table(const char *name); + int rename_table(const char * from, const char * to); + int create(const char *name, TABLE *form, HA_CREATE_INFO *create_info); + void
update_create_info(HA_CREATE_INFO *create_info); + + THR_LOCK_DATA **store_lock(THD *thd, THR_LOCK_DATA **to, + enum thr_lock_type lock_type); + int cmp_ref(const byte *ref1, const byte *ref2) + { + HEAP_PTR ptr1=*(HEAP_PTR*)ref1; + HEAP_PTR ptr2=*(HEAP_PTR*)ref2; + return ptr1 < ptr2? -1 : (ptr1 > ptr2? 1 : 0); + } + bool check_if_incompatible_data(HA_CREATE_INFO *info, uint table_changes); +private: + void update_key_stats(); +}; diff --git a/storage/heap/plug.in b/storage/heap/plug.in new file mode 100644 index 00000000000..9e744b6ac0d --- /dev/null +++ b/storage/heap/plug.in @@ -0,0 +1,6 @@ +MYSQL_STORAGE_ENGINE(heap,no, [Memory Storage Engine], + [Volatile memory based tables]) +MYSQL_PLUGIN_DIRECTORY(heap, [storage/heap]) +MYSQL_PLUGIN_STATIC(heap, [libheap.a]) +MYSQL_PLUGIN_MANDATORY(heap) dnl Memory tables + diff --git a/storage/innobase/Makefile.am b/storage/innobase/Makefile.am index 7884715d839..a68dbbcc2e6 100644 --- a/storage/innobase/Makefile.am +++ b/storage/innobase/Makefile.am @@ -17,10 +17,20 @@ # Process this file with automake to create Makefile.in +MYSQLDATAdir = $(localstatedir) +MYSQLSHAREdir = $(pkgdatadir) +MYSQLBASEdir= $(prefix) +MYSQLLIBdir= $(pkglibdir) +INCLUDES = -I$(top_srcdir)/include -I$(top_builddir)/include \ + -I$(top_srcdir)/regex \ + -I$(top_srcdir)/storage/innobase/include \ + -I$(top_srcdir)/sql \ + -I$(srcdir) + AUTOMAKE_OPTIONS = foreign TAR = gtar -noinst_HEADERS = ib_config.h +noinst_HEADERS = SUBDIRS = os ut btr buf data dict dyn eval fil fsp fut \ ha ibuf lock log mach mem mtr page \ diff --git a/storage/innobase/configure.in b/storage/innobase/configure.in deleted file mode 100644 index 4aaa28da89e..00000000000 --- a/storage/innobase/configure.in +++ /dev/null @@ -1,156 +0,0 @@ -# Process this file with autoconf to produce a configure script -AC_INIT -AC_CANONICAL_SYSTEM -AM_MAINTAINER_MODE -AM_CONFIG_HEADER(ib_config.h) -AM_INIT_AUTOMAKE(ib, 0.90) - -# This is need before AC_PROG_CC -# - -if test "x${CFLAGS-}" = x ; then - cflags_is_set=no -else - cflags_is_set=yes -fi - -if test "x${CPPFLAGS-}" = x ; then - cppflags_is_set=no -else - cppflags_is_set=yes -fi - -if test "x${LDFLAGS-}" = x ; then - ldflags_is_set=no -else - ldflags_is_set=yes -fi - -# The following hack should ensure that configure doesn't add optimizing -# or debugging flags to CFLAGS or CXXFLAGS -CFLAGS="$CFLAGS " -CXXFLAGS="$CXXFLAGS " - -AC_PROG_CC -AC_PROG_RANLIB -AC_PROG_INSTALL -AC_PROG_LIBTOOL -AC_CHECK_HEADERS(aio.h sched.h) -AC_CHECK_SIZEOF(int, 4) -AC_CHECK_SIZEOF(long, 4) -AC_CHECK_SIZEOF(void*, 4) -AC_CHECK_FUNCS(sched_yield) -AC_CHECK_FUNCS(fdatasync) -AC_CHECK_FUNCS(localtime_r) -#AC_CHECK_FUNCS(readdir_r) MySQL checks that it has also the right args. -# Some versions of Unix only take 2 arguments. -#AC_C_INLINE Already checked in MySQL -AC_C_BIGENDIAN - -# Build optimized or debug version ? -# First check for gcc and g++ -if test "$ac_cv_prog_gcc" = "yes" -then - DEBUG_CFLAGS="-g" - DEBUG_OPTIMIZE_CC="-O" - OPTIMIZE_CFLAGS="$MAX_C_OPTIMIZE" -else - DEBUG_CFLAGS="-g" - DEBUG_OPTIMIZE_CC="" - OPTIMIZE_CFLAGS="-O" -fi -if test "$ac_cv_prog_cxx_g" = "yes" -then - DEBUG_CXXFLAGS="-g" - DEBUG_OPTIMIZE_CXX="-O" - OPTIMIZE_CXXFLAGS="-O3" -else - DEBUG_CXXFLAGS="-g" - DEBUG_OPTIMIZE_CXX="" - OPTIMIZE_CXXFLAGS="-O" -fi -AC_ARG_WITH(debug, - [ --without-debug Build a production version without debugging code], - [with_debug=$withval], - [with_debug=no]) -if test "$with_debug" = "yes" -then - # Medium debug. 
- CFLAGS="$DEBUG_CFLAGS $DEBUG_OPTIMIZE_CC -DDBUG_ON -DSAFE_MUTEX $CFLAGS" - CXXFLAGS="$DEBUG_CXXFLAGS $DEBUG_OPTIMIZE_CXX -DSAFE_MUTEX $CXXFLAGS" -elif test "$with_debug" = "full" -then - # Full debug. Very slow in some cases - CFLAGS="$DEBUG_CFLAGS -DDBUG_ON -DSAFE_MUTEX -DSAFEMALLOC $CFLAGS" - CXXFLAGS="$DEBUG_CXXFLAGS -DSAFE_MUTEX -DSAFEMALLOC $CXXFLAGS" -else - # Optimized version. No debug - CFLAGS="$OPTIMIZE_CFLAGS -DDBUG_OFF $CFLAGS -DDEBUG_OFF" - CXXFLAGS="$OPTIMIZE_CXXFLAGS -DDBUG_OFF $CXXFLAGS -DDEBUG_OFF" -fi - -# NOTE: The flags below are disabled by default since we can't easily get -# rid of the "string over 509 characters in length" warnings, and thus can't -# add -Werror. But it's a good idea to enable these for a test compile -# before shipping a new snapshot to MySQL to catch errors that could make -# the compile fail on non-C99 compilers. - -# If using gcc, disallow usage of C99 features to avoid accidentally -# introducing problems on compilers that only implement C89. -#if test "$ac_cv_prog_gcc" = "yes" -#then -# CFLAGS="$CFLAGS -std=c89 -ansi -pedantic -Wno-long-long" -#fi - -# If using gcc, add some extra warning flags. -if test "$ac_cv_prog_gcc" = "yes" -then - CFLAGS="$CFLAGS -Werror-implicit-function-declaration -Wpointer-arith" -fi - -case "$target_os" in - lin*) - CFLAGS="$CFLAGS -DUNIV_LINUX";; - hpux10*) - CFLAGS="$CFLAGS -DUNIV_MUST_NOT_INLINE -DUNIV_HPUX -DUNIV_HPUX10";; - hp*) - CFLAGS="$CFLAGS -DUNIV_MUST_NOT_INLINE -DUNIV_HPUX";; - aix*) - CFLAGS="$CFLAGS -DUNIV_AIX";; - irix*) - CFLAGS="$CFLAGS -DUNIV_MUST_NOT_INLINE";; - osf*) - CFLAGS="$CFLAGS -DUNIV_MUST_NOT_INLINE";; - sysv5uw7*) - # Problem when linking on SCO - CFLAGS="$CFLAGS -DUNIV_MUST_NOT_INLINE";; - openbsd*) - CFLAGS="$CFLAGS -DUNIV_MUST_NOT_INLINE";; -esac - -case "$target" in - i[[4567]]86-*-*) - CFLAGS="$CFLAGS -DUNIV_INTEL_X86";; - # The compiler on Linux/S390 does not seem to have inlining - s390-*-*) - CFLAGS="$CFLAGS -DUNIV_MUST_NOT_INLINE";; -esac - -# must go in pair with AR as set by MYSQL_CHECK_AR -if test -z "$ARFLAGS" -then - ARFLAGS="cru" -fi -AC_SUBST(ARFLAGS) - -AC_OUTPUT(Makefile os/Makefile ut/Makefile btr/Makefile dnl - buf/Makefile data/Makefile dnl - dict/Makefile dyn/Makefile dnl - eval/Makefile fil/Makefile fsp/Makefile fut/Makefile dnl - ha/Makefile ibuf/Makefile dnl - lock/Makefile log/Makefile dnl - mach/Makefile mem/Makefile mtr/Makefile dnl - page/Makefile pars/Makefile que/Makefile dnl - read/Makefile rem/Makefile row/Makefile dnl - srv/Makefile sync/Makefile thr/Makefile trx/Makefile dnl - usr/Makefile) diff --git a/storage/innobase/include/Makefile.i b/storage/innobase/include/Makefile.i index 87952a7abc8..db436c702ff 100644 --- a/storage/innobase/include/Makefile.i +++ b/storage/innobase/include/Makefile.i @@ -1,6 +1,10 @@ # Makefile included in Makefile.am in every subdirectory -INCLUDES = -I$(top_srcdir)/include -I$(top_srcdir)/../../include +INCLUDES = -I$(top_srcdir)/include -I$(top_builddir)/include \ + -I$(top_srcdir)/regex \ + -I$(top_srcdir)/storage/innobase/include \ + -I$(top_srcdir)/sql \ + -I$(srcdir) # Don't update the files from bitkeeper %::SCCS/s.% diff --git a/storage/innobase/include/univ.i b/storage/innobase/include/univ.i index f2dafbc3a70..c1f028ef4a3 100644 --- a/storage/innobase/include/univ.i +++ b/storage/innobase/include/univ.i @@ -41,7 +41,7 @@ if we are compiling on Windows. 
*/ /* Include the header file generated by GNU autoconf */ #ifndef __WIN__ -#include "../ib_config.h" +#include "config.h" #endif #ifdef HAVE_SCHED_H diff --git a/storage/innobase/plug.in b/storage/innobase/plug.in new file mode 100644 index 00000000000..fc1d758fd87 --- /dev/null +++ b/storage/innobase/plug.in @@ -0,0 +1,70 @@ +MYSQL_STORAGE_ENGINE(innobase, innodb, [InnoDB Storage Engine], + [Transactional Tables using InnoDB], [max,max-no-ndb]) +MYSQL_PLUGIN_DIRECTORY(innobase, [storage/innobase]) +MYSQL_PLUGIN_STATIC(innobase, [libinnobase.a]) +MYSQL_PLUGIN_ACTIONS(innobase, [ + AC_CHECK_LIB(rt, aio_read, [innodb_system_libs="-lrt"]) + AC_SUBST(innodb_system_libs) + AC_PROG_CC + AC_PROG_RANLIB + AC_PROG_INSTALL + AC_PROG_LIBTOOL + AC_CHECK_HEADERS(aio.h sched.h) + AC_CHECK_SIZEOF(int, 4) + AC_CHECK_SIZEOF(long, 4) + AC_CHECK_SIZEOF(void*, 4) + AC_CHECK_FUNCS(sched_yield) + AC_CHECK_FUNCS(fdatasync) + AC_CHECK_FUNCS(localtime_r) + AC_C_BIGENDIAN + case "$target_os" in + lin*) + CFLAGS="$CFLAGS -DUNIV_LINUX";; + hpux10*) + CFLAGS="$CFLAGS -DUNIV_MUST_NOT_INLINE -DUNIV_HPUX -DUNIV_HPUX10";; + hp*) + CFLAGS="$CFLAGS -DUNIV_MUST_NOT_INLINE -DUNIV_HPUX";; + aix*) + CFLAGS="$CFLAGS -DUNIV_AIX";; + irix*) + CFLAGS="$CFLAGS -DUNIV_MUST_NOT_INLINE";; + osf*) + CFLAGS="$CFLAGS -DUNIV_MUST_NOT_INLINE";; + sysv5uw7*) + # Problem when linking on SCO + CFLAGS="$CFLAGS -DUNIV_MUST_NOT_INLINE";; + openbsd*) + CFLAGS="$CFLAGS -DUNIV_MUST_NOT_INLINE";; + esac + AC_CONFIG_FILES( + storage/innobase/ut/Makefile + storage/innobase/btr/Makefile + storage/innobase/buf/Makefile + storage/innobase/data/Makefile + storage/innobase/dict/Makefile + storage/innobase/dyn/Makefile + storage/innobase/eval/Makefile + storage/innobase/fil/Makefile + storage/innobase/fsp/Makefile + storage/innobase/fut/Makefile + storage/innobase/ha/Makefile + storage/innobase/ibuf/Makefile + storage/innobase/lock/Makefile + storage/innobase/log/Makefile + storage/innobase/mach/Makefile + storage/innobase/mem/Makefile + storage/innobase/mtr/Makefile + storage/innobase/os/Makefile + storage/innobase/page/Makefile + storage/innobase/pars/Makefile + storage/innobase/que/Makefile + storage/innobase/read/Makefile + storage/innobase/rem/Makefile + storage/innobase/row/Makefile + storage/innobase/srv/Makefile + storage/innobase/sync/Makefile + storage/innobase/thr/Makefile + storage/innobase/trx/Makefile + storage/innobase/usr/Makefile) + ]) + diff --git a/storage/myisam/CMakeLists.txt b/storage/myisam/CMakeLists.txt index 3ba7aba4555..046e4fe28cd 100644 --- a/storage/myisam/CMakeLists.txt +++ b/storage/myisam/CMakeLists.txt @@ -1,8 +1,12 @@ SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DSAFEMALLOC -DSAFE_MUTEX") SET(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -DSAFEMALLOC -DSAFE_MUTEX") -INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/include) +INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/include ${CMAKE_SOURCE_DIR}/zlib + ${CMAKE_SOURCE_DIR}/sql + ${CMAKE_SOURCE_DIR}/regex + ${CMAKE_SOURCE_DIR}/extra/yassl/include) ADD_LIBRARY(myisam ft_boolean_search.c ft_nlq_search.c ft_parser.c ft_static.c ft_stem.c + ha_myisam.cc ft_stopwords.c ft_update.c mi_cache.c mi_changed.c mi_check.c mi_checksum.c mi_close.c mi_create.c mi_dbug.c mi_delete.c mi_delete_all.c mi_delete_table.c mi_dynrec.c mi_extra.c mi_info.c diff --git a/storage/myisam/Makefile.am b/storage/myisam/Makefile.am index 081d7facf3a..fdccb1f5b19 100644 --- a/storage/myisam/Makefile.am +++ b/storage/myisam/Makefile.am @@ -14,29 +14,76 @@ # along with this program; if not, write to the Free 
Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +MYSQLDATAdir = $(localstatedir) +MYSQLSHAREdir = $(pkgdatadir) +MYSQLBASEdir= $(prefix) +MYSQLLIBdir= $(pkglibdir) +INCLUDES = -I$(top_srcdir)/include -I$(top_builddir)/include \ + -I$(top_srcdir)/regex \ + -I$(top_srcdir)/sql \ + -I$(srcdir) +WRAPLIBS= + +LDADD = + +DEFS = @DEFS@ + EXTRA_DIST = mi_test_all.sh mi_test_all.res ft_stem.c CMakeLists.txt pkgdata_DATA = mi_test_all mi_test_all.res -INCLUDES = -I$(top_builddir)/include -I$(top_srcdir)/include -LDADD = @CLIENT_EXTRA_LDFLAGS@ libmyisam.a \ - $(top_builddir)/mysys/libmysys.a \ - $(top_builddir)/dbug/libdbug.a \ - $(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@ pkglib_LIBRARIES = libmyisam.a bin_PROGRAMS = myisamchk myisamlog myisampack myisam_ftdump myisamchk_DEPENDENCIES= $(LIBRARIES) +myisamchk_LDADD= @CLIENT_EXTRA_LDFLAGS@ libmyisam.a \ + $(top_builddir)/mysys/libmysys.a \ + $(top_builddir)/dbug/libdbug.a \ + $(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@ myisamlog_DEPENDENCIES= $(LIBRARIES) +myisamlog_LDADD= @CLIENT_EXTRA_LDFLAGS@ libmyisam.a \ + $(top_builddir)/mysys/libmysys.a \ + $(top_builddir)/dbug/libdbug.a \ + $(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@ myisampack_DEPENDENCIES=$(LIBRARIES) +myisampack_LDADD= @CLIENT_EXTRA_LDFLAGS@ libmyisam.a \ + $(top_builddir)/mysys/libmysys.a \ + $(top_builddir)/dbug/libdbug.a \ + $(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@ noinst_PROGRAMS = mi_test1 mi_test2 mi_test3 rt_test sp_test #ft_test1 ft_eval -noinst_HEADERS = myisamdef.h rt_index.h rt_key.h rt_mbr.h sp_defs.h fulltext.h ftdefs.h ft_test1.h ft_eval.h +noinst_HEADERS = myisamdef.h rt_index.h rt_key.h rt_mbr.h sp_defs.h \ + fulltext.h ftdefs.h ft_test1.h ft_eval.h \ + ha_myisam.h mi_test1_DEPENDENCIES= $(LIBRARIES) +mi_test1_LDADD= @CLIENT_EXTRA_LDFLAGS@ libmyisam.a \ + $(top_builddir)/mysys/libmysys.a \ + $(top_builddir)/dbug/libdbug.a \ + $(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@ mi_test2_DEPENDENCIES= $(LIBRARIES) +mi_test2_LDADD= @CLIENT_EXTRA_LDFLAGS@ libmyisam.a \ + $(top_builddir)/mysys/libmysys.a \ + $(top_builddir)/dbug/libdbug.a \ + $(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@ mi_test3_DEPENDENCIES= $(LIBRARIES) +mi_test3_LDADD= @CLIENT_EXTRA_LDFLAGS@ libmyisam.a \ + $(top_builddir)/mysys/libmysys.a \ + $(top_builddir)/dbug/libdbug.a \ + $(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@ #ft_test1_DEPENDENCIES= $(LIBRARIES) #ft_eval_DEPENDENCIES= $(LIBRARIES) myisam_ftdump_DEPENDENCIES= $(LIBRARIES) +myisam_ftdump_LDADD = @CLIENT_EXTRA_LDFLAGS@ libmyisam.a \ + $(top_builddir)/mysys/libmysys.a \ + $(top_builddir)/dbug/libdbug.a \ + $(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@ rt_test_DEPENDENCIES= $(LIBRARIES) +rt_test_LDADD = @CLIENT_EXTRA_LDFLAGS@ libmyisam.a \ + $(top_builddir)/mysys/libmysys.a \ + $(top_builddir)/dbug/libdbug.a \ + $(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@ sp_test_DEPENDENCIES= $(LIBRARIES) +sp_test_LDADD = @CLIENT_EXTRA_LDFLAGS@ libmyisam.a \ + $(top_builddir)/mysys/libmysys.a \ + $(top_builddir)/dbug/libdbug.a \ + $(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@ libmyisam_a_SOURCES = mi_open.c mi_extra.c mi_info.c mi_rkey.c \ mi_rnext.c mi_rnext_same.c \ mi_search.c mi_page.c mi_key.c mi_locking.c \ @@ -52,9 +99,9 @@ libmyisam_a_SOURCES = mi_open.c mi_extra.c mi_info.c mi_rkey.c \ mi_keycache.c mi_preload.c \ ft_parser.c ft_stopwords.c ft_static.c \ ft_update.c ft_boolean_search.c ft_nlq_search.c sort.c \ + ha_myisam.cc \ rt_index.c rt_key.c rt_mbr.c rt_split.c 
sp_key.c CLEANFILES = test?.MY? FT?.MY? isam.log mi_test_all rt_test.MY? sp_test.MY? -DEFS = -DMAP_TO_USE_RAID # Move to automake rules ? prolint:; plparse -b -u -hF1 "-width(0,0)" "-format=%f:%l:\s%t:%n\s%m" \ diff --git a/storage/myisam/ha_myisam.cc b/storage/myisam/ha_myisam.cc new file mode 100644 index 00000000000..209478ee9a5 --- /dev/null +++ b/storage/myisam/ha_myisam.cc @@ -0,0 +1,1806 @@ +/* Copyright (C) 2000,2004 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + + +#ifdef USE_PRAGMA_IMPLEMENTATION +#pragma implementation // gcc: Class implementation +#endif + +#define MYSQL_SERVER 1 +#include "mysql_priv.h" +#include <mysql/plugin.h> +#include <m_ctype.h> +#include <myisampack.h> +#include "ha_myisam.h" +#include <stdarg.h> +#include "myisamdef.h" +#include "rt_index.h" + +ulong myisam_recover_options= HA_RECOVER_NONE; + +/* bits in myisam_recover_options */ +const char *myisam_recover_names[] = +{ "DEFAULT", "BACKUP", "FORCE", "QUICK", NullS}; +TYPELIB myisam_recover_typelib= {array_elements(myisam_recover_names)-1,"", + myisam_recover_names, NULL}; + +const char *myisam_stats_method_names[] = {"nulls_unequal", "nulls_equal", + "nulls_ignored", NullS}; +TYPELIB myisam_stats_method_typelib= { + array_elements(myisam_stats_method_names) - 1, "", + myisam_stats_method_names, NULL}; + + +/***************************************************************************** +** MyISAM tables +*****************************************************************************/ + +static handler *myisam_create_handler(TABLE_SHARE *table, MEM_ROOT *mem_root) +{ + return new (mem_root) ha_myisam(table); +} + +// collect errors printed by mi_check routines + +static void mi_check_print_msg(MI_CHECK *param, const char* msg_type, + const char *fmt, va_list args) +{ + THD* thd = (THD*)param->thd; + Protocol *protocol= thd->protocol; + uint length, msg_length; + char msgbuf[MI_MAX_MSG_BUF]; + char name[NAME_LEN*2+2]; + + msg_length= my_vsnprintf(msgbuf, sizeof(msgbuf), fmt, args); + msgbuf[sizeof(msgbuf) - 1] = 0; // healthy paranoia + + DBUG_PRINT(msg_type,("message: %s",msgbuf)); + + if (!thd->vio_ok()) + { + sql_print_error(msgbuf); + return; + } + + if (param->testflag & (T_CREATE_MISSING_KEYS | T_SAFE_REPAIR | + T_AUTO_REPAIR)) + { + my_message(ER_NOT_KEYFILE,msgbuf,MYF(MY_WME)); + return; + } + length=(uint) (strxmov(name, param->db_name,".",param->table_name,NullS) - + name); + protocol->prepare_for_resend(); + protocol->store(name, length, system_charset_info); + protocol->store(param->op_name, system_charset_info); + protocol->store(msg_type, system_charset_info); + protocol->store(msgbuf, msg_length, system_charset_info); + if (protocol->write()) + sql_print_error("Failed on my_net_write, writing to stderr instead: %s\n", + msgbuf); + return; +} + +extern "C" { + +volatile int *killed_ptr(MI_CHECK *param) 
+{ + /* In theory Unsafe conversion, but should be ok for now */ + return (int*) &(((THD *)(param->thd))->killed); +} + +void mi_check_print_error(MI_CHECK *param, const char *fmt,...) +{ + param->error_printed|=1; + param->out_flag|= O_DATA_LOST; + va_list args; + va_start(args, fmt); + mi_check_print_msg(param, "error", fmt, args); + va_end(args); +} + +void mi_check_print_info(MI_CHECK *param, const char *fmt,...) +{ + va_list args; + va_start(args, fmt); + mi_check_print_msg(param, "info", fmt, args); + va_end(args); +} + +void mi_check_print_warning(MI_CHECK *param, const char *fmt,...) +{ + param->warning_printed=1; + param->out_flag|= O_DATA_LOST; + va_list args; + va_start(args, fmt); + mi_check_print_msg(param, "warning", fmt, args); + va_end(args); +} + +} + + +ha_myisam::ha_myisam(TABLE_SHARE *table_arg) + :handler(&myisam_hton, table_arg), file(0), + int_table_flags(HA_NULL_IN_KEY | HA_CAN_FULLTEXT | HA_CAN_SQL_HANDLER | + HA_DUPLICATE_POS | HA_CAN_INDEX_BLOBS | HA_AUTO_PART_KEY | + HA_FILE_BASED | HA_CAN_GEOMETRY | HA_NO_TRANSACTIONS | + HA_CAN_INSERT_DELAYED | HA_CAN_BIT_FIELD | HA_CAN_RTREEKEYS | + HA_HAS_RECORDS | HA_STATS_RECORDS_IS_EXACT), + can_enable_indexes(1) +{} + + +static const char *ha_myisam_exts[] = { + ".MYI", + ".MYD", + NullS +}; + +const char **ha_myisam::bas_ext() const +{ + return ha_myisam_exts; +} + + +const char *ha_myisam::index_type(uint key_number) +{ + return ((table->key_info[key_number].flags & HA_FULLTEXT) ? + "FULLTEXT" : + (table->key_info[key_number].flags & HA_SPATIAL) ? + "SPATIAL" : + (table->key_info[key_number].algorithm == HA_KEY_ALG_RTREE) ? + "RTREE" : + "BTREE"); +} + +#ifdef HAVE_REPLICATION +int ha_myisam::net_read_dump(NET* net) +{ + int data_fd = file->dfile; + int error = 0; + + my_seek(data_fd, 0L, MY_SEEK_SET, MYF(MY_WME)); + for (;;) + { + ulong packet_len = my_net_read(net); + if (!packet_len) + break ; // end of file + if (packet_len == packet_error) + { + sql_print_error("ha_myisam::net_read_dump - read error "); + error= -1; + goto err; + } + if (my_write(data_fd, (byte*)net->read_pos, (uint) packet_len, + MYF(MY_WME|MY_FNABP))) + { + error = errno; + goto err; + } + } +err: + return error; +} + + +int ha_myisam::dump(THD* thd, int fd) +{ + MYISAM_SHARE* share = file->s; + NET* net = &thd->net; + uint blocksize = share->blocksize; + my_off_t bytes_to_read = share->state.state.data_file_length; + int data_fd = file->dfile; + byte * buf = (byte*) my_malloc(blocksize, MYF(MY_WME)); + if (!buf) + return ENOMEM; + + int error = 0; + my_seek(data_fd, 0L, MY_SEEK_SET, MYF(MY_WME)); + for (; bytes_to_read > 0;) + { + uint bytes = my_read(data_fd, buf, blocksize, MYF(MY_WME)); + if (bytes == MY_FILE_ERROR) + { + error = errno; + goto err; + } + + if (fd >= 0) + { + if (my_write(fd, buf, bytes, MYF(MY_WME | MY_FNABP))) + { + error = errno ? errno : EPIPE; + goto err; + } + } + else + { + if (my_net_write(net, (char*) buf, bytes)) + { + error = errno ? errno : EPIPE; + goto err; + } + } + bytes_to_read -= bytes; + } + + if (fd < 0) + { + if (my_net_write(net, "", 0)) + error = errno ? 
errno : EPIPE; + net_flush(net); + } + +err: + my_free((gptr) buf, MYF(0)); + return error; +} +#endif /* HAVE_REPLICATION */ + + +bool ha_myisam::check_if_locking_is_allowed(uint sql_command, + ulong type, TABLE *table, + uint count, + bool called_by_logger_thread) +{ + /* + To be able to open and lock for reading system tables like 'mysql.proc', + when we already have some tables opened and locked, and avoid deadlocks + we have to disallow write-locking of these tables with any other tables. + */ + if (table->s->system_table && + table->reginfo.lock_type >= TL_WRITE_ALLOW_WRITE && + count != 1) + { + my_error(ER_WRONG_LOCK_OF_SYSTEM_TABLE, MYF(0), table->s->db.str, + table->s->table_name.str); + return FALSE; + } + + /* + Deny locking of the log tables, which is incompatible with + concurrent insert. Unless called from a logger THD: + general_log_thd or slow_log_thd. + */ + if (!called_by_logger_thread) + return check_if_log_table_locking_is_allowed(sql_command, type, table); + + return TRUE; +} + + /* Name is here without an extension */ + +int ha_myisam::open(const char *name, int mode, uint test_if_locked) +{ + uint i; + if (!(file=mi_open(name, mode, test_if_locked | HA_OPEN_FROM_SQL_LAYER))) + return (my_errno ? my_errno : -1); + + if (test_if_locked & (HA_OPEN_IGNORE_IF_LOCKED | HA_OPEN_TMP_TABLE)) + VOID(mi_extra(file, HA_EXTRA_NO_WAIT_LOCK, 0)); + + if (!(test_if_locked & HA_OPEN_TMP_TABLE) && opt_myisam_use_mmap) + VOID(mi_extra(file, HA_EXTRA_MMAP, 0)); + + info(HA_STATUS_NO_LOCK | HA_STATUS_VARIABLE | HA_STATUS_CONST); + if (!(test_if_locked & HA_OPEN_WAIT_IF_LOCKED)) + VOID(mi_extra(file, HA_EXTRA_WAIT_LOCK, 0)); + if (!table->s->db_record_offset) + int_table_flags|=HA_REC_NOT_IN_SEQ; + if (file->s->options & (HA_OPTION_CHECKSUM | HA_OPTION_COMPRESS_RECORD)) + int_table_flags|=HA_HAS_CHECKSUM; + + for (i= 0; i < table->s->keys; i++) + { + struct st_plugin_int *parser= table->key_info[i].parser; + if (table->key_info[i].flags & HA_USES_PARSER) + file->s->keyinfo[i].parser= + (struct st_mysql_ftparser *)parser->plugin->info; + table->key_info[i].block_size= file->s->keyinfo[i].block_length; + } + return (0); +} + +int ha_myisam::close(void) +{ + MI_INFO *tmp=file; + file=0; + return mi_close(tmp); +} + +int ha_myisam::write_row(byte * buf) +{ + statistic_increment(table->in_use->status_var.ha_write_count,&LOCK_status); + + /* If we have a timestamp column, update it to the current time */ + if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_INSERT) + table->timestamp_field->set_time(); + + /* + If we have an auto_increment column and we are writing a changed row + or a new row, then update the auto_increment value in the record. 
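/*
  A standalone sketch (hypothetical types, not the handler API) of the
  write-path ordering described above: auto-set columns are filled in
  before the record is handed to the engine, so mi_write()/heap_write()
  only ever see a fully materialized row.
*/
#include <ctime>
#include <cstdio>

struct Row { long id; long ts; };

static long next_id = 1;                  // stand-in for the auto_increment state

static int engine_write(const Row &r)     // stand-in for mi_write()
{
  std::printf("write id=%ld ts=%ld\n", r.id, r.ts);
  return 0;
}

static int write_row(Row &r, bool auto_ts, bool auto_inc)
{
  if (auto_ts)
    r.ts = (long) std::time(nullptr);     // TIMESTAMP_AUTO_SET_ON_INSERT
  if (auto_inc && r.id == 0)
    r.id = next_id++;                     // update_auto_increment()
  return engine_write(r);                 // engine sees the final record
}

int main()
{
  Row r = {0, 0};
  return write_row(r, true, true);
}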
+ */ + if (table->next_number_field && buf == table->record[0]) + update_auto_increment(); + return mi_write(file,buf); +} + +int ha_myisam::check(THD* thd, HA_CHECK_OPT* check_opt) +{ + if (!file) return HA_ADMIN_INTERNAL_ERROR; + int error; + MI_CHECK param; + MYISAM_SHARE* share = file->s; + const char *old_proc_info=thd->proc_info; + + thd->proc_info="Checking table"; + myisamchk_init(&param); + param.thd = thd; + param.op_name = "check"; + param.db_name= table->s->db.str; + param.table_name= table->alias; + param.testflag = check_opt->flags | T_CHECK | T_SILENT; + param.stats_method= (enum_mi_stats_method)thd->variables.myisam_stats_method; + + if (!(table->db_stat & HA_READ_ONLY)) + param.testflag|= T_STATISTICS; + param.using_global_keycache = 1; + + if (!mi_is_crashed(file) && + (((param.testflag & T_CHECK_ONLY_CHANGED) && + !(share->state.changed & (STATE_CHANGED | STATE_CRASHED | + STATE_CRASHED_ON_REPAIR)) && + share->state.open_count == 0) || + ((param.testflag & T_FAST) && (share->state.open_count == + (uint) (share->global_changed ? 1 : 0))))) + return HA_ADMIN_ALREADY_DONE; + + error = chk_status(&param, file); // Not fatal + error = chk_size(&param, file); + if (!error) + error |= chk_del(&param, file, param.testflag); + if (!error) + error = chk_key(&param, file); + if (!error) + { + if ((!(param.testflag & T_QUICK) && + ((share->options & + (HA_OPTION_PACK_RECORD | HA_OPTION_COMPRESS_RECORD)) || + (param.testflag & (T_EXTEND | T_MEDIUM)))) || + mi_is_crashed(file)) + { + uint old_testflag=param.testflag; + param.testflag|=T_MEDIUM; + if (!(error= init_io_cache(&param.read_cache, file->dfile, + my_default_record_cache_size, READ_CACHE, + share->pack.header_length, 1, MYF(MY_WME)))) + { + error= chk_data_link(&param, file, param.testflag & T_EXTEND); + end_io_cache(&(param.read_cache)); + } + param.testflag= old_testflag; + } + } + if (!error) + { + if ((share->state.changed & (STATE_CHANGED | + STATE_CRASHED_ON_REPAIR | + STATE_CRASHED | STATE_NOT_ANALYZED)) || + (param.testflag & T_STATISTICS) || + mi_is_crashed(file)) + { + file->update|=HA_STATE_CHANGED | HA_STATE_ROW_CHANGED; + pthread_mutex_lock(&share->intern_lock); + share->state.changed&= ~(STATE_CHANGED | STATE_CRASHED | + STATE_CRASHED_ON_REPAIR); + if (!(table->db_stat & HA_READ_ONLY)) + error=update_state_info(&param,file,UPDATE_TIME | UPDATE_OPEN_COUNT | + UPDATE_STAT); + pthread_mutex_unlock(&share->intern_lock); + info(HA_STATUS_NO_LOCK | HA_STATUS_TIME | HA_STATUS_VARIABLE | + HA_STATUS_CONST); + } + } + else if (!mi_is_crashed(file) && !thd->killed) + { + mi_mark_crashed(file); + file->update |= HA_STATE_CHANGED | HA_STATE_ROW_CHANGED; + } + + thd->proc_info=old_proc_info; + return error ? HA_ADMIN_CORRUPT : HA_ADMIN_OK; +} + + +/* + analyze the key distribution in the table + As the table may be only locked for read, we have to take into account that + two threads may do an analyze at the same time!
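/*
  The staged verification in check() above can be read as a pipeline:
  cheap structural checks run first, the expensive data scan last, and
  later stages are skipped once an earlier one fails (chk_status alone
  is only advisory).  A standalone sketch of that control flow, with
  stub functions standing in for the myisamchk routines:
*/
#include <cstdio>

static int chk_status_stub()    { return 0; }  // advisory; result overwritten
static int chk_size_stub()      { return 0; }
static int chk_del_stub()       { return 0; }
static int chk_key_stub()       { return 0; }
static int chk_data_link_stub() { return 0; }  // full data scan, most expensive

int main()
{
  int error;
  (void) chk_status_stub();            // "Not fatal": logged but not returned
  error = chk_size_stub();
  if (!error)
    error |= chk_del_stub();
  if (!error)
    error = chk_key_stub();
  if (!error)
    error = chk_data_link_stub();      // only reached when the keys look sane
  std::printf("check: %s\n", error ? "CORRUPT" : "OK");
  return error;
}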
+*/ + +int ha_myisam::analyze(THD *thd, HA_CHECK_OPT* check_opt) +{ + int error=0; + MI_CHECK param; + MYISAM_SHARE* share = file->s; + + myisamchk_init(&param); + param.thd = thd; + param.op_name= "analyze"; + param.db_name= table->s->db.str; + param.table_name= table->alias; + param.testflag= (T_FAST | T_CHECK | T_SILENT | T_STATISTICS | + T_DONT_CHECK_CHECKSUM); + param.using_global_keycache = 1; + param.stats_method= (enum_mi_stats_method)thd->variables.myisam_stats_method; + + if (!(share->state.changed & STATE_NOT_ANALYZED)) + return HA_ADMIN_ALREADY_DONE; + + error = chk_key(&param, file); + if (!error) + { + pthread_mutex_lock(&share->intern_lock); + error=update_state_info(&param,file,UPDATE_STAT); + pthread_mutex_unlock(&share->intern_lock); + } + else if (!mi_is_crashed(file) && !thd->killed) + mi_mark_crashed(file); + return error ? HA_ADMIN_CORRUPT : HA_ADMIN_OK; +} + + +int ha_myisam::restore(THD* thd, HA_CHECK_OPT *check_opt) +{ + HA_CHECK_OPT tmp_check_opt; + char *backup_dir= thd->lex->backup_dir; + char src_path[FN_REFLEN], dst_path[FN_REFLEN]; + char table_name[FN_REFLEN]; + int error; + const char* errmsg; + DBUG_ENTER("restore"); + + VOID(tablename_to_filename(table->s->table_name.str, table_name, + sizeof(table_name))); + + if (fn_format_relative_to_data_home(src_path, table_name, backup_dir, + MI_NAME_DEXT)) + DBUG_RETURN(HA_ADMIN_INVALID); + + strxmov(dst_path, table->s->normalized_path.str, MI_NAME_DEXT, NullS); + if (my_copy(src_path, dst_path, MYF(MY_WME))) + { + error= HA_ADMIN_FAILED; + errmsg= "Failed in my_copy (Error %d)"; + goto err; + } + + tmp_check_opt.init(); + tmp_check_opt.flags |= T_VERY_SILENT | T_CALC_CHECKSUM | T_QUICK; + DBUG_RETURN(repair(thd, &tmp_check_opt)); + + err: + { + MI_CHECK param; + myisamchk_init(&param); + param.thd= thd; + param.op_name= "restore"; + param.db_name= table->s->db.str; + param.table_name= table->s->table_name.str; + param.testflag= 0; + mi_check_print_error(&param, errmsg, my_errno); + DBUG_RETURN(error); + } +} + + +int ha_myisam::backup(THD* thd, HA_CHECK_OPT *check_opt) +{ + char *backup_dir= thd->lex->backup_dir; + char src_path[FN_REFLEN], dst_path[FN_REFLEN]; + char table_name[FN_REFLEN]; + int error; + const char *errmsg; + DBUG_ENTER("ha_myisam::backup"); + + VOID(tablename_to_filename(table->s->table_name.str, table_name, + sizeof(table_name))); + + if (fn_format_relative_to_data_home(dst_path, table_name, backup_dir, + reg_ext)) + { + errmsg= "Failed in fn_format() for .frm file (errno: %d)"; + error= HA_ADMIN_INVALID; + goto err; + } + + strxmov(src_path, table->s->normalized_path.str, reg_ext, NullS); + if (my_copy(src_path, dst_path, + MYF(MY_WME | MY_HOLD_ORIGINAL_MODES | MY_DONT_OVERWRITE_FILE))) + { + error = HA_ADMIN_FAILED; + errmsg = "Failed copying .frm file (errno: %d)"; + goto err; + } + + /* Change extension */ + if (fn_format_relative_to_data_home(dst_path, table_name, backup_dir, + MI_NAME_DEXT)) + { + errmsg = "Failed in fn_format() for .MYD file (errno: %d)"; + error = HA_ADMIN_INVALID; + goto err; + } + + strxmov(src_path, table->s->normalized_path.str, MI_NAME_DEXT, NullS); + if (my_copy(src_path, dst_path, + MYF(MY_WME | MY_HOLD_ORIGINAL_MODES | MY_DONT_OVERWRITE_FILE))) + { + errmsg = "Failed copying .MYD file (errno: %d)"; + error= HA_ADMIN_FAILED; + goto err; + } + DBUG_RETURN(HA_ADMIN_OK); + + err: + { + MI_CHECK param; + myisamchk_init(&param); + param.thd= thd; + param.op_name= "backup"; + param.db_name= table->s->db.str; + param.table_name= table->s->table_name.str; + param.testflag = 0; +
mi_check_print_error(&param,errmsg, my_errno); + DBUG_RETURN(error); + } +} + + +int ha_myisam::repair(THD* thd, HA_CHECK_OPT *check_opt) +{ + int error; + MI_CHECK param; + ha_rows start_records; + + if (!file) return HA_ADMIN_INTERNAL_ERROR; + + myisamchk_init(&param); + param.thd = thd; + param.op_name= "repair"; + param.testflag= ((check_opt->flags & ~(T_EXTEND)) | + T_SILENT | T_FORCE_CREATE | T_CALC_CHECKSUM | + (check_opt->flags & T_EXTEND ? T_REP : T_REP_BY_SORT)); + param.sort_buffer_length= check_opt->sort_buffer_size; + start_records=file->state->records; + while ((error=repair(thd,param,0)) && param.retry_repair) + { + param.retry_repair=0; + if (test_all_bits(param.testflag, + (uint) (T_RETRY_WITHOUT_QUICK | T_QUICK))) + { + param.testflag&= ~T_RETRY_WITHOUT_QUICK; + sql_print_information("Retrying repair of: '%s' without quick", + table->s->path); + continue; + } + param.testflag&= ~T_QUICK; + if ((param.testflag & T_REP_BY_SORT)) + { + param.testflag= (param.testflag & ~T_REP_BY_SORT) | T_REP; + sql_print_information("Retrying repair of: '%s' with keycache", + table->s->path); + continue; + } + break; + } + if (!error && start_records != file->state->records && + !(check_opt->flags & T_VERY_SILENT)) + { + char llbuff[22],llbuff2[22]; + sql_print_information("Found %s of %s rows when repairing '%s'", + llstr(file->state->records, llbuff), + llstr(start_records, llbuff2), + table->s->path); + } + return error; +} + +int ha_myisam::optimize(THD* thd, HA_CHECK_OPT *check_opt) +{ + int error; + if (!file) return HA_ADMIN_INTERNAL_ERROR; + MI_CHECK param; + + myisamchk_init(&param); + param.thd = thd; + param.op_name= "optimize"; + param.testflag= (check_opt->flags | T_SILENT | T_FORCE_CREATE | + T_REP_BY_SORT | T_STATISTICS | T_SORT_INDEX); + param.sort_buffer_length= check_opt->sort_buffer_size; + if ((error= repair(thd,param,1)) && param.retry_repair) + { + sql_print_warning("Warning: Optimize table got errno %d, retrying", + my_errno); + param.testflag&= ~T_REP_BY_SORT; + error= repair(thd,param,1); + } + return error; +} + + +int ha_myisam::repair(THD *thd, MI_CHECK &param, bool optimize) +{ + int error=0; + uint local_testflag=param.testflag; + bool optimize_done= !optimize, statistics_done=0; + const char *old_proc_info=thd->proc_info; + char fixed_name[FN_REFLEN]; + MYISAM_SHARE* share = file->s; + ha_rows rows= file->state->records; + DBUG_ENTER("ha_myisam::repair"); + + param.db_name= table->s->db.str; + param.table_name= table->alias; + param.tmpfile_createflag = O_RDWR | O_TRUNC; + param.using_global_keycache = 1; + param.thd= thd; + param.tmpdir= &mysql_tmpdir_list; + param.out_flag= 0; + strmov(fixed_name,file->filename); + + // Don't lock tables if we have used LOCK TABLE + if (!thd->locked_tables && + mi_lock_database(file, table->s->tmp_table ? F_EXTRA_LCK : F_WRLCK)) + { + mi_check_print_error(&param,ER(ER_CANT_LOCK),my_errno); + DBUG_RETURN(HA_ADMIN_FAILED); + } + + if (!optimize || + ((file->state->del || share->state.split != file->state->records) && + (!(param.testflag & T_QUICK) || + !(share->state.changed & STATE_NOT_OPTIMIZED_KEYS)))) + { + ulonglong key_map= ((local_testflag & T_CREATE_MISSING_KEYS) ?
+ mi_get_mask_all_keys_active(share->base.keys) : + share->state.key_map); + uint testflag=param.testflag; + if (mi_test_if_sort_rep(file,file->state->records,key_map,0) && + (local_testflag & T_REP_BY_SORT)) + { + local_testflag|= T_STATISTICS; + param.testflag|= T_STATISTICS; // We get this for free + statistics_done=1; + if (thd->variables.myisam_repair_threads>1) + { + char buf[40]; + /* TODO: respect myisam_repair_threads variable */ + my_snprintf(buf, 40, "Repair with %d threads", my_count_bits(key_map)); + thd->proc_info=buf; + error = mi_repair_parallel(&param, file, fixed_name, + param.testflag & T_QUICK); + thd->proc_info="Repair done"; // to reset proc_info, as + // it was pointing to local buffer + } + else + { + thd->proc_info="Repair by sorting"; + error = mi_repair_by_sort(&param, file, fixed_name, + param.testflag & T_QUICK); + } + } + else + { + thd->proc_info="Repair with keycache"; + param.testflag &= ~T_REP_BY_SORT; + error= mi_repair(&param, file, fixed_name, + param.testflag & T_QUICK); + } + param.testflag=testflag; + optimize_done=1; + } + if (!error) + { + if ((local_testflag & T_SORT_INDEX) && + (share->state.changed & STATE_NOT_SORTED_PAGES)) + { + optimize_done=1; + thd->proc_info="Sorting index"; + error=mi_sort_index(&param,file,fixed_name); + } + if (!statistics_done && (local_testflag & T_STATISTICS)) + { + if (share->state.changed & STATE_NOT_ANALYZED) + { + optimize_done=1; + thd->proc_info="Analyzing"; + error = chk_key(&param, file); + } + else + local_testflag&= ~T_STATISTICS; // Don't update statistics + } + } + thd->proc_info="Saving state"; + if (!error) + { + if ((share->state.changed & STATE_CHANGED) || mi_is_crashed(file)) + { + share->state.changed&= ~(STATE_CHANGED | STATE_CRASHED | + STATE_CRASHED_ON_REPAIR); + file->update|=HA_STATE_CHANGED | HA_STATE_ROW_CHANGED; + } + /* + the following 'if', though conceptually wrong, + is a useful optimization nevertheless. + */ + if (file->state != &file->s->state.state) + file->s->state.state = *file->state; + if (file->s->base.auto_key) + update_auto_increment_key(&param, file, 1); + if (optimize_done) + error = update_state_info(&param, file, + UPDATE_TIME | UPDATE_OPEN_COUNT | + (local_testflag & + T_STATISTICS ? UPDATE_STAT : 0)); + info(HA_STATUS_NO_LOCK | HA_STATUS_TIME | HA_STATUS_VARIABLE | + HA_STATUS_CONST); + if (rows != file->state->records && ! (param.testflag & T_VERY_SILENT)) + { + char llbuff[22],llbuff2[22]; + mi_check_print_warning(&param,"Number of rows changed from %s to %s", + llstr(rows,llbuff), + llstr(file->state->records,llbuff2)); + } + } + else + { + mi_mark_crashed_on_repair(file); + file->update |= HA_STATE_CHANGED | HA_STATE_ROW_CHANGED; + update_state_info(&param, file, 0); + } + thd->proc_info=old_proc_info; + if (!thd->locked_tables) + mi_lock_database(file,F_UNLCK); + DBUG_RETURN(error ? HA_ADMIN_FAILED : + !optimize_done ? HA_ADMIN_ALREADY_DONE : HA_ADMIN_OK); +} + + +/* + Assign table indexes to a specific key cache.
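/*
  A standalone sketch of the repair-method selection above: a sort-based
  rebuild is preferred (parallel when myisam_repair_threads > 1), with
  the slower key-cache repair as the fallback.  The names here are
  stand-ins, not the MyISAM API.
*/
#include <cstdio>

enum RepairMethod { REPAIR_PARALLEL, REPAIR_BY_SORT, REPAIR_KEYCACHE };

static RepairMethod pick_method(bool sort_rep_viable,   // mi_test_if_sort_rep()
                                bool rep_by_sort_flag,  // T_REP_BY_SORT
                                unsigned repair_threads)
{
  if (sort_rep_viable && rep_by_sort_flag)
    return repair_threads > 1 ? REPAIR_PARALLEL   // mi_repair_parallel()
                              : REPAIR_BY_SORT;   // mi_repair_by_sort()
  return REPAIR_KEYCACHE;                         // mi_repair()
}

int main()
{
  std::printf("%d %d %d\n",
              (int) pick_method(true,  true, 4),   // 0: parallel
              (int) pick_method(true,  true, 1),   // 1: by sort
              (int) pick_method(false, true, 4));  // 2: keycache
  return 0;
}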
+*/ + +int ha_myisam::assign_to_keycache(THD* thd, HA_CHECK_OPT *check_opt) +{ + KEY_CACHE *new_key_cache= check_opt->key_cache; + const char *errmsg= 0; + int error= HA_ADMIN_OK; + ulonglong map= ~(ulonglong) 0; + TABLE_LIST *table_list= table->pos_in_table_list; + DBUG_ENTER("ha_myisam::assign_to_keycache"); + + /* Check validity of the index references */ + if (table_list->use_index) + { + /* We only come here when the user did specify an index map */ + key_map kmap; + if (get_key_map_from_key_list(&kmap, table, table_list->use_index)) + { + errmsg= thd->net.last_error; + error= HA_ADMIN_FAILED; + goto err; + } + map= kmap.to_ulonglong(); + } + + if ((error= mi_assign_to_key_cache(file, map, new_key_cache))) + { + char buf[STRING_BUFFER_USUAL_SIZE]; + my_snprintf(buf, sizeof(buf), + "Failed to flush to index file (errno: %d)", error); + errmsg= buf; + error= HA_ADMIN_CORRUPT; + } + + err: + if (error != HA_ADMIN_OK) + { + /* Send error to user */ + MI_CHECK param; + myisamchk_init(&param); + param.thd= thd; + param.op_name= "assign_to_keycache"; + param.db_name= table->s->db.str; + param.table_name= table->s->table_name.str; + param.testflag= 0; + mi_check_print_error(&param, errmsg); + } + DBUG_RETURN(error); +} + + +/* + Preload pages of the index file for a table into the key cache. +*/ + +int ha_myisam::preload_keys(THD* thd, HA_CHECK_OPT *check_opt) +{ + int error; + const char *errmsg; + ulonglong map= ~(ulonglong) 0; + TABLE_LIST *table_list= table->pos_in_table_list; + my_bool ignore_leaves= table_list->ignore_leaves; + + DBUG_ENTER("ha_myisam::preload_keys"); + + /* Check validity of the index references */ + if (table_list->use_index) + { + key_map kmap; + get_key_map_from_key_list(&kmap, table, table_list->use_index); + if (kmap.is_set_all()) + { + errmsg= thd->net.last_error; + error= HA_ADMIN_FAILED; + goto err; + } + if (!kmap.is_clear_all()) + map= kmap.to_ulonglong(); + } + + mi_extra(file, HA_EXTRA_PRELOAD_BUFFER_SIZE, + (void *) &thd->variables.preload_buff_size); + + if ((error= mi_preload(file, map, ignore_leaves))) + { + switch (error) { + case HA_ERR_NON_UNIQUE_BLOCK_SIZE: + errmsg= "Indexes use different block sizes"; + break; + case HA_ERR_OUT_OF_MEM: + errmsg= "Failed to allocate buffer"; + break; + default: + char buf[ERRMSGSIZE+20]; + my_snprintf(buf, ERRMSGSIZE, + "Failed to read from index file (errno: %d)", my_errno); + errmsg= buf; + } + error= HA_ADMIN_FAILED; + goto err; + } + + DBUG_RETURN(HA_ADMIN_OK); + + err: + { + MI_CHECK param; + myisamchk_init(&param); + param.thd= thd; + param.op_name= "preload_keys"; + param.db_name= table->s->db.str; + param.table_name= table->s->table_name.str; + param.testflag= 0; + mi_check_print_error(&param, errmsg); + DBUG_RETURN(error); + } +} + + +/* + Disable indexes, making it persistent if requested. + + SYNOPSIS + disable_indexes() + mode mode of operation: + HA_KEY_SWITCH_NONUNIQ disable all non-unique keys + HA_KEY_SWITCH_ALL disable all keys + HA_KEY_SWITCH_NONUNIQ_SAVE dis. non-uni. and make persistent + HA_KEY_SWITCH_ALL_SAVE dis. all keys and make persistent + + IMPLEMENTATION + HA_KEY_SWITCH_NONUNIQ is not implemented. + HA_KEY_SWITCH_ALL_SAVE is not implemented. + + RETURN + 0 ok + HA_ERR_WRONG_COMMAND mode not implemented.
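/*
  assign_to_keycache() and preload_keys() above both collapse the
  requested index set into a plain 64-bit mask: ~0 means "all indexes",
  otherwise kmap.to_ulonglong() supplies one bit per index.  A
  standalone sketch of that convention (plain C++, not the server's
  key_map class):
*/
#include <cstdint>
#include <cstdio>

static bool index_in_map(uint64_t map, unsigned idx)
{
  return (map >> idx) & 1;               // one bit per index
}

int main()
{
  uint64_t all  = ~(uint64_t) 0;                             // default: every index
  uint64_t some = (UINT64_C(1) << 0) | (UINT64_C(1) << 2);   // explicit USE INDEX list
  std::printf("%d %d %d\n",
              index_in_map(all, 5),      // 1: all indexes selected
              index_in_map(some, 2),     // 1
              index_in_map(some, 1));    // 0
  return 0;
}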
+*/ + +int ha_myisam::disable_indexes(uint mode) +{ + int error; + + if (mode == HA_KEY_SWITCH_ALL) + { + /* call a storage engine function to switch the key map */ + error= mi_disable_indexes(file); + } + else if (mode == HA_KEY_SWITCH_NONUNIQ_SAVE) + { + mi_extra(file, HA_EXTRA_NO_KEYS, 0); + info(HA_STATUS_CONST); // Read new key info + error= 0; + } + else + { + /* mode not implemented */ + error= HA_ERR_WRONG_COMMAND; + } + return error; +} + + +/* + Enable indexes, making it persistent if requested. + + SYNOPSIS + enable_indexes() + mode mode of operation: + HA_KEY_SWITCH_NONUNIQ enable all non-unique keys + HA_KEY_SWITCH_ALL enable all keys + HA_KEY_SWITCH_NONUNIQ_SAVE en. non-uni. and make persistent + HA_KEY_SWITCH_ALL_SAVE en. all keys and make persistent + + DESCRIPTION + Enable indexes, which might have been disabled by disable_index() before. + The modes without _SAVE work only if both data and indexes are empty, + since the MyISAM repair would enable them persistently. + To be sure in these cases, call handler::delete_all_rows() before. + + IMPLEMENTATION + HA_KEY_SWITCH_NONUNIQ is not implemented. + HA_KEY_SWITCH_ALL_SAVE is not implemented. + + RETURN + 0 ok + !=0 Error, among others: + HA_ERR_CRASHED data or index is non-empty. Delete all rows and retry. + HA_ERR_WRONG_COMMAND mode not implemented. +*/ + +int ha_myisam::enable_indexes(uint mode) +{ + int error; + + if (mi_is_all_keys_active(file->s->state.key_map, file->s->base.keys)) + { + /* All indexes are enabled already. */ + return 0; + } + + if (mode == HA_KEY_SWITCH_ALL) + { + error= mi_enable_indexes(file); + /* + Do not try to repair on error, + as this could make the enabled state persistent, + but mode==HA_KEY_SWITCH_ALL forbids it. + */ + } + else if (mode == HA_KEY_SWITCH_NONUNIQ_SAVE) + { + THD *thd=current_thd; + MI_CHECK param; + const char *save_proc_info=thd->proc_info; + thd->proc_info="Creating index"; + myisamchk_init(&param); + param.op_name= "recreating_index"; + param.testflag= (T_SILENT | T_REP_BY_SORT | T_QUICK | + T_CREATE_MISSING_KEYS); + param.myf_rw&= ~MY_WAIT_IF_FULL; + param.sort_buffer_length= thd->variables.myisam_sort_buff_size; + param.stats_method= (enum_mi_stats_method)thd->variables.myisam_stats_method; + param.tmpdir=&mysql_tmpdir_list; + if ((error= (repair(thd,param,0) != HA_ADMIN_OK)) && param.retry_repair) + { + sql_print_warning("Warning: Enabling keys got errno %d, retrying", + my_errno); + /* Repairing by sort failed. Now try standard repair method. */ + param.testflag&= ~(T_REP_BY_SORT | T_QUICK); + error= (repair(thd,param,0) != HA_ADMIN_OK); + /* + If the standard repair succeeded, clear all error messages which + might have been set by the first repair. They can still be seen + with SHOW WARNINGS then. + */ + if (! error) + thd->clear_error(); + } + info(HA_STATUS_CONST); + thd->proc_info=save_proc_info; + } + else + { + /* mode not implemented */ + error= HA_ERR_WRONG_COMMAND; + } + return error; +} + + +/* + Test if indexes are disabled. + + + SYNOPSIS + indexes_are_disabled() + no parameters + + + RETURN + 0 indexes are not disabled + 1 all indexes are disabled + [2 non-unique indexes are disabled - NOT YET IMPLEMENTED] +*/ + +int ha_myisam::indexes_are_disabled(void) +{ + + return mi_indexes_are_disabled(file); +} + + +/* + prepare for a many-rows insert operation + e.g.
- disable indexes (if they can be recreated fast) or + activate special bulk-insert optimizations + + SYNOPSIS + start_bulk_insert(rows) + rows Rows to be inserted + 0 if we don't know + + NOTICE + Do not forget to call end_bulk_insert() later! +*/ + +void ha_myisam::start_bulk_insert(ha_rows rows) +{ + DBUG_ENTER("ha_myisam::start_bulk_insert"); + THD *thd= current_thd; + ulong size= min(thd->variables.read_buff_size, + table->s->avg_row_length*rows); + DBUG_PRINT("info",("start_bulk_insert: rows %lu size %lu", + (ulong) rows, size)); + + /* don't enable row cache if too few rows */ + if (! rows || (rows > MI_MIN_ROWS_TO_USE_WRITE_CACHE)) + mi_extra(file, HA_EXTRA_WRITE_CACHE, (void*) &size); + + can_enable_indexes= mi_is_all_keys_active(file->s->state.key_map, + file->s->base.keys); + + if (!(specialflag & SPECIAL_SAFE_MODE)) + { + /* + Only disable old index if the table was empty and we are inserting + a lot of rows. + We should not do this for only a few rows as this is slower and + we don't want to update the key statistics based on only a few rows. + */ + if (file->state->records == 0 && can_enable_indexes && + (!rows || rows >= MI_MIN_ROWS_TO_DISABLE_INDEXES)) + mi_disable_non_unique_index(file,rows); + else + if (!file->bulk_insert && + (!rows || rows >= MI_MIN_ROWS_TO_USE_BULK_INSERT)) + { + mi_init_bulk_insert(file, thd->variables.bulk_insert_buff_size, rows); + } + } + DBUG_VOID_RETURN; +} + +/* + end special bulk-insert optimizations, + which have been activated by start_bulk_insert(). + + SYNOPSIS + end_bulk_insert() + no arguments + + RETURN + 0 OK + != 0 Error +*/ + +int ha_myisam::end_bulk_insert() +{ + mi_end_bulk_insert(file); + int err=mi_extra(file, HA_EXTRA_NO_CACHE, 0); + return err ? err : can_enable_indexes ? + enable_indexes(HA_KEY_SWITCH_NONUNIQ_SAVE) : 0; +} + + +bool ha_myisam::check_and_repair(THD *thd) +{ + int error=0; + int marked_crashed; + char *old_query; + uint old_query_length; + HA_CHECK_OPT check_opt; + DBUG_ENTER("ha_myisam::check_and_repair"); + + check_opt.init(); + check_opt.flags= T_MEDIUM | T_AUTO_REPAIR; + // Don't use quick if deleted rows + if (!file->state->del && (myisam_recover_options & HA_RECOVER_QUICK)) + check_opt.flags|=T_QUICK; + sql_print_warning("Checking table: '%s'",table->s->path); + + old_query= thd->query; + old_query_length= thd->query_length; + pthread_mutex_lock(&LOCK_thread_count); + thd->query= table->s->table_name.str; + thd->query_length= table->s->table_name.length; + pthread_mutex_unlock(&LOCK_thread_count); + + if ((marked_crashed= mi_is_crashed(file)) || check(thd, &check_opt)) + { + sql_print_warning("Recovering table: '%s'",table->s->path); + check_opt.flags= + ((myisam_recover_options & HA_RECOVER_BACKUP ? T_BACKUP_DATA : 0) | + (marked_crashed ? 0 : T_QUICK) | + (myisam_recover_options & HA_RECOVER_FORCE ?
0 : T_SAFE_REPAIR) | + T_AUTO_REPAIR); + if (repair(thd, &check_opt)) + error=1; + } + pthread_mutex_lock(&LOCK_thread_count); + thd->query= old_query; + thd->query_length= old_query_length; + pthread_mutex_unlock(&LOCK_thread_count); + DBUG_RETURN(error); +} + +bool ha_myisam::is_crashed() const +{ + return (file->s->state.changed & STATE_CRASHED || + (my_disable_locking && file->s->state.open_count)); +} + +int ha_myisam::update_row(const byte * old_data, byte * new_data) +{ + statistic_increment(table->in_use->status_var.ha_update_count,&LOCK_status); + if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_UPDATE) + table->timestamp_field->set_time(); + return mi_update(file,old_data,new_data); +} + +int ha_myisam::delete_row(const byte * buf) +{ + statistic_increment(table->in_use->status_var.ha_delete_count,&LOCK_status); + return mi_delete(file,buf); +} + +int ha_myisam::index_read(byte * buf, const byte * key, + uint key_len, enum ha_rkey_function find_flag) +{ + DBUG_ASSERT(inited==INDEX); + statistic_increment(table->in_use->status_var.ha_read_key_count, + &LOCK_status); + int error=mi_rkey(file,buf,active_index, key, key_len, find_flag); + table->status=error ? STATUS_NOT_FOUND: 0; + return error; +} + +int ha_myisam::index_read_idx(byte * buf, uint index, const byte * key, + uint key_len, enum ha_rkey_function find_flag) +{ + statistic_increment(table->in_use->status_var.ha_read_key_count, + &LOCK_status); + int error=mi_rkey(file,buf,index, key, key_len, find_flag); + table->status=error ? STATUS_NOT_FOUND: 0; + return error; +} + +int ha_myisam::index_read_last(byte * buf, const byte * key, uint key_len) +{ + DBUG_ENTER("ha_myisam::index_read_last"); + DBUG_ASSERT(inited==INDEX); + statistic_increment(table->in_use->status_var.ha_read_key_count, + &LOCK_status); + int error=mi_rkey(file,buf,active_index, key, key_len, HA_READ_PREFIX_LAST); + table->status=error ? STATUS_NOT_FOUND: 0; + DBUG_RETURN(error); +} + +int ha_myisam::index_next(byte * buf) +{ + DBUG_ASSERT(inited==INDEX); + statistic_increment(table->in_use->status_var.ha_read_next_count, + &LOCK_status); + int error=mi_rnext(file,buf,active_index); + table->status=error ? STATUS_NOT_FOUND: 0; + return error; +} + +int ha_myisam::index_prev(byte * buf) +{ + DBUG_ASSERT(inited==INDEX); + statistic_increment(table->in_use->status_var.ha_read_prev_count, + &LOCK_status); + int error=mi_rprev(file,buf, active_index); + table->status=error ? STATUS_NOT_FOUND: 0; + return error; +} + +int ha_myisam::index_first(byte * buf) +{ + DBUG_ASSERT(inited==INDEX); + statistic_increment(table->in_use->status_var.ha_read_first_count, + &LOCK_status); + int error=mi_rfirst(file, buf, active_index); + table->status=error ? STATUS_NOT_FOUND: 0; + return error; +} + +int ha_myisam::index_last(byte * buf) +{ + DBUG_ASSERT(inited==INDEX); + statistic_increment(table->in_use->status_var.ha_read_last_count, + &LOCK_status); + int error=mi_rlast(file, buf, active_index); + table->status=error ? STATUS_NOT_FOUND: 0; + return error; +} + +int ha_myisam::index_next_same(byte * buf, + const byte *key __attribute__((unused)), + uint length __attribute__((unused))) +{ + DBUG_ASSERT(inited==INDEX); + statistic_increment(table->in_use->status_var.ha_read_next_count, + &LOCK_status); + int error=mi_rnext_same(file,buf); + table->status=error ? 
STATUS_NOT_FOUND: 0; + return error; +} + + +int ha_myisam::rnd_init(bool scan) +{ + if (scan) + return mi_scan_init(file); + return mi_reset(file); // Free buffers +} + +int ha_myisam::rnd_next(byte *buf) +{ + statistic_increment(table->in_use->status_var.ha_read_rnd_next_count, + &LOCK_status); + int error=mi_scan(file, buf); + table->status=error ? STATUS_NOT_FOUND: 0; + return error; +} + +int ha_myisam::restart_rnd_next(byte *buf, byte *pos) +{ + return rnd_pos(buf,pos); +} + +int ha_myisam::rnd_pos(byte * buf, byte *pos) +{ + statistic_increment(table->in_use->status_var.ha_read_rnd_count, + &LOCK_status); + int error=mi_rrnd(file, buf, my_get_ptr(pos,ref_length)); + table->status=error ? STATUS_NOT_FOUND: 0; + return error; +} + +void ha_myisam::position(const byte* record) +{ + my_off_t position=mi_position(file); + my_store_ptr(ref, ref_length, position); +} + +void ha_myisam::info(uint flag) +{ + MI_ISAMINFO info; + char name_buff[FN_REFLEN]; + + (void) mi_status(file,&info,flag); + if (flag & HA_STATUS_VARIABLE) + { + stats.records = info.records; + stats.deleted = info.deleted; + stats.data_file_length=info.data_file_length; + stats.index_file_length=info.index_file_length; + stats.delete_length = info.delete_length; + stats.check_time = info.check_time; + stats.mean_rec_length=info.mean_reclength; + } + if (flag & HA_STATUS_CONST) + { + TABLE_SHARE *share= table->s; + stats.max_data_file_length= info.max_data_file_length; + stats.max_index_file_length= info.max_index_file_length; + stats.create_time= info.create_time; + ref_length= info.reflength; + share->db_options_in_use= info.options; + stats.block_size= myisam_block_size; /* record block size */ + + /* Update share */ + if (share->tmp_table == NO_TMP_TABLE) + pthread_mutex_lock(&share->mutex); + share->keys_in_use.set_prefix(share->keys); + share->keys_in_use.intersect_extended(info.key_map); + share->keys_for_keyread.intersect(share->keys_in_use); + share->db_record_offset= info.record_offset; + if (share->key_parts) + memcpy((char*) table->key_info[0].rec_per_key, + (char*) info.rec_per_key, + sizeof(table->key_info[0].rec_per_key)*share->key_parts); + if (share->tmp_table == NO_TMP_TABLE) + pthread_mutex_unlock(&share->mutex); + + /* + Set data_file_name and index_file_name to point at the symlink value + if table is symlinked (Ie; Real name is not same as generated name) + */ + data_file_name= index_file_name= 0; + fn_format(name_buff, file->filename, "", MI_NAME_DEXT, MY_APPEND_EXT); + if (strcmp(name_buff, info.data_file_name)) + data_file_name=info.data_file_name; + fn_format(name_buff, file->filename, "", MI_NAME_IEXT, MY_APPEND_EXT); + if (strcmp(name_buff, info.index_file_name)) + index_file_name=info.index_file_name; + } + if (flag & HA_STATUS_ERRKEY) + { + errkey = info.errkey; + my_store_ptr(dup_ref, ref_length, info.dupp_key_pos); + } + if (flag & HA_STATUS_TIME) + stats.update_time = info.update_time; + if (flag & HA_STATUS_AUTO) + stats.auto_increment_value= info.auto_increment; +} + + +int ha_myisam::extra(enum ha_extra_function operation) +{ + if ((specialflag & SPECIAL_SAFE_MODE) && operation == HA_EXTRA_KEYREAD) + return 0; + return mi_extra(file, operation, 0); +} + +int ha_myisam::reset(void) +{ + return mi_reset(file); +} + +/* To be used with WRITE_CACHE and EXTRA_CACHE */ + +int ha_myisam::extra_opt(enum ha_extra_function operation, ulong cache_size) +{ + if ((specialflag & SPECIAL_SAFE_MODE) && operation == HA_EXTRA_WRITE_CACHE) + return 0; + return mi_extra(file, operation, (void*)
&cache_size); +} + +int ha_myisam::delete_all_rows() +{ + return mi_delete_all_rows(file); +} + +int ha_myisam::delete_table(const char *name) +{ + return mi_delete_table(name); +} + + +int ha_myisam::external_lock(THD *thd, int lock_type) +{ + return mi_lock_database(file, !table->s->tmp_table ? + lock_type : ((lock_type == F_UNLCK) ? + F_UNLCK : F_EXTRA_LCK)); +} + +THR_LOCK_DATA **ha_myisam::store_lock(THD *thd, + THR_LOCK_DATA **to, + enum thr_lock_type lock_type) +{ + if (lock_type != TL_IGNORE && file->lock.type == TL_UNLOCK) + file->lock.type=lock_type; + *to++= &file->lock; + return to; +} + +void ha_myisam::update_create_info(HA_CREATE_INFO *create_info) +{ + ha_myisam::info(HA_STATUS_AUTO | HA_STATUS_CONST); + if (!(create_info->used_fields & HA_CREATE_USED_AUTO)) + { + create_info->auto_increment_value= stats.auto_increment_value; + } + create_info->data_file_name=data_file_name; + create_info->index_file_name=index_file_name; +} + + +int ha_myisam::create(const char *name, register TABLE *table_arg, + HA_CREATE_INFO *info) +{ + int error; + uint i,j,recpos,minpos,fieldpos,temp_length,length, create_flags= 0; + bool found_real_auto_increment=0; + enum ha_base_keytype type; + char buff[FN_REFLEN]; + KEY *pos; + MI_KEYDEF *keydef; + MI_COLUMNDEF *recinfo,*recinfo_pos; + HA_KEYSEG *keyseg; + TABLE_SHARE *share= table_arg->s; + uint options= share->db_options_in_use; + DBUG_ENTER("ha_myisam::create"); + + type=HA_KEYTYPE_BINARY; // Keep compiler happy + if (!(my_multi_malloc(MYF(MY_WME), + &recinfo,(share->fields*2+2)* + sizeof(MI_COLUMNDEF), + &keydef, share->keys*sizeof(MI_KEYDEF), + &keyseg, + ((share->key_parts + share->keys) * + sizeof(HA_KEYSEG)), + NullS))) + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + + pos=table_arg->key_info; + for (i=0; i < share->keys ; i++, pos++) + { + if (pos->flags & HA_USES_PARSER) + create_flags|= HA_CREATE_RELIES_ON_SQL_LAYER; + keydef[i].flag= (pos->flags & (HA_NOSAME | HA_FULLTEXT | HA_SPATIAL)); + keydef[i].key_alg= pos->algorithm == HA_KEY_ALG_UNDEF ? + (pos->flags & HA_SPATIAL ? 
HA_KEY_ALG_RTREE : HA_KEY_ALG_BTREE) : + pos->algorithm; + keydef[i].block_length= pos->block_size; + + keydef[i].seg=keyseg; + keydef[i].keysegs=pos->key_parts; + for (j=0 ; j < pos->key_parts ; j++) + { + Field *field=pos->key_part[j].field; + type=field->key_type(); + keydef[i].seg[j].flag=pos->key_part[j].key_part_flag; + + if (options & HA_OPTION_PACK_KEYS || + (pos->flags & (HA_PACK_KEY | HA_BINARY_PACK_KEY | + HA_SPACE_PACK_USED))) + { + if (pos->key_part[j].length > 8 && + (type == HA_KEYTYPE_TEXT || + type == HA_KEYTYPE_NUM || + (type == HA_KEYTYPE_BINARY && !field->zero_pack()))) + { + /* No blobs here */ + if (j == 0) + keydef[i].flag|=HA_PACK_KEY; + if (!(field->flags & ZEROFILL_FLAG) && + (field->type() == MYSQL_TYPE_STRING || + field->type() == MYSQL_TYPE_VAR_STRING || + ((int) (pos->key_part[j].length - field->decimals())) + >= 4)) + keydef[i].seg[j].flag|=HA_SPACE_PACK; + } + else if (j == 0 && (!(pos->flags & HA_NOSAME) || pos->key_length > 16)) + keydef[i].flag|= HA_BINARY_PACK_KEY; + } + keydef[i].seg[j].type= (int) type; + keydef[i].seg[j].start= pos->key_part[j].offset; + keydef[i].seg[j].length= pos->key_part[j].length; + keydef[i].seg[j].bit_start= keydef[i].seg[j].bit_end= + keydef[i].seg[j].bit_length= 0; + keydef[i].seg[j].bit_pos= 0; + keydef[i].seg[j].language= field->charset()->number; + + if (field->null_ptr) + { + keydef[i].seg[j].null_bit=field->null_bit; + keydef[i].seg[j].null_pos= (uint) (field->null_ptr- + (uchar*) table_arg->record[0]); + } + else + { + keydef[i].seg[j].null_bit=0; + keydef[i].seg[j].null_pos=0; + } + if (field->type() == FIELD_TYPE_BLOB || + field->type() == FIELD_TYPE_GEOMETRY) + { + keydef[i].seg[j].flag|=HA_BLOB_PART; + /* save number of bytes used to pack length */ + keydef[i].seg[j].bit_start= (uint) (field->pack_length() - + share->blob_ptr_size); + } + else if (field->type() == FIELD_TYPE_BIT) + { + keydef[i].seg[j].bit_length= ((Field_bit *) field)->bit_len; + keydef[i].seg[j].bit_start= ((Field_bit *) field)->bit_ofs; + keydef[i].seg[j].bit_pos= (uint) (((Field_bit *) field)->bit_ptr - + (uchar*) table_arg->record[0]); + } + } + keyseg+=pos->key_parts; + } + + if (table_arg->found_next_number_field) + { + keydef[share->next_number_index].flag|= HA_AUTO_KEY; + found_real_auto_increment= share->next_number_key_offset == 0; + } + + recpos=0; recinfo_pos=recinfo; + while (recpos < (uint) share->reclength) + { + Field **field,*found=0; + minpos= share->reclength; + length=0; + + for (field=table_arg->field ; *field ; field++) + { + if ((fieldpos=(*field)->offset()) >= recpos && + fieldpos <= minpos) + { + /* Skip null fields */ + if (!(temp_length= (*field)->pack_length_in_rec())) + continue; + if (! found || fieldpos < minpos || + (fieldpos == minpos && temp_length < length)) + { + minpos=fieldpos; found= *field; length=temp_length; + } + } + } + DBUG_PRINT("loop",("found: 0x%lx recpos: %d minpos: %d length: %d", + found,recpos,minpos,length)); + if (recpos != minpos) + { // Reserved space (Null bits?) + bzero((char*) recinfo_pos,sizeof(*recinfo_pos)); + recinfo_pos->type=(int) FIELD_NORMAL; + recinfo_pos++->length= (uint16) (minpos-recpos); + } + if (!
found) + break; + + if (found->flags & BLOB_FLAG) + recinfo_pos->type= (int) FIELD_BLOB; + else if (found->type() == MYSQL_TYPE_VARCHAR) + recinfo_pos->type= FIELD_VARCHAR; + else if (!(options & HA_OPTION_PACK_RECORD)) + recinfo_pos->type= (int) FIELD_NORMAL; + else if (found->zero_pack()) + recinfo_pos->type= (int) FIELD_SKIP_ZERO; + else + recinfo_pos->type= (int) ((length <= 3 || + (found->flags & ZEROFILL_FLAG)) ? + FIELD_NORMAL : + found->type() == MYSQL_TYPE_STRING || + found->type() == MYSQL_TYPE_VAR_STRING ? + FIELD_SKIP_ENDSPACE : + FIELD_SKIP_PRESPACE); + if (found->null_ptr) + { + recinfo_pos->null_bit=found->null_bit; + recinfo_pos->null_pos= (uint) (found->null_ptr- + (uchar*) table_arg->record[0]); + } + else + { + recinfo_pos->null_bit=0; + recinfo_pos->null_pos=0; + } + (recinfo_pos++)->length= (uint16) length; + recpos=minpos+length; + DBUG_PRINT("loop",("length: %d type: %d", + recinfo_pos[-1].length,recinfo_pos[-1].type)); + + } + MI_CREATE_INFO create_info; + bzero((char*) &create_info,sizeof(create_info)); + create_info.max_rows= share->max_rows; + create_info.reloc_rows= share->min_rows; + create_info.with_auto_increment=found_real_auto_increment; + create_info.auto_increment=(info->auto_increment_value ? + info->auto_increment_value -1 : + (ulonglong) 0); + create_info.data_file_length= ((ulonglong) share->max_rows * + share->avg_row_length); + create_info.data_file_name= info->data_file_name; + create_info.index_file_name= info->index_file_name; + + if (info->options & HA_LEX_CREATE_TMP_TABLE) + create_flags|= HA_CREATE_TMP_TABLE; + if (options & HA_OPTION_PACK_RECORD) + create_flags|= HA_PACK_RECORD; + if (options & HA_OPTION_CHECKSUM) + create_flags|= HA_CREATE_CHECKSUM; + if (options & HA_OPTION_DELAY_KEY_WRITE) + create_flags|= HA_CREATE_DELAY_KEY_WRITE; + + /* TODO: Check that the following fn_format is really needed */ + error=mi_create(fn_format(buff,name,"","",MY_UNPACK_FILENAME|MY_APPEND_EXT), + share->keys,keydef, + (uint) (recinfo_pos-recinfo), recinfo, + 0, (MI_UNIQUEDEF*) 0, + &create_info, create_flags); + + my_free((gptr) recinfo,MYF(0)); + DBUG_RETURN(error); +} + + +int ha_myisam::rename_table(const char * from, const char * to) +{ + return mi_rename(from,to); +} + + +void ha_myisam::get_auto_increment(ulonglong offset, ulonglong increment, + ulonglong nb_desired_values, + ulonglong *first_value, + ulonglong *nb_reserved_values) +{ + ulonglong nr; + int error; + byte key[MI_MAX_KEY_LENGTH]; + + if (!table->s->next_number_key_offset) + { // Autoincrement at key-start + ha_myisam::info(HA_STATUS_AUTO); + *first_value= stats.auto_increment_value; + /* MyISAM has only table-level lock, so reserves to +inf */ + *nb_reserved_values= ULONGLONG_MAX; + return; + } + + /* it's safe to call the following if bulk_insert isn't on */ + mi_flush_bulk_insert(file, table->s->next_number_index); + + (void) extra(HA_EXTRA_KEYREAD); + key_copy(key, table->record[0], + table->key_info + table->s->next_number_index, + table->s->next_number_key_offset); + error= mi_rkey(file,table->record[1],(int) table->s->next_number_index, + key,table->s->next_number_key_offset,HA_READ_PREFIX_LAST); + if (error) + nr= 1; + else + { + /* Get data from record[1] */ + nr= ((ulonglong) table->next_number_field-> + val_int_offset(table->s->rec_buff_length)+1); + } + extra(HA_EXTRA_NO_KEYREAD); + *first_value= nr; + /* + MySQL needs to call us for next row: assume we are inserting ("a",null) + here, we return 3, and next this statement will want to insert ("b",null): + there is no reason 
why ("b",3+1) would be the right row to insert: maybe it + already exists, maybe 3+1 is too large... + */ + *nb_reserved_values= 1; +} + + +/* + Find out how many rows there are in the given range + + SYNOPSIS + records_in_range() + inx Index to use + min_key Start of range. Null pointer if from first key + max_key End of range. Null pointer if to last key + + NOTES + min_key.flag can have one of the following values: + HA_READ_KEY_EXACT Include the key in the range + HA_READ_AFTER_KEY Don't include key in range + + max_key.flag can have one of the following values: + HA_READ_BEFORE_KEY Don't include key in range + HA_READ_AFTER_KEY Include all 'end_key' values in the range + + RETURN + HA_POS_ERROR Something is wrong with the index tree. + 0 There are no matching keys in the given range + number > 0 There are approximately 'number' matching rows in + the range. +*/ + +ha_rows ha_myisam::records_in_range(uint inx, key_range *min_key, + key_range *max_key) +{ + return (ha_rows) mi_records_in_range(file, (int) inx, min_key, max_key); +} + + +int ha_myisam::ft_read(byte * buf) +{ + int error; + + if (!ft_handler) + return -1; + + thread_safe_increment(table->in_use->status_var.ha_read_next_count, + &LOCK_status); // why ? + + error=ft_handler->please->read_next(ft_handler,(char*) buf); + + table->status=error ? STATUS_NOT_FOUND: 0; + return error; +} + +uint ha_myisam::checksum() const +{ + return (uint)file->state->checksum; +} + + +bool ha_myisam::check_if_incompatible_data(HA_CREATE_INFO *info, + uint table_changes) +{ + uint options= table->s->db_options_in_use; + + if (info->auto_increment_value != stats.auto_increment_value || + info->data_file_name != data_file_name || + info->index_file_name != index_file_name || + table_changes == IS_EQUAL_NO || + table_changes & IS_EQUAL_PACK_LENGTH) // Not implemented yet + return COMPATIBLE_DATA_NO; + + if ((options & (HA_OPTION_PACK_RECORD | HA_OPTION_CHECKSUM | + HA_OPTION_DELAY_KEY_WRITE)) != + (info->table_options & (HA_OPTION_PACK_RECORD | HA_OPTION_CHECKSUM | + HA_OPTION_DELAY_KEY_WRITE))) + return COMPATIBLE_DATA_NO; + return COMPATIBLE_DATA_YES; +} + +handlerton myisam_hton; + +static int myisam_init() +{ + myisam_hton.state=SHOW_OPTION_YES; + myisam_hton.db_type=DB_TYPE_MYISAM; + myisam_hton.create=myisam_create_handler; + myisam_hton.panic=mi_panic; + myisam_hton.flags=HTON_CAN_RECREATE; + return 0; +} + +struct st_mysql_storage_engine myisam_storage_engine= +{ MYSQL_HANDLERTON_INTERFACE_VERSION, &myisam_hton }; + +mysql_declare_plugin(myisam) +{ + MYSQL_STORAGE_ENGINE_PLUGIN, + &myisam_storage_engine, + "MyISAM", + "MySQL AB", + "Default engine as of MySQL 3.23 with great performance", + myisam_init, /* Plugin Init */ + NULL, /* Plugin Deinit */ + 0x0100, /* 1.0 */ + 0 +} +mysql_declare_plugin_end; +
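The reservation logic in ha_myisam::get_auto_increment() above is easiest to see with a concrete case. The following standalone sketch (hypothetical, not part of the patch) mimics the mi_rkey(..., HA_READ_PREFIX_LAST) lookup for a table with KEY(group_col, autoinc_col): the next value is the current per-group maximum plus one, so it depends on the group of the row being inserted, which is why only a single value can safely be reserved per call (*nb_reserved_values= 1).

#include <cassert>
#include <map>
#include <string>

/* Stand-in for the index lookup: track the current maximum per key prefix. */
static unsigned long long next_autoinc(std::map<std::string, unsigned long long> &max_per_group,
                                       const std::string &group)
{
  unsigned long long nr= max_per_group.count(group) ? max_per_group[group] + 1 : 1;
  max_per_group[group]= nr;            /* pretend the row was inserted */
  return nr;
}

int main()
{
  std::map<std::string, unsigned long long> m;
  assert(next_autoinc(m, "a") == 1);   /* INSERT ("a", NULL) -> 1 */
  assert(next_autoinc(m, "a") == 2);   /* INSERT ("a", NULL) -> 2 */
  assert(next_autoinc(m, "b") == 1);   /* INSERT ("b", NULL) -> 1, not 3 */
  return 0;
}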
diff --git a/storage/myisam/ha_myisam.h b/storage/myisam/ha_myisam.h new file mode 100644 index 00000000000..5544e5040b3 --- /dev/null +++ b/storage/myisam/ha_myisam.h @@ -0,0 +1,139 @@ +/* Copyright (C) 2000,2004 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + + +#ifdef USE_PRAGMA_INTERFACE +#pragma interface /* gcc class implementation */ +#endif + +/* class for the myisam handler */ + +#include <myisam.h> +#include <ft_global.h> + +#define HA_RECOVER_NONE 0 /* No automatic recover */ +#define HA_RECOVER_DEFAULT 1 /* Automatic recover active */ +#define HA_RECOVER_BACKUP 2 /* Make a backup file on recover */ +#define HA_RECOVER_FORCE 4 /* Recover even if we lose rows */ +#define HA_RECOVER_QUICK 8 /* Don't check rows in data file */ + +extern ulong myisam_sort_buffer_size; +extern TYPELIB myisam_recover_typelib; +extern ulong myisam_recover_options; + +class ha_myisam: public handler +{ + MI_INFO *file; + ulong int_table_flags; + char *data_file_name, *index_file_name; + bool can_enable_indexes; + int repair(THD *thd, MI_CHECK &param, bool optimize); + + public: + ha_myisam(TABLE_SHARE *table_arg); + ~ha_myisam() {} + const char *table_type() const { return "MyISAM"; } + const char *index_type(uint key_number); + const char **bas_ext() const; + ulonglong table_flags() const { return int_table_flags; } + ulong index_flags(uint inx, uint part, bool all_parts) const + { + return ((table_share->key_info[inx].algorithm == HA_KEY_ALG_FULLTEXT) ? + 0 : HA_READ_NEXT | HA_READ_PREV | HA_READ_RANGE | + HA_READ_ORDER | HA_KEYREAD_ONLY); + } + uint max_supported_keys() const { return MI_MAX_KEY; } + uint max_supported_key_length() const { return MI_MAX_KEY_LENGTH; } + uint max_supported_key_part_length() const { return MI_MAX_KEY_LENGTH; } + uint checksum() const; + + virtual bool check_if_locking_is_allowed(uint sql_command, + ulong type, TABLE *table, + uint count, + bool called_by_logger_thread); + int open(const char *name, int mode, uint test_if_locked); + int close(void); + int write_row(byte * buf); + int update_row(const byte * old_data, byte * new_data); + int delete_row(const byte * buf); + int index_read(byte * buf, const byte * key, + uint key_len, enum ha_rkey_function find_flag); + int index_read_idx(byte * buf, uint idx, const byte * key, + uint key_len, enum ha_rkey_function find_flag); + int index_read_last(byte * buf, const byte * key, uint key_len); + int index_next(byte * buf); + int index_prev(byte * buf); + int index_first(byte * buf); + int index_last(byte * buf); + int index_next_same(byte *buf, const byte *key, uint keylen); + int ft_init() + { + if (!ft_handler) + return 1; + ft_handler->please->reinit_search(ft_handler); + return 0; + } + FT_INFO *ft_init_ext(uint flags, uint inx,String *key) + { + return ft_init_search(flags,file,inx, + (byte *)key->ptr(), key->length(), key->charset(), + table->record[0]); + } + int ft_read(byte *buf); + int rnd_init(bool scan); + int rnd_next(byte *buf); + int rnd_pos(byte * buf, byte *pos); + int restart_rnd_next(byte *buf, byte *pos); + void position(const byte *record); + void info(uint); + int extra(enum ha_extra_function operation); + int extra_opt(enum ha_extra_function operation, ulong cache_size); + int reset(void); + int external_lock(THD *thd, int lock_type); + int delete_all_rows(void); + int disable_indexes(uint mode); + int enable_indexes(uint mode); + int indexes_are_disabled(void); + void start_bulk_insert(ha_rows rows); + int end_bulk_insert(); + ha_rows records_in_range(uint inx, key_range *min_key, key_range
*max_key); + void update_create_info(HA_CREATE_INFO *create_info); + int create(const char *name, TABLE *form, HA_CREATE_INFO *create_info); + THR_LOCK_DATA **store_lock(THD *thd, THR_LOCK_DATA **to, + enum thr_lock_type lock_type); + virtual void get_auto_increment(ulonglong offset, ulonglong increment, + ulonglong nb_desired_values, + ulonglong *first_value, + ulonglong *nb_reserved_values); + int rename_table(const char * from, const char * to); + int delete_table(const char *name); + int check(THD* thd, HA_CHECK_OPT* check_opt); + int analyze(THD* thd,HA_CHECK_OPT* check_opt); + int repair(THD* thd, HA_CHECK_OPT* check_opt); + bool check_and_repair(THD *thd); + bool is_crashed() const; + bool auto_repair() const { return myisam_recover_options != 0; } + int optimize(THD* thd, HA_CHECK_OPT* check_opt); + int restore(THD* thd, HA_CHECK_OPT* check_opt); + int backup(THD* thd, HA_CHECK_OPT* check_opt); + int assign_to_keycache(THD* thd, HA_CHECK_OPT* check_opt); + int preload_keys(THD* thd, HA_CHECK_OPT* check_opt); + bool check_if_incompatible_data(HA_CREATE_INFO *info, uint table_changes); +#ifdef HAVE_REPLICATION + int dump(THD* thd, int fd); + int net_read_dump(NET* net); +#endif +}; diff --git a/storage/myisam/plug.in b/storage/myisam/plug.in new file mode 100644 index 00000000000..3160752182d --- /dev/null +++ b/storage/myisam/plug.in @@ -0,0 +1,6 @@ +MYSQL_STORAGE_ENGINE(myisam,no, [MyISAM Storage Engine], + [Traditional non-transactional MySQL tables]) +MYSQL_PLUGIN_DIRECTORY(myisam, [storage/myisam]) +MYSQL_PLUGIN_STATIC(myisam, [libmyisam.a]) +MYSQL_PLUGIN_MANDATORY(myisam) dnl Default + diff --git a/storage/myisammrg/CMakeLists.txt b/storage/myisammrg/CMakeLists.txt index 83168f6c60c..a86eff9d764 100644 --- a/storage/myisammrg/CMakeLists.txt +++ b/storage/myisammrg/CMakeLists.txt @@ -1,8 +1,12 @@ SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DSAFEMALLOC -DSAFE_MUTEX") SET(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -DSAFEMALLOC -DSAFE_MUTEX") -INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/include) +INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/include ${CMAKE_SOURCE_DIR}/zlib + ${CMAKE_SOURCE_DIR}/sql + ${CMAKE_SOURCE_DIR}/regex + ${CMAKE_SOURCE_DIR}/extra/yassl/include) ADD_LIBRARY(myisammrg myrg_close.c myrg_create.c myrg_delete.c myrg_extra.c myrg_info.c + ha_myisammrg.cc myrg_locking.c myrg_open.c myrg_panic.c myrg_queue.c myrg_range.c myrg_rfirst.c myrg_rkey.c myrg_rlast.c myrg_rnext.c myrg_rnext_same.c myrg_rprev.c myrg_rrnd.c myrg_rsame.c myrg_static.c myrg_update.c diff --git a/storage/myisammrg/Makefile.am b/storage/myisammrg/Makefile.am index 0402f2730b9..08cd52c363f 100644 --- a/storage/myisammrg/Makefile.am +++ b/storage/myisammrg/Makefile.am @@ -14,15 +14,31 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -INCLUDES = -I$(top_builddir)/include -I$(top_srcdir)/include +MYSQLDATAdir = $(localstatedir) +MYSQLSHAREdir = $(pkgdatadir) +MYSQLBASEdir= $(prefix) +MYSQLLIBdir= $(pkglibdir) +INCLUDES = -I$(top_srcdir)/include -I$(top_builddir)/include \ + -I$(top_srcdir)/regex \ + -I$(top_srcdir)/sql \ + -I$(srcdir) +WRAPLIBS= + +LDADD = + +DEFS = @DEFS@ pkglib_LIBRARIES = libmyisammrg.a -noinst_HEADERS = myrg_def.h +noinst_HEADERS = myrg_def.h ha_myisammrg.h +noinst_LIBRARIES = libmyisammrg.a libmyisammrg_a_SOURCES = myrg_open.c myrg_extra.c myrg_info.c myrg_locking.c \ myrg_rrnd.c myrg_update.c myrg_delete.c myrg_rsame.c \ myrg_panic.c myrg_close.c myrg_create.c myrg_static.c \ myrg_rkey.c 
myrg_rfirst.c myrg_rlast.c myrg_rnext.c \ myrg_rprev.c myrg_queue.c myrg_write.c myrg_range.c \ + ha_myisammrg.cc \ myrg_rnext_same.c + + EXTRA_DIST = CMakeLists.txt # Don't update the files from bitkeeper diff --git a/storage/myisammrg/ha_myisammrg.cc b/storage/myisammrg/ha_myisammrg.cc new file mode 100644 index 00000000000..8c767e32b83 --- /dev/null +++ b/storage/myisammrg/ha_myisammrg.cc @@ -0,0 +1,578 @@ +/* Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + + +#ifdef USE_PRAGMA_IMPLEMENTATION +#pragma implementation // gcc: Class implementation +#endif + +#define MYSQL_SERVER 1 +#include "mysql_priv.h" +#include <mysql/plugin.h> +#include <m_ctype.h> +#include "ha_myisammrg.h" +#include "myrg_def.h" + + +/***************************************************************************** +** MyISAM MERGE tables +*****************************************************************************/ + +static handler *myisammrg_create_handler(TABLE_SHARE *table, + MEM_ROOT *mem_root); + +/* MyISAM MERGE handlerton */ + +handlerton myisammrg_hton; + +static handler *myisammrg_create_handler(TABLE_SHARE *table, + MEM_ROOT *mem_root) +{ + return new (mem_root) ha_myisammrg(table); +} + + +ha_myisammrg::ha_myisammrg(TABLE_SHARE *table_arg) + :handler(&myisammrg_hton, table_arg), file(0) +{} + +static const char *ha_myisammrg_exts[] = { + ".MRG", + NullS +}; + +const char **ha_myisammrg::bas_ext() const +{ + return ha_myisammrg_exts; +} + + +const char *ha_myisammrg::index_type(uint key_number) +{ + return ((table->key_info[key_number].flags & HA_FULLTEXT) ? + "FULLTEXT" : + (table->key_info[key_number].flags & HA_SPATIAL) ? + "SPATIAL" : + (table->key_info[key_number].algorithm == HA_KEY_ALG_RTREE) ? + "RTREE" : + "BTREE"); +} + + +int ha_myisammrg::open(const char *name, int mode, uint test_if_locked) +{ + char name_buff[FN_REFLEN]; + + DBUG_PRINT("info", ("ha_myisammrg::open")); + if (!(file=myrg_open(fn_format(name_buff,name,"","", + MY_UNPACK_FILENAME|MY_APPEND_EXT), + mode, test_if_locked))) + { + DBUG_PRINT("info", ("ha_myisammrg::open exit %d", my_errno)); + return (my_errno ? 
my_errno : -1); + } + DBUG_PRINT("info", ("ha_myisammrg::open myrg_extrafunc...")); + myrg_extrafunc(file, query_cache_invalidate_by_MyISAM_filename_ref); + if (!(test_if_locked == HA_OPEN_WAIT_IF_LOCKED || + test_if_locked == HA_OPEN_ABORT_IF_LOCKED)) + myrg_extra(file,HA_EXTRA_NO_WAIT_LOCK,0); + info(HA_STATUS_NO_LOCK | HA_STATUS_VARIABLE | HA_STATUS_CONST); + if (!(test_if_locked & HA_OPEN_WAIT_IF_LOCKED)) + myrg_extra(file,HA_EXTRA_WAIT_LOCK,0); + + if (table->s->reclength != stats.mean_rec_length && stats.mean_rec_length) + { + DBUG_PRINT("error",("reclength: %d mean_rec_length: %d", + table->s->reclength, stats.mean_rec_length)); + goto err; + } +#if !defined(BIG_TABLES) || SIZEOF_OFF_T == 4 + /* Merge table has more than 2G rows */ + if (table->s->crashed) + goto err; +#endif + return (0); +err: + myrg_close(file); + file=0; + return (my_errno= HA_ERR_WRONG_MRG_TABLE_DEF); +} + +int ha_myisammrg::close(void) +{ + return myrg_close(file); +} + +int ha_myisammrg::write_row(byte * buf) +{ + statistic_increment(table->in_use->status_var.ha_write_count,&LOCK_status); + + if (file->merge_insert_method == MERGE_INSERT_DISABLED || !file->tables) + return (HA_ERR_TABLE_READONLY); + + if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_INSERT) + table->timestamp_field->set_time(); + if (table->next_number_field && buf == table->record[0]) + update_auto_increment(); + return myrg_write(file,buf); +} + +int ha_myisammrg::update_row(const byte * old_data, byte * new_data) +{ + statistic_increment(table->in_use->status_var.ha_update_count,&LOCK_status); + if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_UPDATE) + table->timestamp_field->set_time(); + return myrg_update(file,old_data,new_data); +} + +int ha_myisammrg::delete_row(const byte * buf) +{ + statistic_increment(table->in_use->status_var.ha_delete_count,&LOCK_status); + return myrg_delete(file,buf); +} + +int ha_myisammrg::index_read(byte * buf, const byte * key, + uint key_len, enum ha_rkey_function find_flag) +{ + statistic_increment(table->in_use->status_var.ha_read_key_count, + &LOCK_status); + int error=myrg_rkey(file,buf,active_index, key, key_len, find_flag); + table->status=error ? STATUS_NOT_FOUND: 0; + return error; +} + +int ha_myisammrg::index_read_idx(byte * buf, uint index, const byte * key, + uint key_len, enum ha_rkey_function find_flag) +{ + statistic_increment(table->in_use->status_var.ha_read_key_count, + &LOCK_status); + int error=myrg_rkey(file,buf,index, key, key_len, find_flag); + table->status=error ? STATUS_NOT_FOUND: 0; + return error; +} + +int ha_myisammrg::index_read_last(byte * buf, const byte * key, uint key_len) +{ + statistic_increment(table->in_use->status_var.ha_read_key_count, + &LOCK_status); + int error=myrg_rkey(file,buf,active_index, key, key_len, + HA_READ_PREFIX_LAST); + table->status=error ? STATUS_NOT_FOUND: 0; + return error; +} + +int ha_myisammrg::index_next(byte * buf) +{ + statistic_increment(table->in_use->status_var.ha_read_next_count, + &LOCK_status); + int error=myrg_rnext(file,buf,active_index); + table->status=error ? STATUS_NOT_FOUND: 0; + return error; +} + +int ha_myisammrg::index_prev(byte * buf) +{ + statistic_increment(table->in_use->status_var.ha_read_prev_count, + &LOCK_status); + int error=myrg_rprev(file,buf, active_index); + table->status=error ? 
STATUS_NOT_FOUND: 0; + return error; +} + +int ha_myisammrg::index_first(byte * buf) +{ + statistic_increment(table->in_use->status_var.ha_read_first_count, + &LOCK_status); + int error=myrg_rfirst(file, buf, active_index); + table->status=error ? STATUS_NOT_FOUND: 0; + return error; +} + +int ha_myisammrg::index_last(byte * buf) +{ + statistic_increment(table->in_use->status_var.ha_read_last_count, + &LOCK_status); + int error=myrg_rlast(file, buf, active_index); + table->status=error ? STATUS_NOT_FOUND: 0; + return error; +} + +int ha_myisammrg::index_next_same(byte * buf, + const byte *key __attribute__((unused)), + uint length __attribute__((unused))) +{ + statistic_increment(table->in_use->status_var.ha_read_next_count, + &LOCK_status); + int error=myrg_rnext_same(file,buf); + table->status=error ? STATUS_NOT_FOUND: 0; + return error; +} + + +int ha_myisammrg::rnd_init(bool scan) +{ + return myrg_reset(file); +} + + +int ha_myisammrg::rnd_next(byte *buf) +{ + statistic_increment(table->in_use->status_var.ha_read_rnd_next_count, + &LOCK_status); + int error=myrg_rrnd(file, buf, HA_OFFSET_ERROR); + table->status=error ? STATUS_NOT_FOUND: 0; + return error; +} + + +int ha_myisammrg::rnd_pos(byte * buf, byte *pos) +{ + statistic_increment(table->in_use->status_var.ha_read_rnd_count, + &LOCK_status); + int error=myrg_rrnd(file, buf, my_get_ptr(pos,ref_length)); + table->status=error ? STATUS_NOT_FOUND: 0; + return error; +} + +void ha_myisammrg::position(const byte *record) +{ + ulonglong position= myrg_position(file); + my_store_ptr(ref, ref_length, (my_off_t) position); +} + + +ha_rows ha_myisammrg::records_in_range(uint inx, key_range *min_key, + key_range *max_key) +{ + return (ha_rows) myrg_records_in_range(file, (int) inx, min_key, max_key); +} + + +void ha_myisammrg::info(uint flag) +{ + MYMERGE_INFO info; + (void) myrg_status(file,&info,flag); + /* + The following fails if one has not compiled MySQL with -DBIG_TABLES + and one has more than 2^32 rows in the merge tables. + */ + stats.records = (ha_rows) info.records; + stats.deleted = (ha_rows) info.deleted; +#if !defined(BIG_TABLES) || SIZEOF_OFF_T == 4 + if ((info.records >= (ulonglong) 1 << 32) || + (info.deleted >= (ulonglong) 1 << 32)) + table->s->crashed= 1; +#endif + stats.data_file_length=info.data_file_length; + errkey = info.errkey; + table->s->keys_in_use.set_prefix(table->s->keys); + table->s->db_options_in_use= info.options; + stats.mean_rec_length= info.reclength; + + /* + The handler::block_size is used all over the code in index scan cost + calculations. It is used to get number of disk seeks required to + retrieve a number of index tuples. + If the merge table has N underlying tables, then (assuming underlying + tables have equal size, the only "simple" approach we can use) + retrieving X index records from a merge table will require N times more + disk seeks compared to doing the same on a MyISAM table with equal + number of records. + In the edge case (file_tables > myisam_block_size) we'll get + block_size==0, and index calculation code will act as if we need one + disk seek to retrieve one index tuple. + + TODO: In 5.2 index scan cost calculation will be factored out into a + virtual function in class handler and we'll be able to remove this hack. 
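+
+    To make the arithmetic concrete (numbers purely illustrative): with
+    myisam_block_size == 1024 and a MERGE table over 4 underlying MyISAM
+    tables, the assignment below gives stats.block_size == 1024 / 4 == 256,
+    so the optimizer charges roughly four times as many disk seeks per
+    index tuple as it would for a single MyISAM table of the same size.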
+ */ + stats.block_size= 0; + if (file->tables) + stats.block_size= myisam_block_size / file->tables; + + stats.update_time= 0; +#if SIZEOF_OFF_T > 4 + ref_length=6; // Should be big enough +#else + ref_length=4; // Can't be > than my_off_t +#endif + if (flag & HA_STATUS_CONST) + { + if (table->s->key_parts && info.rec_per_key) + memcpy((char*) table->key_info[0].rec_per_key, + (char*) info.rec_per_key, + sizeof(table->key_info[0].rec_per_key)*table->s->key_parts); + } +} + + +int ha_myisammrg::extra(enum ha_extra_function operation) +{ + /* As this is just a mapping, we don't have to force the underlying + tables to be closed */ + if (operation == HA_EXTRA_FORCE_REOPEN || + operation == HA_EXTRA_PREPARE_FOR_DELETE) + return 0; + return myrg_extra(file,operation,0); +} + +int ha_myisammrg::reset(void) +{ + return myrg_reset(file); +} + +/* To be used with WRITE_CACHE, EXTRA_CACHE and BULK_INSERT_BEGIN */ + +int ha_myisammrg::extra_opt(enum ha_extra_function operation, ulong cache_size) +{ + if ((specialflag & SPECIAL_SAFE_MODE) && operation == HA_EXTRA_WRITE_CACHE) + return 0; + return myrg_extra(file, operation, (void*) &cache_size); +} + +int ha_myisammrg::external_lock(THD *thd, int lock_type) +{ + return myrg_lock_database(file,lock_type); +} + +uint ha_myisammrg::lock_count(void) const +{ + return file->tables; +} + + +THR_LOCK_DATA **ha_myisammrg::store_lock(THD *thd, + THR_LOCK_DATA **to, + enum thr_lock_type lock_type) +{ + MYRG_TABLE *open_table; + + for (open_table=file->open_tables ; + open_table != file->end_table ; + open_table++) + { + *(to++)= &open_table->table->lock; + if (lock_type != TL_IGNORE && open_table->table->lock.type == TL_UNLOCK) + open_table->table->lock.type=lock_type; + } + return to; +} + + +/* Find out database name and table name from a filename */ + +static void split_file_name(const char *file_name, + LEX_STRING *db, LEX_STRING *name) +{ + uint dir_length, prefix_length; + char buff[FN_REFLEN]; + + db->length= 0; + strmake(buff, file_name, sizeof(buff)-1); + dir_length= dirname_length(buff); + if (dir_length > 1) + { + /* Get database */ + buff[dir_length-1]= 0; // Remove end '/' + prefix_length= dirname_length(buff); + db->str= (char*) file_name+ prefix_length; + db->length= dir_length - prefix_length -1; + } + name->str= (char*) file_name+ dir_length; + name->length= (uint) (fn_ext(name->str) - name->str); +} + + +void ha_myisammrg::update_create_info(HA_CREATE_INFO *create_info) +{ + DBUG_ENTER("ha_myisammrg::update_create_info"); + + if (!(create_info->used_fields & HA_CREATE_USED_UNION)) + { + MYRG_TABLE *open_table; + THD *thd=current_thd; + + create_info->merge_list.next= &create_info->merge_list.first; + create_info->merge_list.elements=0; + + for (open_table=file->open_tables ; + open_table != file->end_table ; + open_table++) + { + TABLE_LIST *ptr; + LEX_STRING db, name; + + if (!(ptr = (TABLE_LIST *) thd->calloc(sizeof(TABLE_LIST)))) + goto err; + split_file_name(open_table->table->filename, &db, &name); + if (!(ptr->table_name= thd->strmake(name.str, name.length))) + goto err; + if (db.length && !(ptr->db= thd->strmake(db.str, db.length))) + goto err; + + create_info->merge_list.elements++; + (*create_info->merge_list.next) = (byte*) ptr; + create_info->merge_list.next= (byte**) &ptr->next_local; + } + *create_info->merge_list.next=0; + } + if (!(create_info->used_fields & HA_CREATE_USED_INSERT_METHOD)) + { + create_info->merge_insert_method = file->merge_insert_method; + } + DBUG_VOID_RETURN; + +err: + create_info->merge_list.elements=0; + 
create_info->merge_list.first=0; + DBUG_VOID_RETURN; +} + + +int ha_myisammrg::create(const char *name, register TABLE *form, + HA_CREATE_INFO *create_info) +{ + char buff[FN_REFLEN]; + const char **table_names, **pos; + TABLE_LIST *tables= (TABLE_LIST*) create_info->merge_list.first; + THD *thd= current_thd; + uint dirlgt= dirname_length(name); + DBUG_ENTER("ha_myisammrg::create"); + + if (!(table_names= (const char**) + thd->alloc((create_info->merge_list.elements+1) * sizeof(char*)))) + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + for (pos= table_names; tables; tables= tables->next_local) + { + const char *table_name; + TABLE *tbl= 0; + if (create_info->options & HA_LEX_CREATE_TMP_TABLE) + tbl= find_temporary_table(thd, tables); + if (!tbl) + { + /* + Construct the path to the MyISAM table. Try to meet two conditions: + 1.) Allow MyISAM tables from different databases to be included, and + 2.) allow for moving DATADIR around in the file system. + The first means that we need paths in the .MRG file. The second + means that we should not have absolute paths in the .MRG file. + The best we can do is to use 'mysql_data_home', which is '.' + in mysqld and may be an absolute path in an embedded server. + This means that it might not be possible to move the DATADIR of + an embedded server without changing the paths in the .MRG file. + */ + uint length= build_table_filename(buff, sizeof(buff), + tables->db, tables->table_name, "", 0); + /* + If a MyISAM table is in the same directory as the MERGE table, + we use the table name without a path. This means that the + DATADIR can easily be moved even for an embedded server as long + as the MyISAM tables are from the same database as the MERGE table. + */ + if ((dirname_length(buff) == dirlgt) && ! memcmp(buff, name, dirlgt)) + table_name= tables->table_name; + else + if (! (table_name= thd->strmake(buff, length))) + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + } + else + table_name= tbl->s->path.str; + *pos++= table_name; + } + *pos=0; + DBUG_RETURN(myrg_create(fn_format(buff,name,"","", + MY_RESOLVE_SYMLINKS| + MY_UNPACK_FILENAME|MY_APPEND_EXT), + table_names, + create_info->merge_insert_method, + (my_bool) 0)); +} +
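To illustrate the two path rules above (a hypothetical layout with made-up names, inferred from the comments): a MERGE table db2.m1 that unions db2.t1 and db1.t2 would get a mapping file db2/m1.MRG containing roughly

t1
./db1/t2
#INSERT_METHOD=LAST

t1 is stored without a path because it lives in the MERGE table's own directory, while the cross-database table keeps its datadir-relative path; the #INSERT_METHOD line is only written when an insert method is set.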
+ +void ha_myisammrg::append_create_info(String *packet) +{ + const char *current_db; + uint db_length; + THD *thd= current_thd; + MYRG_TABLE *open_table, *first; + + if (file->merge_insert_method != MERGE_INSERT_DISABLED) + { + packet->append(STRING_WITH_LEN(" INSERT_METHOD=")); + packet->append(get_type(&merge_insert_method,file->merge_insert_method-1)); + } + packet->append(STRING_WITH_LEN(" UNION=(")); + + current_db= table->s->db.str; + db_length= table->s->db.length; + + for (first=open_table=file->open_tables ; + open_table != file->end_table ; + open_table++) + { + LEX_STRING db, name; + split_file_name(open_table->table->filename, &db, &name); + if (open_table != first) + packet->append(','); + /* Report database for mapped table if it isn't in current database */ + if (db.length && + (db_length != db.length || + strncmp(current_db, db.str, db.length))) + { + append_identifier(thd, packet, db.str, db.length); + packet->append('.'); + } + append_identifier(thd, packet, name.str, name.length); + } + packet->append(')'); +} + + +bool ha_myisammrg::check_if_incompatible_data(HA_CREATE_INFO *info, + uint table_changes) +{ + /* + For myisammrg, we should always re-generate the mapping file as this + is trivial to do + */ + return COMPATIBLE_DATA_NO; +} + +static int myisammrg_init() +{ + myisammrg_hton.state=have_merge_db; + myisammrg_hton.db_type=DB_TYPE_MRG_MYISAM; + myisammrg_hton.create=myisammrg_create_handler; + myisammrg_hton.panic=myrg_panic; + myisammrg_hton.flags= HTON_CAN_RECREATE; + return 0; +} + +struct st_mysql_storage_engine myisammrg_storage_engine= +{ MYSQL_HANDLERTON_INTERFACE_VERSION, &myisammrg_hton }; + +mysql_declare_plugin(myisammrg) +{ + MYSQL_STORAGE_ENGINE_PLUGIN, + &myisammrg_storage_engine, + "MRG_MYISAM", + "MySQL AB", + "Collection of identical MyISAM tables", + myisammrg_init, /* Plugin Init */ + NULL, /* Plugin Deinit */ + 0x0100, /* 1.0 */ + 0 +} +mysql_declare_plugin_end; diff --git a/storage/myisammrg/ha_myisammrg.h b/storage/myisammrg/ha_myisammrg.h new file mode 100644 index 00000000000..d58a3523c26 --- /dev/null +++ b/storage/myisammrg/ha_myisammrg.h @@ -0,0 +1,88 @@ +/* Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details.
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + + +#ifdef USE_PRAGMA_INTERFACE +#pragma interface /* gcc class implementation */ +#endif + +/* class for the myisam merge handler */ + +#include <myisammrg.h> + +class ha_myisammrg: public handler +{ + MYRG_INFO *file; + + public: + ha_myisammrg(TABLE_SHARE *table_arg); + ~ha_myisammrg() {} + const char *table_type() const { return "MRG_MyISAM"; } + const char **bas_ext() const; + const char *index_type(uint key_number); + ulonglong table_flags() const + { + return (HA_REC_NOT_IN_SEQ | HA_AUTO_PART_KEY | HA_NO_TRANSACTIONS | + HA_NULL_IN_KEY | HA_CAN_INDEX_BLOBS | HA_FILE_BASED | + HA_CAN_INSERT_DELAYED | HA_ANY_INDEX_MAY_BE_UNIQUE | + HA_CAN_BIT_FIELD | HA_NO_COPY_ON_ALTER); + } + ulong index_flags(uint inx, uint part, bool all_parts) const + { + return ((table_share->key_info[inx].algorithm == HA_KEY_ALG_FULLTEXT) ? + 0 : HA_READ_NEXT | HA_READ_PREV | HA_READ_RANGE | + HA_READ_ORDER | HA_KEYREAD_ONLY); + } + uint max_supported_keys() const { return MI_MAX_KEY; } + uint max_supported_key_length() const { return MI_MAX_KEY_LENGTH; } + uint max_supported_key_part_length() const { return MI_MAX_KEY_LENGTH; } + double scan_time() + { return ulonglong2double(stats.data_file_length) / IO_SIZE + file->tables; } + + int open(const char *name, int mode, uint test_if_locked); + int close(void); + int write_row(byte * buf); + int update_row(const byte * old_data, byte * new_data); + int delete_row(const byte * buf); + int index_read(byte * buf, const byte * key, + uint key_len, enum ha_rkey_function find_flag); + int index_read_idx(byte * buf, uint idx, const byte * key, + uint key_len, enum ha_rkey_function find_flag); + int index_read_last(byte * buf, const byte * key, uint key_len); + int index_next(byte * buf); + int index_prev(byte * buf); + int index_first(byte * buf); + int index_last(byte * buf); + int index_next_same(byte *buf, const byte *key, uint keylen); + int rnd_init(bool scan); + int rnd_next(byte *buf); + int rnd_pos(byte * buf, byte *pos); + void position(const byte *record); + ha_rows records_in_range(uint inx, key_range *min_key, key_range *max_key); + void info(uint); + int reset(void); + int extra(enum ha_extra_function operation); + int extra_opt(enum ha_extra_function operation, ulong cache_size); + int external_lock(THD *thd, int lock_type); + uint lock_count(void) const; + int create(const char *name, TABLE *form, HA_CREATE_INFO *create_info); + THR_LOCK_DATA **store_lock(THD *thd, THR_LOCK_DATA **to, + enum thr_lock_type lock_type); + void update_create_info(HA_CREATE_INFO *create_info); + void append_create_info(String *packet); + MYRG_INFO *myrg_info() { return file; } + bool check_if_incompatible_data(HA_CREATE_INFO *info, uint table_changes); +}; diff --git a/storage/myisammrg/plug.in b/storage/myisammrg/plug.in new file mode 100644 index 00000000000..b4b2af8d984 --- /dev/null +++ b/storage/myisammrg/plug.in @@ -0,0 +1,5 @@ +MYSQL_STORAGE_ENGINE(myisammrg,no,[MyISAM MERGE Engine], + [Merge multiple MySQL tables into one]) +MYSQL_PLUGIN_DIRECTORY(myisammrg,[storage/myisammrg]) +MYSQL_PLUGIN_STATIC(myisammrg, [libmyisammrg.a]) +MYSQL_PLUGIN_MANDATORY(myisammrg) diff --git a/storage/ndb/include/kernel/signaldata/BackupContinueB.hpp b/storage/ndb/include/kernel/signaldata/BackupContinueB.hpp index fe3f48444ec..9035c6f8140 100644 ---
a/storage/ndb/include/kernel/signaldata/BackupContinueB.hpp +++ b/storage/ndb/include/kernel/signaldata/BackupContinueB.hpp @@ -32,7 +32,8 @@ private: BUFFER_FULL_SCAN = 2, BUFFER_FULL_FRAG_COMPLETE = 3, BUFFER_FULL_META = 4, - BACKUP_FRAGMENT_INFO = 5 + BACKUP_FRAGMENT_INFO = 5, + RESET_DISK_SPEED_COUNTER = 6 }; }; diff --git a/storage/ndb/include/kernel/signaldata/CreateIndx.hpp b/storage/ndb/include/kernel/signaldata/CreateIndx.hpp index 2ba63d6ec69..8a321d4a657 100644 --- a/storage/ndb/include/kernel/signaldata/CreateIndx.hpp +++ b/storage/ndb/include/kernel/signaldata/CreateIndx.hpp @@ -192,6 +192,7 @@ public: enum ErrorCode { NoError = 0, Busy = 701, + BusyWithNR = 711, NotMaster = 702, IndexOnDiskAttributeError = 756, TriggerNotFound = 4238, diff --git a/storage/ndb/include/kernel/signaldata/DropIndx.hpp b/storage/ndb/include/kernel/signaldata/DropIndx.hpp index fd2ea7f0b7b..41ee50082f7 100644 --- a/storage/ndb/include/kernel/signaldata/DropIndx.hpp +++ b/storage/ndb/include/kernel/signaldata/DropIndx.hpp @@ -168,6 +168,7 @@ public: NoError = 0, InvalidIndexVersion = 241, Busy = 701, + BusyWithNR = 711, NotMaster = 702, IndexNotFound = 4243, BadRequestType = 4247, diff --git a/storage/ndb/include/kernel/signaldata/FsAppendReq.hpp b/storage/ndb/include/kernel/signaldata/FsAppendReq.hpp index e2fd61f8a11..3fa569e07f6 100644 --- a/storage/ndb/include/kernel/signaldata/FsAppendReq.hpp +++ b/storage/ndb/include/kernel/signaldata/FsAppendReq.hpp @@ -39,7 +39,7 @@ class FsAppendReq { friend bool printFSAPPENDREQ(FILE * output, const Uint32 * theData, Uint32 len, Uint16 receiverBlockNo); public: - STATIC_CONST( SignalLength = 6 ); + STATIC_CONST( SignalLength = 7 ); private: @@ -52,6 +52,7 @@ private: UintR varIndex; // DATA 3 UintR offset; // DATA 4 UintR size; // DATA 5 + UintR synch_flag; // DATA 6 }; #endif diff --git a/storage/ndb/include/kernel/signaldata/FsOpenReq.hpp b/storage/ndb/include/kernel/signaldata/FsOpenReq.hpp index 1ac3ac883cc..bdf2d6a0ff8 100644 --- a/storage/ndb/include/kernel/signaldata/FsOpenReq.hpp +++ b/storage/ndb/include/kernel/signaldata/FsOpenReq.hpp @@ -43,6 +43,7 @@ class FsOpenReq { friend class Lgman; friend class Tsman; friend class Restore; + friend class Dblqh; /** * For printing @@ -53,7 +54,7 @@ public: /** * Length of signal */ - STATIC_CONST( SignalLength = 10 ); + STATIC_CONST( SignalLength = 11 ); SECTION( FILENAME = 0 ); private: @@ -69,6 +70,7 @@ private: Uint32 page_size; Uint32 file_size_hi; Uint32 file_size_lo; + Uint32 auto_sync_size; // In bytes STATIC_CONST( OM_READONLY = 0 ); STATIC_CONST( OM_WRITEONLY = 1 ); @@ -80,10 +82,10 @@ private: STATIC_CONST( OM_TRUNCATE = 0x200 ); STATIC_CONST( OM_AUTOSYNC = 0x400 ); - STATIC_CONST( OM_CREATE_IF_NONE = 0x0400 ); - STATIC_CONST( OM_INIT = 0x0800 ); // - STATIC_CONST( OM_CHECK_SIZE = 0x1000 ); - STATIC_CONST( OM_DIRECT = 0x2000 ); + STATIC_CONST( OM_CREATE_IF_NONE = 0x0800 ); + STATIC_CONST( OM_INIT = 0x1000 ); // + STATIC_CONST( OM_CHECK_SIZE = 0x2000 ); + STATIC_CONST( OM_DIRECT = 0x4000 ); enum Suffixes { S_DATA = 0, @@ -134,9 +136,11 @@ private: */ static Uint32 v5_getLcpNo(const Uint32 fileNumber[]); static Uint32 v5_getTableId(const Uint32 fileNumber[]); - + static Uint32 v5_getFragmentId(const Uint32 fileNumber[]); + static void v5_setLcpNo(Uint32 fileNumber[], Uint32 no); static void v5_setTableId(Uint32 fileNumber[], Uint32 no); + static void v5_setFragmentId(Uint32 fileNumber[], Uint32 no); }; /** @@ -316,5 +320,15 @@ void FsOpenReq::v5_setLcpNo(Uint32 fileNumber[], Uint32 val){ 
fileNumber[1] = val; } + +inline +Uint32 FsOpenReq::v5_getFragmentId(const Uint32 fileNumber[]){ + return fileNumber[2]; +} + +inline +void FsOpenReq::v5_setFragmentId(Uint32 fileNumber[], Uint32 val){ + fileNumber[2] = val; +} + #endif diff --git a/storage/ndb/include/kernel/signaldata/SumaImpl.hpp b/storage/ndb/include/kernel/signaldata/SumaImpl.hpp index 2517cbbf246..22137508494 100644 --- a/storage/ndb/include/kernel/signaldata/SumaImpl.hpp +++ b/storage/ndb/include/kernel/signaldata/SumaImpl.hpp @@ -302,13 +302,32 @@ struct SubTableData { Uint32 senderData; Uint32 gci; Uint32 tableId; - Uint8 operation; - Uint8 req_nodeid; - Uint8 ndbd_nodeid; - Uint8 not_used3; + Uint32 requestInfo; Uint32 logType; Uint32 changeMask; Uint32 totalLen; + + static void setOperation(Uint32& ri, Uint32 val) { + ri = (ri & 0xFFFFFF00) | val; + } + static void setReqNodeId(Uint32& ri, Uint32 val) { + ri = (ri & 0xFFFF00FF) | (val << 8); + } + static void setNdbdNodeId(Uint32& ri, Uint32 val) { + ri = (ri & 0xFF00FFFF) | (val << 16); + } + + static Uint32 getOperation(const Uint32 & ri){ + return (ri & 0xFF); + } + + static Uint32 getReqNodeId(const Uint32 & ri){ + return (ri >> 8) & 0xFF; + } + + static Uint32 getNdbdNodeId(const Uint32 & ri){ + return (ri >> 16) & 0xFF; + } }; struct SubSyncContinueReq { diff --git a/storage/ndb/include/mgmapi/mgmapi_config_parameters.h b/storage/ndb/include/mgmapi/mgmapi_config_parameters.h index 78d34b31bbb..d1feaa1a7d3 100644 --- a/storage/ndb/include/mgmapi/mgmapi_config_parameters.h +++ b/storage/ndb/include/mgmapi/mgmapi_config_parameters.h @@ -92,6 +92,10 @@ #define CFG_DB_DISK_PAGE_BUFFER_MEMORY 160 #define CFG_DB_STRING_MEMORY 161 +#define CFG_DB_DISK_SYNCH_SIZE 163 +#define CFG_DB_CHECKPOINT_SPEED 164 +#define CFG_DB_CHECKPOINT_SPEED_SR 165 + #define CFG_DB_SGA 198 /* super pool mem */ #define CFG_DB_DATA_MEM_2 199 /* used in special build in 5.1 */ diff --git a/storage/ndb/include/mgmapi/mgmapi_debug.h b/storage/ndb/include/mgmapi/mgmapi_debug.h index e86d9d4b768..942e132d3b4 100644 --- a/storage/ndb/include/mgmapi/mgmapi_debug.h +++ b/storage/ndb/include/mgmapi/mgmapi_debug.h @@ -101,7 +101,7 @@ extern "C" { */ int ndb_mgm_dump_state(NdbMgmHandle handle, int nodeId, - int * args, + const int * args, int num_args, struct ndb_mgm_reply* reply); diff --git a/storage/ndb/include/ndbapi/NdbOperation.hpp b/storage/ndb/include/ndbapi/NdbOperation.hpp index d64e86c3136..7f026e29578 100644 --- a/storage/ndb/include/ndbapi/NdbOperation.hpp +++ b/storage/ndb/include/ndbapi/NdbOperation.hpp @@ -482,7 +482,7 @@ public: /** * Interpreted program instruction: - * Substract RegSource1 from RegSource2 and put the result in RegDest. + * Subtract RegSource2 from RegSource1 and put the result in RegDest. * * @param RegSource1 First register. * @param RegSource2 Second register.
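The SubTableData change above folds what used to be three Uint8 fields into a single requestInfo word (operation in bits 0-7, req_nodeid in bits 8-15, ndbd_nodeid in bits 16-23, top byte unused). A minimal round-trip sketch of the new accessors, with purely illustrative values and assuming SumaImpl.hpp and <cassert> are on the include path:

static void requestInfo_round_trip()
{
  Uint32 ri= 0;
  SubTableData::setOperation(ri, 3);    /* any value < 256 */
  SubTableData::setReqNodeId(ri, 2);
  SubTableData::setNdbdNodeId(ri, 5);
  /* ri is now 0x00050203; each field unpacks independently */
  assert(SubTableData::getOperation(ri) == 3);
  assert(SubTableData::getReqNodeId(ri) == 2);
  assert(SubTableData::getNdbdNodeId(ri) == 5);
}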
diff --git a/storage/ndb/include/ndbapi/ndb_cluster_connection.hpp b/storage/ndb/include/ndbapi/ndb_cluster_connection.hpp index e641802a08c..bc8993c4000 100644 --- a/storage/ndb/include/ndbapi/ndb_cluster_connection.hpp +++ b/storage/ndb/include/ndbapi/ndb_cluster_connection.hpp @@ -114,6 +114,7 @@ public: void init_get_next_node(Ndb_cluster_connection_node_iter &iter); unsigned int get_next_node(Ndb_cluster_connection_node_iter &iter); + unsigned get_active_ndb_objects() const; Uint64 *get_latest_trans_gci(); #endif diff --git a/storage/ndb/plug.in b/storage/ndb/plug.in new file mode 100644 index 00000000000..a7e351417b1 --- /dev/null +++ b/storage/ndb/plug.in @@ -0,0 +1,6 @@ +MYSQL_STORAGE_ENGINE(ndbcluster, ndbcluster, [Cluster Storage Engine], + [High Availability Clustered tables], [max]) +MYSQL_PLUGIN_DIRECTORY(ndbcluster,[storage/ndb]) +MYSQL_PLUGIN_STATIC(ndbcluster, [[\$(ndbcluster_libs) \$(ndbcluster_system_libs) \$(NDB_SCI_LIBS)]]) +MYSQL_PLUGIN_ACTIONS(ndbcluster,[MYSQL_SETUP_NDBCLUSTER]) +MYSQL_PLUGIN_DEPENDS(ndbcluster, [partition]) diff --git a/storage/ndb/src/common/debugger/signaldata/SumaImpl.cpp b/storage/ndb/src/common/debugger/signaldata/SumaImpl.cpp index 963983bf351..ed531c3977f 100644 --- a/storage/ndb/src/common/debugger/signaldata/SumaImpl.cpp +++ b/storage/ndb/src/common/debugger/signaldata/SumaImpl.cpp @@ -174,10 +174,10 @@ printSUB_TABLE_DATA(FILE * output, const Uint32 * theData, Uint32 len, Uint16 receiverBlockNo) { const SubTableData * const sig = (SubTableData *)theData; fprintf(output, " senderData: %x\n", sig->senderData); - fprintf(output, " senderData: %x\n", sig->senderData); fprintf(output, " gci: %x\n", sig->gci); fprintf(output, " tableId: %x\n", sig->tableId); - fprintf(output, " operation: %x\n", sig->operation); + fprintf(output, " operation: %x\n", + SubTableData::getOperation(sig->requestInfo)); return false; } diff --git a/storage/ndb/src/kernel/blocks/ERROR_codes.txt b/storage/ndb/src/kernel/blocks/ERROR_codes.txt index fc22118e113..5b83ecffc6c 100644 --- a/storage/ndb/src/kernel/blocks/ERROR_codes.txt +++ b/storage/ndb/src/kernel/blocks/ERROR_codes.txt @@ -3,10 +3,10 @@ Next NDBCNTR 1000 Next NDBFS 2000 Next DBACC 3002 Next DBTUP 4024 -Next DBLQH 5043 +Next DBLQH 5045 Next DBDICT 6007 Next DBDIH 7177 -Next DBTC 8037 +Next DBTC 8038 Next CMVMI 9000 Next BACKUP 10022 Next DBUTIL 11002 @@ -283,6 +283,7 @@ ABORT OF TCKEYREQ 8032: No free TC records any more +8037 : Invalid schema version in TCINDXREQ CMVMI ----- diff --git a/storage/ndb/src/kernel/blocks/backup/Backup.cpp b/storage/ndb/src/kernel/blocks/backup/Backup.cpp index 2e8d8b548ce..23a2f908c20 100644 --- a/storage/ndb/src/kernel/blocks/backup/Backup.cpp +++ b/storage/ndb/src/kernel/blocks/backup/Backup.cpp @@ -84,6 +84,16 @@ Backup::execSTTOR(Signal* signal) const Uint32 startphase = signal->theData[1]; const Uint32 typeOfStart = signal->theData[7]; + if (startphase == 1) + { + m_curr_disk_write_speed = c_defaults.m_disk_write_speed_sr; + m_overflow_disk_write = 0; + m_reset_disk_speed_time = NdbTick_CurrentMillisecond(); + m_reset_delay_used = Backup::DISK_SPEED_CHECK_DELAY; + signal->theData[0] = BackupContinueB::RESET_DISK_SPEED_COUNTER; + sendSignalWithDelay(BACKUP_REF, GSN_CONTINUEB, signal, + Backup::DISK_SPEED_CHECK_DELAY, 1); + } if (startphase == 3) { jam(); g_TypeOfStart = typeOfStart; @@ -92,6 +102,11 @@ Backup::execSTTOR(Signal* signal) return; }//if + if (startphase == 7) + { + m_curr_disk_write_speed = c_defaults.m_disk_write_speed; + } + if(startphase == 7 && 
g_TypeOfStart == NodeState::ST_INITIAL_START && c_masterNodeId == getOwnNodeId()){ jam(); @@ -170,8 +185,45 @@ Backup::execCONTINUEB(Signal* signal) const Uint32 Tdata2 = signal->theData[2]; switch(Tdata0) { + case BackupContinueB::RESET_DISK_SPEED_COUNTER: + { + /* + Adjust for up to 10 milliseconds of delay of this signal. Longer + delays will not be handled; in that case the system is most + likely under too high a load, and it won't matter very much that + we decrease the speed of checkpoints. + + We use a technique where we allow an overflow write in one + period. This overflow will be removed from the next period + such that the load will on average be as specified. + */ + int delay_time = m_reset_delay_used; + NDB_TICKS curr_time = NdbTick_CurrentMillisecond(); + int sig_delay = curr_time - m_reset_disk_speed_time; + + m_words_written_this_period = m_overflow_disk_write; + m_overflow_disk_write = 0; + m_reset_disk_speed_time = curr_time; + + if (sig_delay > delay_time + 10) + delay_time = Backup::DISK_SPEED_CHECK_DELAY - 10; + else if (sig_delay < delay_time - 10) + delay_time = Backup::DISK_SPEED_CHECK_DELAY + 10; + else + delay_time = Backup::DISK_SPEED_CHECK_DELAY - (sig_delay - delay_time); + m_reset_delay_used= delay_time; + signal->theData[0] = BackupContinueB::RESET_DISK_SPEED_COUNTER; + sendSignalWithDelay(BACKUP_REF, GSN_CONTINUEB, signal, delay_time, 1); +#if 0 + ndbout << "Signal delay was = " << sig_delay; + ndbout << " Current time = " << curr_time << endl; + ndbout << " Delay time will be = " << delay_time << endl << endl; +#endif + break; + } case BackupContinueB::BACKUP_FRAGMENT_INFO: { + jam(); const Uint32 ptr_I = Tdata1; Uint32 tabPtr_I = Tdata2; Uint32 fragPtr_I = signal->theData[3]; @@ -180,48 +232,56 @@ Backup::execCONTINUEB(Signal* signal) c_backupPool.getPtr(ptr, ptr_I); TablePtr tabPtr; ptr.p->tables.getPtr(tabPtr, tabPtr_I); - FragmentPtr fragPtr; - tabPtr.p->fragments.getPtr(fragPtr, fragPtr_I); - - BackupFilePtr filePtr; - ptr.p->files.getPtr(filePtr, ptr.p->ctlFilePtr); - const Uint32 sz = sizeof(BackupFormat::CtlFile::FragmentInfo) >> 2; - Uint32 * dst; - if (!filePtr.p->operation.dataBuffer.getWritePtr(&dst, sz)) + if (fragPtr_I != tabPtr.p->fragments.getSize()) { - sendSignalWithDelay(BACKUP_REF, GSN_CONTINUEB, signal, 100, 4); - return; + jam(); + FragmentPtr fragPtr; + tabPtr.p->fragments.getPtr(fragPtr, fragPtr_I); + + BackupFilePtr filePtr; + ptr.p->files.getPtr(filePtr, ptr.p->ctlFilePtr); + + const Uint32 sz = sizeof(BackupFormat::CtlFile::FragmentInfo) >> 2; + Uint32 * dst; + if (!filePtr.p->operation.dataBuffer.getWritePtr(&dst, sz)) + { + sendSignalWithDelay(BACKUP_REF, GSN_CONTINUEB, signal, 100, 4); + return; + } + + BackupFormat::CtlFile::FragmentInfo * fragInfo = + (BackupFormat::CtlFile::FragmentInfo*)dst; + fragInfo->SectionType = htonl(BackupFormat::FRAGMENT_INFO); + fragInfo->SectionLength = htonl(sz); + fragInfo->TableId = htonl(fragPtr.p->tableId); + fragInfo->FragmentNo = htonl(fragPtr_I); + fragInfo->NoOfRecordsLow = htonl(fragPtr.p->noOfRecords & 0xFFFFFFFF); + fragInfo->NoOfRecordsHigh = htonl(fragPtr.p->noOfRecords >> 32); + fragInfo->FilePosLow = htonl(0); + fragInfo->FilePosHigh = htonl(0); + + filePtr.p->operation.dataBuffer.updateWritePtr(sz); + + fragPtr_I++; } - - BackupFormat::CtlFile::FragmentInfo * fragInfo = - (BackupFormat::CtlFile::FragmentInfo*)dst; - fragInfo->SectionType = htonl(BackupFormat::FRAGMENT_INFO); - fragInfo->SectionLength = htonl(sz); - fragInfo->TableId = htonl(fragPtr.p->tableId); - fragInfo->FragmentNo = htonl(fragPtr_I); - fragInfo->NoOfRecordsLow = htonl(fragPtr.p->noOfRecords & 0xFFFFFFFF); - fragInfo->NoOfRecordsHigh = htonl(fragPtr.p->noOfRecords >> 32); - fragInfo->FilePosLow = htonl(0 & 0xFFFFFFFF); - fragInfo->FilePosHigh = htonl(0 >> 32); - - filePtr.p->operation.dataBuffer.updateWritePtr(sz); - - fragPtr_I++; + if (fragPtr_I == tabPtr.p->fragments.getSize()) { signal->theData[0] = tabPtr.p->tableId; signal->theData[1] = 0; // unlock EXECUTE_DIRECT(DBDICT, GSN_BACKUP_FRAGMENT_REQ, signal, 2); - + fragPtr_I = 0; ptr.p->tables.next(tabPtr); if ((tabPtr_I = tabPtr.i) == RNIL) { - closeFiles(signal, ptr); - return; + jam(); + closeFiles(signal, ptr); + return; } } + signal->theData[0] = BackupContinueB::BACKUP_FRAGMENT_INFO; signal->theData[1] = ptr_I; signal->theData[2] = tabPtr_I;
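The RESET_DISK_SPEED_COUNTER case above implements a period-based throttle where one period may overshoot its budget and the overshoot is charged to the next period. A condensed standalone sketch of that idea (simplified, with assumed names; in the patch the state lives in the Backup block as m_words_written_this_period and m_overflow_disk_write):

typedef unsigned int Uint32;      /* as typedef'd in the NDB sources */

struct DiskWriteThrottle
{
  Uint32 budget_per_period;       /* words allowed per check period */
  Uint32 written_this_period;     /* starts each period at the carried debt */
  Uint32 overflow;                /* overshoot to charge to the next period */

  bool may_write(Uint32 words) const
  { return written_this_period + words <= budget_per_period; }

  void note_write(Uint32 words)   /* an in-flight write may overshoot once */
  {
    written_this_period+= words;
    overflow= (written_this_period > budget_per_period) ?
              written_this_period - budget_per_period : 0;
  }

  void reset_period()             /* driven by the delayed CONTINUEB signal */
  {
    written_this_period= overflow;  /* the new period starts "in debt" */
    overflow= 0;
  }
};

Averaged over many periods the write rate then converges to budget_per_period even though individual periods may overshoot.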
@@ -365,10 +425,9 @@ Backup::execDUMP_STATE_ORD(Signal* signal) for(ptr.p->files.first(filePtr); filePtr.i != RNIL; ptr.p->files.next(filePtr)){ jam(); - infoEvent(" file %d: type: %d open: %d running: %d done: %d scan: %d", - filePtr.i, filePtr.p->fileType, filePtr.p->fileOpened, - filePtr.p->fileRunning, - filePtr.p->fileClosing, filePtr.p->scanRunning); + infoEvent(" file %d: type: %d flags: H'%x", + filePtr.i, filePtr.p->fileType, + filePtr.p->m_flags); + } } } @@ -938,7 +997,7 @@ Backup::execBACKUP_REQ(Signal* signal) return; }//if - if (m_diskless) + if (c_defaults.m_diskless) { sendBackupRef(senderRef, flags, signal, senderData, BackupRef::CannotBackupDiskless); @@ -1987,13 +2046,20 @@ Backup::sendDropTrig(Signal* signal, BackupRecordPtr ptr) { TablePtr tabPtr; - ptr.p->tables.first(tabPtr); - - signal->theData[0] = BackupContinueB::BACKUP_FRAGMENT_INFO; - signal->theData[1] = ptr.i; - signal->theData[2] = tabPtr.i; - signal->theData[3] = 0; - sendSignal(BACKUP_REF, GSN_CONTINUEB, signal, 4, JBB); + if (ptr.p->tables.first(tabPtr)) + { + jam(); + signal->theData[0] = BackupContinueB::BACKUP_FRAGMENT_INFO; + signal->theData[1] = ptr.i; + signal->theData[2] = tabPtr.i; + signal->theData[3] = 0; + sendSignal(BACKUP_REF, GSN_CONTINUEB, signal, 4, JBB); + } + else + { + jam(); + closeFiles(signal, ptr); + } } } } @@ -2300,13 +2366,31 @@ Backup::abort_scan(Signal * signal, BackupRecordPtr ptr) void Backup::defineBackupRef(Signal* signal, BackupRecordPtr ptr, Uint32 errCode) { + jam(); + ptr.p->setErrorCode(errCode); if(ptr.p->is_lcp()) { jam(); + + BackupFilePtr filePtr; + ptr.p->files.getPtr(filePtr, ptr.p->ctlFilePtr); + if (filePtr.p->m_flags & BackupFile::BF_LCP_META) + { + jam(); + ndbrequire(!
(filePtr.p->m_flags & BackupFile::BF_FILE_THREAD)); + filePtr.p->m_flags &= ~(Uint32)BackupFile::BF_LCP_META; + if (filePtr.p->m_flags & BackupFile::BF_OPEN) + { + closeFile(signal, ptr, filePtr); + return; + } + } + + ndbrequire(filePtr.p->m_flags == 0); + TablePtr tabPtr; FragmentPtr fragPtr; - ptr.p->setErrorCode(errCode); ndbrequire(ptr.p->tables.first(tabPtr)); tabPtr.p->fragments.getPtr(fragPtr, 0); @@ -2322,7 +2406,6 @@ Backup::defineBackupRef(Signal* signal, BackupRecordPtr ptr, Uint32 errCode) } ptr.p->m_gsn = GSN_DEFINE_BACKUP_REF; - ptr.p->setErrorCode(errCode); ndbrequire(ptr.p->errorCode != 0); DefineBackupRef* ref = (DefineBackupRef*)signal->getDataPtrSend(); @@ -2453,10 +2536,7 @@ Backup::execDEFINE_BACKUP_REQ(Signal* signal) files[i].p->tableId = RNIL; files[i].p->backupPtr = ptr.i; files[i].p->filePointer = RNIL; - files[i].p->fileClosing = 0; - files[i].p->fileOpened = 0; - files[i].p->fileRunning = 0; - files[i].p->scanRunning = 0; + files[i].p->m_flags = 0; files[i].p->errorCode = 0; if(files[i].p->pages.seize(noOfPages[i]) == false) { @@ -2610,15 +2690,15 @@ Backup::openFiles(Signal* signal, BackupRecordPtr ptr) FsOpenReq::OM_WRITEONLY | FsOpenReq::OM_TRUNCATE | FsOpenReq::OM_CREATE | - FsOpenReq::OM_APPEND; + FsOpenReq::OM_APPEND | + FsOpenReq::OM_AUTOSYNC; FsOpenReq::v2_setCount(req->fileNumber, 0xFFFFFFFF); - + req->auto_sync_size = c_defaults.m_disk_synch_size; /** * Ctl file */ c_backupFilePool.getPtr(filePtr, ptr.p->ctlFilePtr); - ndbrequire(filePtr.p->fileRunning == 0); - filePtr.p->fileRunning = 1; + filePtr.p->m_flags |= BackupFile::BF_OPENING; req->userPointer = filePtr.i; FsOpenReq::setVersion(req->fileNumber, 2); @@ -2631,8 +2711,7 @@ Backup::openFiles(Signal* signal, BackupRecordPtr ptr) * Log file */ c_backupFilePool.getPtr(filePtr, ptr.p->logFilePtr); - ndbrequire(filePtr.p->fileRunning == 0); - filePtr.p->fileRunning = 1; + filePtr.p->m_flags |= BackupFile::BF_OPENING; req->userPointer = filePtr.i; FsOpenReq::setVersion(req->fileNumber, 2); @@ -2645,8 +2724,7 @@ Backup::openFiles(Signal* signal, BackupRecordPtr ptr) * Data file */ c_backupFilePool.getPtr(filePtr, ptr.p->dataFilePtr); - ndbrequire(filePtr.p->fileRunning == 0); - filePtr.p->fileRunning = 1; + filePtr.p->m_flags |= BackupFile::BF_OPENING; req->userPointer = filePtr.i; FsOpenReq::setVersion(req->fileNumber, 2); @@ -2692,8 +2770,8 @@ Backup::execFSOPENCONF(Signal* signal) BackupRecordPtr ptr; c_backupPool.getPtr(ptr, filePtr.p->backupPtr); - ndbrequire(filePtr.p->fileOpened == 0); - filePtr.p->fileOpened = 1; + ndbrequire(! 
(filePtr.p->m_flags & BackupFile::BF_OPEN)); + filePtr.p->m_flags |= BackupFile::BF_OPEN; openFilesReply(signal, ptr, filePtr); } @@ -2706,16 +2784,16 @@ Backup::openFilesReply(Signal* signal, /** * Mark files as "opened" */ - ndbrequire(filePtr.p->fileRunning == 1); - filePtr.p->fileRunning = 0; - + ndbrequire(filePtr.p->m_flags & BackupFile::BF_OPENING); + filePtr.p->m_flags &= ~(Uint32)BackupFile::BF_OPENING; + filePtr.p->m_flags |= BackupFile::BF_OPEN; /** * Check if all files have recived open_reply */ for(ptr.p->files.first(filePtr); filePtr.i!=RNIL;ptr.p->files.next(filePtr)) { jam(); - if(filePtr.p->fileRunning == 1) { + if(filePtr.p->m_flags & BackupFile::BF_OPENING) { jam(); return; }//if @@ -2771,13 +2849,22 @@ Backup::openFilesReply(Signal* signal, /** * Start CTL file thread */ - ptr.p->files.getPtr(filePtr, ptr.p->ctlFilePtr); - filePtr.p->fileRunning = 1; + if (!ptr.p->is_lcp()) + { + jam(); + ptr.p->files.getPtr(filePtr, ptr.p->ctlFilePtr); + filePtr.p->m_flags |= BackupFile::BF_FILE_THREAD; + + signal->theData[0] = BackupContinueB::START_FILE_THREAD; + signal->theData[1] = filePtr.i; + sendSignalWithDelay(BACKUP_REF, GSN_CONTINUEB, signal, 100, 2); + } + else + { + jam(); + filePtr.p->m_flags |= BackupFile::BF_LCP_META; + } - signal->theData[0] = BackupContinueB::START_FILE_THREAD; - signal->theData[1] = filePtr.i; - sendSignalWithDelay(BACKUP_REF, GSN_CONTINUEB, signal, 100, 2); - /** * Insert table list in ctl file */ @@ -2864,6 +2951,10 @@ Backup::execGET_TABINFOREF(Signal* signal) BackupRecordPtr ptr; c_backupPool.getPtr(ptr, senderData); + BackupFilePtr filePtr; + ptr.p->files.getPtr(filePtr, ptr.p->ctlFilePtr); + filePtr.p->m_flags &= ~(Uint32)BackupFile::BF_FILE_THREAD; + defineBackupRef(signal, ptr, ref->errorCode); } @@ -3292,13 +3383,13 @@ Backup::execSTART_BACKUP_REQ(Signal* signal) * Start file threads... */ BackupFilePtr filePtr; - for(ptr.p->files.first(filePtr); - filePtr.i!=RNIL; - ptr.p->files.next(filePtr)){ + for(ptr.p->files.first(filePtr); filePtr.i!=RNIL;ptr.p->files.next(filePtr)) + { jam(); - if(filePtr.p->fileRunning == 0) { + if(! 
(filePtr.p->m_flags & BackupFile::BF_FILE_THREAD)) + { jam(); - filePtr.p->fileRunning = 1; + filePtr.p->m_flags |= BackupFile::BF_FILE_THREAD; signal->theData[0] = BackupContinueB::START_FILE_THREAD; signal->theData[1] = filePtr.i; sendSignalWithDelay(BACKUP_REF, GSN_CONTINUEB, signal, 100, 2); @@ -3348,10 +3439,8 @@ Backup::execBACKUP_FRAGMENT_REQ(Signal* signal) c_backupFilePool.getPtr(filePtr, ptr.p->dataFilePtr); ndbrequire(filePtr.p->backupPtr == ptrI); - ndbrequire(filePtr.p->fileOpened == 1); - ndbrequire(filePtr.p->fileRunning == 1); - ndbrequire(filePtr.p->scanRunning == 0); - ndbrequire(filePtr.p->fileClosing == 0); + ndbrequire(filePtr.p->m_flags == + (BackupFile::BF_OPEN | BackupFile::BF_FILE_THREAD)); /** * Get table @@ -3400,7 +3489,7 @@ Backup::execBACKUP_FRAGMENT_REQ(Signal* signal) * Start scan */ { - filePtr.p->scanRunning = 1; + filePtr.p->m_flags |= BackupFile::BF_SCAN_THREAD; Table & table = * tabPtr.p; ScanFragReq * req = (ScanFragReq *)signal->getDataPtrSend(); @@ -3685,7 +3774,7 @@ Backup::execSCAN_FRAGREF(Signal* signal) c_backupFilePool.getPtr(filePtr, filePtrI); filePtr.p->errorCode = ref->errorCode; - filePtr.p->scanRunning = 0; + filePtr.p->m_flags &= ~(Uint32)BackupFile::BF_SCAN_THREAD; backupFragmentRef(signal, filePtr); } @@ -3725,7 +3814,7 @@ Backup::fragmentCompleted(Signal* signal, BackupFilePtr filePtr) if(filePtr.p->errorCode != 0) { jam(); - filePtr.p->scanRunning = 0; + filePtr.p->m_flags &= ~(Uint32)BackupFile::BF_SCAN_THREAD; backupFragmentRef(signal, filePtr); // Scan completed return; }//if @@ -3739,25 +3828,33 @@ Backup::fragmentCompleted(Signal* signal, BackupFilePtr filePtr) return; }//if - filePtr.p->scanRunning = 0; + filePtr.p->m_flags &= ~(Uint32)BackupFile::BF_SCAN_THREAD; BackupRecordPtr ptr; c_backupPool.getPtr(ptr, filePtr.p->backupPtr); - BackupFragmentConf * conf = (BackupFragmentConf*)signal->getDataPtrSend(); - conf->backupId = ptr.p->backupId; - conf->backupPtr = ptr.i; - conf->tableId = filePtr.p->tableId; - conf->fragmentNo = filePtr.p->fragmentNo; - conf->noOfRecordsLow = (Uint32)(op.noOfRecords & 0xFFFFFFFF); - conf->noOfRecordsHigh = (Uint32)(op.noOfRecords >> 32); - conf->noOfBytesLow = (Uint32)(op.noOfBytes & 0xFFFFFFFF); - conf->noOfBytesHigh = (Uint32)(op.noOfBytes >> 32); - sendSignal(ptr.p->masterRef, GSN_BACKUP_FRAGMENT_CONF, signal, - BackupFragmentConf::SignalLength, JBB); - - ptr.p->m_gsn = GSN_BACKUP_FRAGMENT_CONF; - ptr.p->slaveState.setState(STARTED); + if (ptr.p->is_lcp()) + { + ptr.p->slaveState.setState(STOPPING); + filePtr.p->operation.dataBuffer.eof(); + } + else + { + BackupFragmentConf * conf = (BackupFragmentConf*)signal->getDataPtrSend(); + conf->backupId = ptr.p->backupId; + conf->backupPtr = ptr.i; + conf->tableId = filePtr.p->tableId; + conf->fragmentNo = filePtr.p->fragmentNo; + conf->noOfRecordsLow = (Uint32)(op.noOfRecords & 0xFFFFFFFF); + conf->noOfRecordsHigh = (Uint32)(op.noOfRecords >> 32); + conf->noOfBytesLow = (Uint32)(op.noOfBytes & 0xFFFFFFFF); + conf->noOfBytesHigh = (Uint32)(op.noOfBytes >> 32); + sendSignal(ptr.p->masterRef, GSN_BACKUP_FRAGMENT_CONF, signal, + BackupFragmentConf::SignalLength, JBB); + + ptr.p->m_gsn = GSN_BACKUP_FRAGMENT_CONF; + ptr.p->slaveState.setState(STARTED); + } return; } @@ -3854,7 +3951,7 @@ Backup::execFSAPPENDREF(Signal* signal) BackupFilePtr filePtr; c_backupFilePool.getPtr(filePtr, filePtrI); - filePtr.p->fileRunning = 0; + filePtr.p->m_flags &= ~(Uint32)BackupFile::BF_FILE_THREAD; filePtr.p->errorCode = errCode; checkFile(signal, filePtr); @@ -3881,6 
+3978,69 @@ Backup::execFSAPPENDCONF(Signal* signal) checkFile(signal, filePtr); } +/* + This routine handles two problems with writing to disk during local + checkpoints and backups. The first problem is that we need to limit + the writing to ensure that we don't use too much CPU and disk resources + for backups and checkpoints. The perfect solution to this is to use + a dynamic algorithm that adapts to the environment. Until we have + implemented this we can satisfy ourselves with an algorithm that + uses a configurable limit. + + The second problem is that on Linux we can get severe problems if we + write a great deal to the disk without syncing. In the worst case we + can have gigabytes of data in the Linux page cache before we reach + the limit of how much we can write. If this happens the performance + will drop significantly when we reach this limit since the Linux flush + daemon will spend a few minutes on writing out the page cache to disk. + To avoid this we ensure that a file never has more than a certain + amount of data outstanding before a synch. This amount is also + configurable. +*/ +bool +Backup::ready_to_write(bool ready, Uint32 sz, bool eof, BackupFile *fileP) +{ +#if 0 + ndbout << "ready_to_write: ready = " << ready << " eof = " << eof; + ndbout << " sz = " << sz << endl; + ndbout << "words this period = " << m_words_written_this_period; + ndbout << endl << "overflow disk write = " << m_overflow_disk_write; + ndbout << endl << "Current Millisecond is = "; + ndbout << NdbTick_CurrentMillisecond() << endl; +#endif + if ((ready || eof) && + m_words_written_this_period <= m_curr_disk_write_speed) + { + /* + We have a buffer ready to write, or we have reached end of + file and thus must write the last data before closing the + file. + We have already checked that we are allowed to write at this + moment. We only worry about the history of the last 100 milliseconds. + What happened before that is of no interest since a disk + write that was issued more than 100 milliseconds ago should have + completed by now. 
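(For illustration only, not part of this patch: a minimal standalone sketch of the 100 ms write-budget scheme described in this comment, with illustrative names. The real block resets the period from a CONTINUEB timer every DISK_SPEED_CHECK_DELAY = 100 ms and carries any overshoot into the next period.)

#include <cstdint>
#include <cstdio>

// Hypothetical stand-in for the per-100ms disk write budget in Backup.
struct DiskWriteLimiter {
  uint32_t curr_disk_write_speed;      // allowed words per 100 ms period
  uint32_t words_written_this_period;  // words issued in the current period
  uint32_t overflow_disk_write;        // words the last period overshot by

  // Called every 100 ms: start a new period, charging it with whatever
  // the previous period overshot.
  void reset_period() {
    words_written_this_period = overflow_disk_write;
    overflow_disk_write = 0;
  }

  // Mirrors ready_to_write(): allow the write if a buffer is ready (or we
  // are at end of file) and the current period still has budget left.
  bool ready_to_write(bool ready, uint32_t sz, bool eof) {
    if ((ready || eof) &&
        words_written_this_period <= curr_disk_write_speed) {
      words_written_this_period += sz;
      int32_t overflow =
          (int32_t)(words_written_this_period - curr_disk_write_speed);
      if (overflow > 0)
        overflow_disk_write = (uint32_t)overflow;  // charge the next period
      return true;
    }
    return false;  // caller retries via BUFFER_UNDERFLOW after a short delay
  }
};

int main() {
  DiskWriteLimiter lim = {262144, 0, 0};  // 10 MB/s => 262144 words/100 ms
  printf("%d\n", lim.ready_to_write(true, 300000, false));  // 1: had budget
  printf("%d\n", lim.ready_to_write(true, 100, false));     // 0: over budget
  lim.reset_period();  // next period starts charged with the overshoot
  return 0;
}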
+ */ + int overflow; + m_words_written_this_period += sz; + overflow = m_words_written_this_period - m_curr_disk_write_speed; + if (overflow > 0) + m_overflow_disk_write = overflow; +#if 0 + ndbout << "Will write with " << endl; + ndbout << endl; +#endif + return true; + } + else + { +#if 0 + ndbout << "Will not write now" << endl << endl; +#endif + return false; + } +} + void Backup::checkFile(Signal* signal, BackupFilePtr filePtr) { @@ -3890,35 +4050,23 @@ Backup::checkFile(Signal* signal, BackupFilePtr filePtr) #endif OperationRecord & op = filePtr.p->operation; - - Uint32 * tmp, sz; bool eof; - if(op.dataBuffer.getReadPtr(&tmp, &sz, &eof)) + Uint32 *tmp = NULL; + Uint32 sz = 0; + bool eof = FALSE; + bool ready = op.dataBuffer.getReadPtr(&tmp, &sz, &eof); +#if 0 + ndbout << "Ptr to data = " << hex << tmp << endl; +#endif + if (!ready_to_write(ready, sz, eof, filePtr.p)) { jam(); - - jam(); - FsAppendReq * req = (FsAppendReq *)signal->getDataPtrSend(); - req->filePointer = filePtr.p->filePointer; - req->userPointer = filePtr.i; - req->userReference = reference(); - req->varIndex = 0; - req->offset = tmp - c_startOfPages; - req->size = sz; - - sendSignal(NDBFS_REF, GSN_FSAPPENDREQ, signal, - FsAppendReq::SignalLength, JBA); - return; - } - - if(!eof) { - jam(); signal->theData[0] = BackupContinueB::BUFFER_UNDERFLOW; signal->theData[1] = filePtr.i; - sendSignalWithDelay(BACKUP_REF, GSN_CONTINUEB, signal, 50, 2); + sendSignalWithDelay(BACKUP_REF, GSN_CONTINUEB, signal, 20, 2); return; - }//if - - if(sz > 0) { + } + else if (sz > 0) + { jam(); FsAppendReq * req = (FsAppendReq *)signal->getDataPtrSend(); req->filePointer = filePtr.p->filePointer; @@ -3926,43 +4074,23 @@ Backup::checkFile(Signal* signal, BackupFilePtr filePtr) req->userReference = reference(); req->varIndex = 0; req->offset = tmp - c_startOfPages; - req->size = sz; // Round up + req->size = sz; + req->synch_flag = 0; sendSignal(NDBFS_REF, GSN_FSAPPENDREQ, signal, FsAppendReq::SignalLength, JBA); return; - }//if - -#ifdef DEBUG_ABORT - Uint32 running= filePtr.p->fileRunning; - Uint32 closing= filePtr.p->fileClosing; -#endif - - if(!filePtr.p->fileClosing) - { - filePtr.p->fileRunning = 0; - filePtr.p->fileClosing = 1; - - FsCloseReq * req = (FsCloseReq *)signal->getDataPtrSend(); - req->filePointer = filePtr.p->filePointer; - req->userPointer = filePtr.i; - req->userReference = reference(); - req->fileFlag = 0; -#ifdef DEBUG_ABORT - ndbout_c("***** a FSCLOSEREQ filePtr.i = %u run=%d cl=%d", filePtr.i, - running, closing); -#endif - sendSignal(NDBFS_REF, GSN_FSCLOSEREQ, signal, FsCloseReq::SignalLength, JBA); } - else - { -#ifdef DEBUG_ABORT - ndbout_c("***** a NOT SENDING FSCLOSEREQ filePtr.i = %u run=%d cl=%d", - filePtr.i, - running, closing); -#endif - } + Uint32 flags = filePtr.p->m_flags; + filePtr.p->m_flags &= ~(Uint32)BackupFile::BF_FILE_THREAD; + + ndbrequire(flags & BackupFile::BF_OPEN); + ndbrequire(flags & BackupFile::BF_FILE_THREAD); + + BackupRecordPtr ptr; + c_backupPool.getPtr(ptr, filePtr.p->backupPtr); + closeFile(signal, ptr, filePtr); } @@ -4201,7 +4329,8 @@ Backup::closeFiles(Signal* sig, BackupRecordPtr ptr) int openCount = 0; for(ptr.p->files.first(filePtr); filePtr.i!=RNIL; ptr.p->files.next(filePtr)) { - if(filePtr.p->fileOpened == 0) { + if(! 
(filePtr.p->m_flags & BackupFile::BF_OPEN)) + { jam(); continue; } @@ -4209,35 +4338,26 @@ Backup::closeFiles(Signal* sig, BackupRecordPtr ptr) jam(); openCount++; - if(filePtr.p->fileClosing == 1){ + if(filePtr.p->m_flags & BackupFile::BF_CLOSING) + { jam(); continue; }//if - - if(filePtr.p->fileRunning == 1){ + + filePtr.p->operation.dataBuffer.eof(); + if(filePtr.p->m_flags & BackupFile::BF_FILE_THREAD) + { jam(); #ifdef DEBUG_ABORT ndbout_c("Close files fileRunning == 1, filePtr.i=%u", filePtr.i); #endif - filePtr.p->operation.dataBuffer.eof(); - } else { + } + else + { jam(); - filePtr.p->fileClosing = 1; - filePtr.p->operation.dataBuffer.eof(); - checkFile(sig, filePtr); // make sure we write everything before closing - - FsCloseReq * req = (FsCloseReq *)sig->getDataPtrSend(); - req->filePointer = filePtr.p->filePointer; - req->userPointer = filePtr.i; - req->userReference = reference(); - req->fileFlag = 0; -#ifdef DEBUG_ABORT - ndbout_c("***** b FSCLOSEREQ filePtr.i = %u", filePtr.i); -#endif - sendSignal(NDBFS_REF, GSN_FSCLOSEREQ, sig, - FsCloseReq::SignalLength, JBA); - }//if - }//for + closeFile(sig, ptr, filePtr); + } + } if(openCount == 0){ jam(); @@ -4246,6 +4366,33 @@ Backup::closeFiles(Signal* sig, BackupRecordPtr ptr) } void +Backup::closeFile(Signal* signal, BackupRecordPtr ptr, BackupFilePtr filePtr) +{ + ndbrequire(filePtr.p->m_flags & BackupFile::BF_OPEN); + ndbrequire(! (filePtr.p->m_flags & BackupFile::BF_OPENING)); + ndbrequire(! (filePtr.p->m_flags & BackupFile::BF_CLOSING)); + filePtr.p->m_flags |= BackupFile::BF_CLOSING; + + FsCloseReq * req = (FsCloseReq *)signal->getDataPtrSend(); + req->filePointer = filePtr.p->filePointer; + req->userPointer = filePtr.i; + req->userReference = reference(); + req->fileFlag = 0; + + if (ptr.p->errorCode) + { + FsCloseReq::setRemoveFileFlag(req->fileFlag, 1); + } + +#ifdef DEBUG_ABORT + ndbout_c("***** a FSCLOSEREQ filePtr.i = %u flags: %x", + filePtr.i, filePtr.p->m_flags); +#endif + sendSignal(NDBFS_REF, GSN_FSCLOSEREQ, signal, FsCloseReq::SignalLength, JBA); + +} + +void Backup::execFSCLOSEREF(Signal* signal) { jamEntry(); @@ -4259,7 +4406,6 @@ Backup::execFSCLOSEREF(Signal* signal) BackupRecordPtr ptr; c_backupPool.getPtr(ptr, filePtr.p->backupPtr); - filePtr.p->fileOpened = 1; FsConf * conf = (FsConf*)signal->getDataPtr(); conf->userPointer = filePtrI; @@ -4280,29 +4426,17 @@ Backup::execFSCLOSECONF(Signal* signal) #ifdef DEBUG_ABORT ndbout_c("***** FSCLOSECONF filePtrI = %u", filePtrI); #endif + + ndbrequire(filePtr.p->m_flags == (BackupFile::BF_OPEN | + BackupFile::BF_CLOSING)); - ndbrequire(filePtr.p->fileClosing == 1); - ndbrequire(filePtr.p->fileOpened == 1); - ndbrequire(filePtr.p->fileRunning == 0); - ndbrequire(filePtr.p->scanRunning == 0); - filePtr.p->fileOpened = 0; + filePtr.p->m_flags &= ~(Uint32)(BackupFile::BF_OPEN |BackupFile::BF_CLOSING); filePtr.p->operation.dataBuffer.reset(); - + BackupRecordPtr ptr; c_backupPool.getPtr(ptr, filePtr.p->backupPtr); - for(ptr.p->files.first(filePtr); filePtr.i!=RNIL;ptr.p->files.next(filePtr)) - { - jam(); - if(filePtr.p->fileOpened == 1) { - jam(); -#ifdef DEBUG_ABORT - ndbout_c("waiting for more FSCLOSECONF's filePtr.i = %u", filePtr.i); -#endif - return; // we will be getting more FSCLOSECONF's - }//if - }//for - closeFilesDone(signal, ptr); + closeFiles(signal, ptr); } void @@ -4498,12 +4632,8 @@ Backup::dumpUsedResources() filePtr.i != RNIL; ptr.p->files.next(filePtr)) { jam(); - ndbout_c("filePtr.i = %u, filePtr.p->fileOpened=%u fileRunning=%u " - "scanRunning=%u", - 
filePtr.i, - filePtr.p->fileOpened, - filePtr.p->fileRunning, - filePtr.p->scanRunning); + ndbout_c("filePtr.i = %u, flags: H'%x ", + filePtr.i, filePtr.p->m_flags); }//for } } @@ -4537,13 +4667,10 @@ Backup::cleanup(Signal* signal, BackupRecordPtr ptr) }//for BackupFilePtr filePtr; - for(ptr.p->files.first(filePtr); - filePtr.i != RNIL; - ptr.p->files.next(filePtr)) { + for(ptr.p->files.first(filePtr);filePtr.i != RNIL;ptr.p->files.next(filePtr)) + { jam(); - ndbrequire(filePtr.p->fileOpened == 0); - ndbrequire(filePtr.p->fileRunning == 0); - ndbrequire(filePtr.p->scanRunning == 0); + ndbrequire(filePtr.p->m_flags == 0); filePtr.p->pages.release(); }//for @@ -4616,91 +4743,94 @@ Backup::execLCP_PREPARE_REQ(Signal* signal) BackupRecordPtr ptr; c_backupPool.getPtr(ptr, req.backupPtr); - bool first= true; - TablePtr tabPtr; - if(ptr.p->tables.first(tabPtr) && tabPtr.p->tableId != req.tableId) - { - jam(); - first= false; - tabPtr.p->attributes.release(); - tabPtr.p->fragments.release(); - ptr.p->tables.release(); - ptr.p->errorCode = 0; - } + ptr.p->m_gsn = GSN_LCP_PREPARE_REQ; - if(ptr.p->tables.first(tabPtr) && ptr.p->errorCode == 0) - { - jam(); - FragmentPtr fragPtr; - tabPtr.p->fragments.getPtr(fragPtr, 0); - fragPtr.p->fragmentId = req.fragmentId; - - lcp_open_file_done(signal, ptr); - return; - } - else if(ptr.p->errorCode == 0) + TablePtr tabPtr; + FragmentPtr fragPtr; + if (!ptr.p->tables.isEmpty()) { jam(); - FragmentPtr fragPtr; - if(!ptr.p->tables.seize(tabPtr) || !tabPtr.p->fragments.seize(1)) + ndbrequire(ptr.p->errorCode); + ptr.p->tables.first(tabPtr); + if (tabPtr.p->tableId == req.tableId) { - if(!tabPtr.isNull()) - ptr.p->tables.release(); - ndbrequire(false); // TODO + jam(); + ndbrequire(!tabPtr.p->fragments.empty()); + tabPtr.p->fragments.getPtr(fragPtr, 0); + fragPtr.p->fragmentId = req.fragmentId; + defineBackupRef(signal, ptr, ptr.p->errorCode); + return; + } + else + { + jam(); + tabPtr.p->attributes.release(); + tabPtr.p->fragments.release(); + ptr.p->tables.release(); + ptr.p->errorCode = 0; + // fall-through } - tabPtr.p->tableId = req.tableId; - tabPtr.p->fragments.getPtr(fragPtr, 0); - tabPtr.p->tableType = DictTabInfo::UserTable; - fragPtr.p->fragmentId = req.fragmentId; - fragPtr.p->lcp_no = req.lcpNo; - fragPtr.p->scanned = 0; - fragPtr.p->scanning = 0; - fragPtr.p->tableId = req.tableId; - } - else - { - jam(); - FragmentPtr fragPtr; - tabPtr.p->fragments.getPtr(fragPtr, 0); - fragPtr.p->fragmentId = req.fragmentId; - defineBackupRef(signal, ptr, ptr.p->errorCode); - return; } - if(first) - { - jam(); - // start file thread - ptr.p->backupId= req.backupId; - lcp_open_file(signal, ptr); - return; - } - else + if(!ptr.p->tables.seize(tabPtr) || !tabPtr.p->fragments.seize(1)) { - jam(); - ndbrequire(ptr.p->backupId == req.backupId); + if(!tabPtr.isNull()) + ptr.p->tables.release(); + ndbrequire(false); // TODO } + tabPtr.p->tableId = req.tableId; + tabPtr.p->fragments.getPtr(fragPtr, 0); + tabPtr.p->tableType = DictTabInfo::UserTable; + fragPtr.p->fragmentId = req.fragmentId; + fragPtr.p->lcp_no = req.lcpNo; + fragPtr.p->scanned = 0; + fragPtr.p->scanning = 0; + fragPtr.p->tableId = req.tableId; - /** - * Close previous file - */ - jam(); - BackupFilePtr filePtr; - c_backupFilePool.getPtr(filePtr, ptr.p->dataFilePtr); - filePtr.p->operation.dataBuffer.eof(); + ptr.p->backupId= req.backupId; + lcp_open_file(signal, ptr); } void Backup::lcp_close_file_conf(Signal* signal, BackupRecordPtr ptr) { - if(!ptr.p->tables.isEmpty()) + jam(); + + TablePtr tabPtr; + 
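(For reference: the four per-file Uint8 state fields, fileOpened, fileRunning, fileClosing and scanRunning, are folded throughout this patch into the single BackupFile::m_flags word manipulated above. A minimal sketch of the set/clear/test idiom, assuming only the flag values from the BackupFile enum in Backup.hpp; everything else is illustrative.)

#include <cstdint>
#include <cassert>

// Flag values as in BackupFile; the surrounding code is a simplified stand-in.
enum BackupFileFlags : uint32_t {
  BF_OPEN        = 0x1,
  BF_OPENING     = 0x2,
  BF_CLOSING     = 0x4,
  BF_FILE_THREAD = 0x8,
  BF_SCAN_THREAD = 0x10,
  BF_LCP_META    = 0x20
};

int main() {
  uint32_t m_flags = 0;

  m_flags |= BF_OPENING;             // open request sent
  m_flags &= ~(uint32_t)BF_OPENING;  // FSOPENCONF arrived...
  m_flags |= BF_OPEN;                // ...file is now open

  assert(m_flags & BF_OPEN);         // test a single flag
  assert(!(m_flags & BF_CLOSING));

  // Exact-state assertions compare the whole word, as in
  // ndbrequire(filePtr.p->m_flags == (BF_OPEN | BF_FILE_THREAD)).
  m_flags |= BF_FILE_THREAD;
  assert(m_flags == (BF_OPEN | BF_FILE_THREAD));
  return 0;
}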
ndbrequire(ptr.p->tables.first(tabPtr)); + Uint32 tableId = tabPtr.p->tableId; + + BackupFilePtr filePtr; + c_backupFilePool.getPtr(filePtr, ptr.p->dataFilePtr); + ndbrequire(filePtr.p->m_flags == 0); + + if (ptr.p->m_gsn == GSN_LCP_PREPARE_REQ) { jam(); - lcp_open_file(signal, ptr); + defineBackupRef(signal, ptr, ptr.p->errorCode); return; } + + FragmentPtr fragPtr; + tabPtr.p->fragments.getPtr(fragPtr, 0); + Uint32 fragmentId = fragPtr.p->fragmentId; + + tabPtr.p->attributes.release(); + tabPtr.p->fragments.release(); + ptr.p->tables.release(); + ptr.p->errorCode = 0; - lcp_send_end_lcp_conf(signal, ptr); + BackupFragmentConf * conf = (BackupFragmentConf*)signal->getDataPtrSend(); + conf->backupId = ptr.p->backupId; + conf->backupPtr = ptr.i; + conf->tableId = tableId; + conf->fragmentNo = fragmentId; + conf->noOfRecordsLow = 0; + conf->noOfRecordsHigh = 0; + conf->noOfBytesLow = 0; + conf->noOfBytesHigh = 0; + sendSignal(ptr.p->masterRef, GSN_BACKUP_FRAGMENT_CONF, signal, + BackupFragmentConf::SignalLength, JBB); } void @@ -4712,8 +4842,10 @@ Backup::lcp_open_file(Signal* signal, BackupRecordPtr ptr) FsOpenReq::OM_WRITEONLY | FsOpenReq::OM_TRUNCATE | FsOpenReq::OM_CREATE | - FsOpenReq::OM_APPEND; + FsOpenReq::OM_APPEND | + FsOpenReq::OM_AUTOSYNC; FsOpenReq::v2_setCount(req->fileNumber, 0xFFFFFFFF); + req->auto_sync_size = c_defaults.m_disk_synch_size; TablePtr tabPtr; FragmentPtr fragPtr; @@ -4726,15 +4858,15 @@ Backup::lcp_open_file(Signal* signal, BackupRecordPtr ptr) */ BackupFilePtr filePtr; c_backupFilePool.getPtr(filePtr, ptr.p->dataFilePtr); - ndbrequire(filePtr.p->fileRunning == 0); - filePtr.p->fileClosing = 0; - filePtr.p->fileRunning = 1; - + ndbrequire(filePtr.p->m_flags == 0); + filePtr.p->m_flags |= BackupFile::BF_OPENING; + filePtr.p->tableId = RNIL; // Will force init req->userPointer = filePtr.i; FsOpenReq::setVersion(req->fileNumber, 5); FsOpenReq::setSuffix(req->fileNumber, FsOpenReq::S_DATA); FsOpenReq::v5_setLcpNo(req->fileNumber, fragPtr.p->lcp_no); FsOpenReq::v5_setTableId(req->fileNumber, tabPtr.p->tableId); + FsOpenReq::v5_setFragmentId(req->fileNumber, fragPtr.p->fragmentId); sendSignal(NDBFS_REF, GSN_FSOPENREQ, signal, FsOpenReq::SignalLength, JBA); } @@ -4747,6 +4879,12 @@ Backup::lcp_open_file_done(Signal* signal, BackupRecordPtr ptr) ndbrequire(ptr.p->tables.first(tabPtr)); tabPtr.p->fragments.getPtr(fragPtr, 0); + BackupFilePtr filePtr; + c_backupFilePool.getPtr(filePtr, ptr.p->dataFilePtr); + ndbrequire(filePtr.p->m_flags == + (BackupFile::BF_OPEN | BackupFile::BF_LCP_META)); + filePtr.p->m_flags &= ~(Uint32)BackupFile::BF_LCP_META; + ptr.p->slaveState.setState(STARTED); LcpPrepareConf* conf= (LcpPrepareConf*)signal->getDataPtrSend(); @@ -4756,6 +4894,16 @@ Backup::lcp_open_file_done(Signal* signal, BackupRecordPtr ptr) conf->fragmentId = fragPtr.p->fragmentId; sendSignal(ptr.p->masterRef, GSN_LCP_PREPARE_CONF, signal, LcpPrepareConf::SignalLength, JBB); + + /** + * Start file thread + */ + filePtr.p->m_flags |= BackupFile::BF_FILE_THREAD; + + signal->theData[0] = BackupContinueB::START_FILE_THREAD; + signal->theData[1] = filePtr.i; + signal->theData[2] = __LINE__; + sendSignalWithDelay(BACKUP_REF, GSN_CONTINUEB, signal, 100, 3); } void @@ -4767,38 +4915,31 @@ Backup::execEND_LCPREQ(Signal* signal) c_backupPool.getPtr(ptr, req->backupPtr); ndbrequire(ptr.p->backupId == req->backupId); - ptr.p->slaveState.setState(STOPPING); + BackupFilePtr filePtr; + ptr.p->files.getPtr(filePtr, ptr.p->ctlFilePtr); + ndbrequire(filePtr.p->m_flags == 0); - TablePtr tabPtr; 
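(The OM_AUTOSYNC flag plus auto_sync_size used in openFiles() and lcp_open_file() above ask the file layer to sync once a bounded amount of unsynced data has accumulated. In plain POSIX terms the bookkeeping amounts to roughly the following sketch; this is not NDBFS code, and the file name and sizes are illustrative.)

#include <fcntl.h>
#include <unistd.h>
#include <cstddef>

// Sketch of auto-sync bookkeeping: never let more than auto_sync_size
// bytes accumulate in the OS page cache without an fsync.
struct AutoSyncFile {
  int fd;
  size_t auto_sync_size;   // e.g. the 4 MB m_disk_synch_size default
  size_t unsynced = 0;

  ssize_t append(const void *buf, size_t len) {
    ssize_t n = write(fd, buf, len);
    if (n > 0) {
      unsynced += (size_t)n;
      if (unsynced >= auto_sync_size) {
        fsync(fd);           // bound the flush burst the kernel can build up
        unsynced = 0;
      }
    }
    return n;
  }
};

int main() {
  AutoSyncFile f;
  f.fd = open("autosync_demo.dat", O_WRONLY | O_CREAT | O_TRUNC, 0644);
  f.auto_sync_size = 4 * 1024 * 1024;
  char page[32768] = {0};
  for (int i = 0; i < 256; i++)   // 8 MB total => two fsync calls
    f.append(page, sizeof(page));
  close(f.fd);
  return 0;
}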
- if(ptr.p->tables.first(tabPtr)) + if (!ptr.p->tables.isEmpty()) { + jam(); + ndbrequire(ptr.p->errorCode); + TablePtr tabPtr; + ptr.p->tables.first(tabPtr); tabPtr.p->attributes.release(); tabPtr.p->fragments.release(); ptr.p->tables.release(); - - BackupFilePtr filePtr; - c_backupFilePool.getPtr(filePtr, ptr.p->dataFilePtr); - filePtr.p->operation.dataBuffer.eof(); - return; - } - - lcp_send_end_lcp_conf(signal, ptr); -} - -void -Backup::lcp_send_end_lcp_conf(Signal* signal, BackupRecordPtr ptr) -{ - EndLcpConf* conf= (EndLcpConf*)signal->getDataPtr(); + ptr.p->errorCode = 0; + } - conf->senderData = ptr.p->clientData; - conf->senderRef = reference(); - ptr.p->errorCode = 0; ptr.p->slaveState.setState(CLEANING); ptr.p->slaveState.setState(INITIAL); ptr.p->slaveState.setState(DEFINING); ptr.p->slaveState.setState(DEFINED); + EndLcpConf* conf= (EndLcpConf*)signal->getDataPtr(); + conf->senderData = ptr.p->clientData; + conf->senderRef = reference(); sendSignal(ptr.p->masterRef, GSN_END_LCPCONF, signal, EndLcpConf::SignalLength, JBB); } diff --git a/storage/ndb/src/kernel/blocks/backup/Backup.hpp b/storage/ndb/src/kernel/blocks/backup/Backup.hpp index afacf01ab2f..a5929efa5c8 100644 --- a/storage/ndb/src/kernel/blocks/backup/Backup.hpp +++ b/storage/ndb/src/kernel/blocks/backup/Backup.hpp @@ -33,6 +33,7 @@ #include <blocks/mutexes.hpp> #include <NdbTCP.h> +#include <NdbTick.h> #include <Array.hpp> /** @@ -344,10 +345,16 @@ public: Uint32 nextList; union { Uint32 prevList; Uint32 nextPool; }; - Uint8 fileOpened; - Uint8 fileRunning; - Uint8 fileClosing; - Uint8 scanRunning; + enum { + BF_OPEN = 0x1 + ,BF_OPENING = 0x2 + ,BF_CLOSING = 0x4 + ,BF_FILE_THREAD = 0x8 + ,BF_SCAN_THREAD = 0x10 + ,BF_LCP_META = 0x20 + }; + Uint32 m_flags; + Uint32 m_pos; }; typedef Ptr<BackupFile> BackupFilePtr; @@ -356,14 +363,14 @@ public: * State for BackupRecord */ enum State { - INITIAL, - DEFINING, // Defining backup content and parameters - DEFINED, // DEFINE_BACKUP_CONF sent in slave, received all in master - STARTED, // Creating triggers - SCANNING, // Scanning fragments - STOPPING, // Closing files - CLEANING, // Cleaning resources - ABORTING // Aborting backup + INITIAL = 0, + DEFINING = 1, // Defining backup content and parameters + DEFINED = 2, // DEFINE_BACKUP_CONF sent in slave, received all in master + STARTED = 3, // Creating triggers + SCANNING = 4, // Scanning fragments + STOPPING = 5, // Closing files + CLEANING = 6, // Cleaning resources + ABORTING = 7 // Aborting backup }; static const Uint32 validSlaveTransitionsCount; @@ -522,6 +529,11 @@ public: Uint32 m_minWriteSize; Uint32 m_maxWriteSize; Uint32 m_lcp_buffer_size; + + Uint32 m_disk_write_speed_sr; + Uint32 m_disk_write_speed; + Uint32 m_disk_synch_size; + Uint32 m_diskless; }; /** @@ -533,8 +545,17 @@ public: NdbNodeBitmask c_aliveNodes; DLList<BackupRecord> c_backups; Config c_defaults; - Uint32 m_diskless; + /* + Variables that control checkpoint to disk speed + */ + Uint32 m_curr_disk_write_speed; + Uint32 m_words_written_this_period; + Uint32 m_overflow_disk_write; + Uint32 m_reset_delay_used; + NDB_TICKS m_reset_disk_speed_time; + static const int DISK_SPEED_CHECK_DELAY = 100; + STATIC_CONST(NO_OF_PAGES_META_FILE = MAX_WORDS_META_FILE/BACKUP_WORDS_PER_PAGE); /** @@ -561,6 +582,7 @@ public: void openFiles(Signal* signal, BackupRecordPtr ptr); void openFilesReply(Signal*, BackupRecordPtr ptr, BackupFilePtr); void closeFiles(Signal*, BackupRecordPtr ptr); + void closeFile(Signal*, BackupRecordPtr, BackupFilePtr); void 
closeFilesDone(Signal*, BackupRecordPtr ptr); void sendDefineBackupReq(Signal *signal, BackupRecordPtr ptr); @@ -630,7 +652,8 @@ public: void lcp_open_file(Signal* signal, BackupRecordPtr ptr); void lcp_open_file_done(Signal*, BackupRecordPtr); void lcp_close_file_conf(Signal* signal, BackupRecordPtr); - void lcp_send_end_lcp_conf(Signal* signal, BackupRecordPtr); + + bool ready_to_write(bool ready, Uint32 sz, bool eof, BackupFile *fileP); }; inline diff --git a/storage/ndb/src/kernel/blocks/backup/BackupInit.cpp b/storage/ndb/src/kernel/blocks/backup/BackupInit.cpp index 38a60ac04d6..4cbe0c32a29 100644 --- a/storage/ndb/src/kernel/blocks/backup/BackupInit.cpp +++ b/storage/ndb/src/kernel/blocks/backup/BackupInit.cpp @@ -146,8 +146,28 @@ Backup::execREAD_CONFIG_REQ(Signal* signal) m_ctx.m_config.getOwnConfigIterator(); ndbrequire(p != 0); + c_defaults.m_disk_write_speed = 10 * (1024 * 1024); + c_defaults.m_disk_write_speed_sr = 100 * (1024 * 1024); + c_defaults.m_disk_synch_size = 4 * (1024 * 1024); + Uint32 noBackups = 0, noTables = 0, noAttribs = 0, noFrags = 0; - ndbrequire(!ndb_mgm_get_int_parameter(p, CFG_DB_DISCLESS, &m_diskless)); + ndbrequire(!ndb_mgm_get_int_parameter(p, CFG_DB_DISCLESS, + &c_defaults.m_diskless)); + ndb_mgm_get_int_parameter(p, CFG_DB_CHECKPOINT_SPEED_SR, + &c_defaults.m_disk_write_speed_sr); + ndb_mgm_get_int_parameter(p, CFG_DB_CHECKPOINT_SPEED, + &c_defaults.m_disk_write_speed); + ndb_mgm_get_int_parameter(p, CFG_DB_DISK_SYNCH_SIZE, + &c_defaults.m_disk_synch_size); + + /* + We adjust the disk write speed parameters from bytes per second to + words per 100 milliseconds, i.e. we divide by 4 (bytes per word) and + then by 10 (100 ms periods per second). For example, the 10 MB/s + default becomes 10 * 1024 * 1024 / (4 * 10) = 262144 words per 100 ms. + The disk synch size is left in bytes. + */ + c_defaults.m_disk_write_speed /= (4 * 10); + c_defaults.m_disk_write_speed_sr /= (4 * 10); + ndb_mgm_get_int_parameter(p, CFG_DB_PARALLEL_BACKUPS, &noBackups); // ndbrequire(!ndb_mgm_get_int_parameter(p, CFG_DB_NO_TABLES, &noTables)); ndbrequire(!ndb_mgm_get_int_parameter(p, CFG_DICT_TABLE, &noTables)); diff --git a/storage/ndb/src/kernel/blocks/backup/FsBuffer.hpp b/storage/ndb/src/kernel/blocks/backup/FsBuffer.hpp index 3a21be99792..d4c87894a92 100644 --- a/storage/ndb/src/kernel/blocks/backup/FsBuffer.hpp +++ b/storage/ndb/src/kernel/blocks/backup/FsBuffer.hpp @@ -212,8 +212,6 @@ inline void FsBuffer::reset() { - assert(m_free = m_size); - assert(m_readIndex == m_writeIndex); m_readIndex = m_writeIndex = 0; m_free = m_size; m_eof = 0; diff --git a/storage/ndb/src/kernel/blocks/dbacc/DbaccMain.cpp b/storage/ndb/src/kernel/blocks/dbacc/DbaccMain.cpp index dc8b123f10f..30a36495579 100644 --- a/storage/ndb/src/kernel/blocks/dbacc/DbaccMain.cpp +++ b/storage/ndb/src/kernel/blocks/dbacc/DbaccMain.cpp @@ -4215,7 +4215,7 @@ Dbacc::abortSerieQueueOperation(Signal* signal, OperationrecPtr opPtr) { jam(); ptrCheckGuard(lastOp, coprecsize, operationrec); - ndbassert(lastOp.p->m_lock_owner_ptr_i = prevS.i); + ndbassert(lastOp.p->m_lock_owner_ptr_i == prevS.i); } else { @@ -8312,11 +8312,12 @@ Dbacc::execDUMP_STATE_ORD(Signal* signal) return; } - if(dumpState->args[0] == DumpStateOrd::DumpPageMemory){ + if(dumpState->args[0] == DumpStateOrd::DumpPageMemory && + signal->getLength() == 1){ reportMemoryUsage(signal, 0); return; } - + if(dumpState->args[0] == DumpStateOrd::EnableUndoDelayDataWrite){ ndbout << "Dbacc:: delay write of datapages for table = " << dumpState->args[1]<< endl; diff --git a/storage/ndb/src/kernel/blocks/dbdict/Dbdict.cpp b/storage/ndb/src/kernel/blocks/dbdict/Dbdict.cpp index 32bfd5d7146..02b0cb83847 
100644 --- a/storage/ndb/src/kernel/blocks/dbdict/Dbdict.cpp +++ b/storage/ndb/src/kernel/blocks/dbdict/Dbdict.cpp @@ -7584,9 +7584,18 @@ Dbdict::execCREATE_INDX_REQ(Signal* signal) } if (signal->getLength() == CreateIndxReq::SignalLength) { jam(); + CreateIndxRef::ErrorCode tmperr = CreateIndxRef::NoError; if (getOwnNodeId() != c_masterNodeId) { jam(); - + tmperr = CreateIndxRef::NotMaster; + } else if (c_blockState == BS_NODE_RESTART) { + jam(); + tmperr = CreateIndxRef::BusyWithNR; + } else if (c_blockState != BS_IDLE) { + jam(); + tmperr = CreateIndxRef::Busy; + } + if (tmperr != CreateIndxRef::NoError) { releaseSections(signal); OpCreateIndex opBusy; opPtr.p = &opBusy; @@ -7594,13 +7603,12 @@ Dbdict::execCREATE_INDX_REQ(Signal* signal) opPtr.p->m_isMaster = (senderRef == reference()); opPtr.p->key = 0; opPtr.p->m_requestType = CreateIndxReq::RT_DICT_PREPARE; - opPtr.p->m_errorCode = CreateIndxRef::NotMaster; + opPtr.p->m_errorCode = tmperr; opPtr.p->m_errorLine = __LINE__; opPtr.p->m_errorNode = c_masterNodeId; createIndex_sendReply(signal, opPtr, true); return; } - // forward initial request plus operation key to all req->setOpKey(++c_opRecordSequence); NodeReceiverGroup rg(DBDICT, c_aliveNodes); @@ -8185,10 +8193,19 @@ Dbdict::execDROP_INDX_REQ(Signal* signal) jam(); if (signal->getLength() == DropIndxReq::SignalLength) { jam(); + DropIndxRef::ErrorCode tmperr = DropIndxRef::NoError; if (getOwnNodeId() != c_masterNodeId) { jam(); - - err = DropIndxRef::NotMaster; + tmperr = DropIndxRef::NotMaster; + } else if (c_blockState == BS_NODE_RESTART) { + jam(); + tmperr = DropIndxRef::BusyWithNR; + } else if (c_blockState != BS_IDLE) { + jam(); + tmperr = DropIndxRef::Busy; + } + if (tmperr != DropIndxRef::NoError) { + err = tmperr; goto error; } // forward initial request plus operation key to all @@ -13491,6 +13508,17 @@ Dbdict::execDICT_LOCK_REQ(Signal* signal) sendDictLockInfoEvent(lockPtr, "lock request by node"); } +// only table and index ops are checked +bool +Dbdict::hasDictLockSchemaOp() +{ + return + ! c_opCreateTable.isEmpty() || + ! c_opDropTable.isEmpty() || + ! c_opCreateIndex.isEmpty() || + ! c_opDropIndex.isEmpty(); +} + void Dbdict::checkDictLockQueue(Signal* signal, bool poll) { @@ -13511,7 +13539,7 @@ Dbdict::checkDictLockQueue(Signal* signal, bool poll) break; } - if (c_opRecordPool.getNoOfFree() != c_opRecordPool.getSize()) { + if (hasDictLockSchemaOp()) { jam(); break; } @@ -13544,7 +13572,7 @@ Dbdict::execDICT_UNLOCK_ORD(Signal* signal) if (lockPtr.p->locked) { jam(); ndbrequire(c_blockState == lockPtr.p->lt->blockState); - ndbrequire(c_opRecordPool.getNoOfFree() == c_opRecordPool.getSize()); + ndbrequire(! hasDictLockSchemaOp()); ndbrequire(! c_dictLockQueue.hasPrev(lockPtr)); c_blockState = BS_IDLE; @@ -13640,7 +13668,7 @@ Dbdict::removeStaleDictLocks(Signal* signal, const Uint32* theFailedNodes) if (lockPtr.p->locked) { jam(); ndbrequire(c_blockState == lockPtr.p->lt->blockState); - ndbrequire(c_opRecordPool.getNoOfFree() == c_opRecordPool.getSize()); + ndbrequire(! hasDictLockSchemaOp()); ndbrequire(! 
c_dictLockQueue.hasPrev(lockPtr)); c_blockState = BS_IDLE; diff --git a/storage/ndb/src/kernel/blocks/dbdict/Dbdict.hpp b/storage/ndb/src/kernel/blocks/dbdict/Dbdict.hpp index 7c44fadfa62..b196d0a503b 100644 --- a/storage/ndb/src/kernel/blocks/dbdict/Dbdict.hpp +++ b/storage/ndb/src/kernel/blocks/dbdict/Dbdict.hpp @@ -2107,6 +2107,9 @@ private: void sendDictLockInfoEvent(Uint32 pollCount); void sendDictLockInfoEvent(DictLockPtr lockPtr, const char* text); + // check if any schema op exists (conflicting with dict lock) + bool hasDictLockSchemaOp(); + void checkDictLockQueue(Signal* signal, bool poll); void sendDictLockConf(Signal* signal, DictLockPtr lockPtr); void sendDictLockRef(Signal* signal, DictLockReq req, Uint32 errorCode); diff --git a/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp b/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp index 0595c018b2e..43850f297c6 100644 --- a/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp +++ b/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp @@ -6628,6 +6628,7 @@ void Dbdih::execCREATE_FRAGMENTATION_REQ(Signal * signal) ptrCheckGuard(NGPtr, MAX_NDB_NODES, nodeGroupRecord); const Uint32 max = NGPtr.p->nodeCount; + fragments[count++] = c_nextLogPart++; // Store logpart first Uint32 tmp= next_replica_node[NGPtr.i]; for(Uint32 replicaNo = 0; replicaNo < noOfReplicas; replicaNo++) { @@ -6674,7 +6675,8 @@ void Dbdih::execCREATE_FRAGMENTATION_REQ(Signal * signal) FragmentstorePtr fragPtr; ReplicaRecordPtr replicaPtr; getFragstore(primTabPtr.p, fragNo, fragPtr); - fragments[count++]= fragPtr.p->preferredPrimary; + fragments[count++] = c_nextLogPart++; + fragments[count++] = fragPtr.p->preferredPrimary; for (replicaPtr.i = fragPtr.p->storedReplicas; replicaPtr.i != RNIL; replicaPtr.i = replicaPtr.p->nextReplica) { @@ -6697,7 +6699,7 @@ void Dbdih::execCREATE_FRAGMENTATION_REQ(Signal * signal) } } } - ndbrequire(count == (2U + noOfReplicas * noOfFragments)); + ndbrequire(count == (2U + (1 + noOfReplicas) * noOfFragments)); CreateFragmentationConf * const conf = (CreateFragmentationConf*)signal->getDataPtrSend(); @@ -6870,8 +6872,8 @@ void Dbdih::execDIADDTABREQ(Signal* signal) FragmentstorePtr fragPtr; Uint32 activeIndex = 0; getFragstore(tabPtr.p, fragId, fragPtr); + fragPtr.p->m_log_part_id = fragments[index++]; fragPtr.p->preferredPrimary = fragments[index]; - fragPtr.p->m_log_part_id = c_nextLogPart++; for (Uint32 i = 0; i<noReplicas; i++) { const Uint32 nodeId = fragments[index++]; @@ -8533,11 +8535,21 @@ void Dbdih::openingTableErrorLab(Signal* signal, FileRecordPtr filePtr) /* WE FAILED IN OPENING A FILE. IF THE FIRST FILE THEN TRY WITH THE */ /* DUPLICATE FILE, OTHERWISE WE REPORT AN ERROR IN THE SYSTEM RESTART. 
*/ /* ---------------------------------------------------------------------- */ - ndbrequire(filePtr.i == tabPtr.p->tabFile[0]); - filePtr.i = tabPtr.p->tabFile[1]; - ptrCheckGuard(filePtr, cfileFileSize, fileRecord); - openFileRw(signal, filePtr); - filePtr.p->reqStatus = FileRecord::OPENING_TABLE; + if (filePtr.i == tabPtr.p->tabFile[0]) + { + filePtr.i = tabPtr.p->tabFile[1]; + ptrCheckGuard(filePtr, cfileFileSize, fileRecord); + openFileRw(signal, filePtr); + filePtr.p->reqStatus = FileRecord::OPENING_TABLE; + } + else + { + char buf[256]; + BaseString::snprintf(buf, sizeof(buf), + "Error opening DIH schema files for table: %d", + tabPtr.i); + progError(__LINE__, NDBD_EXIT_AFS_NO_SUCH_FILE, buf); + } }//Dbdih::openingTableErrorLab() void Dbdih::readingTableLab(Signal* signal, FileRecordPtr filePtr) @@ -8703,6 +8715,7 @@ Dbdih::resetReplicaSr(TabRecordPtr tabPtr){ } replicaPtr.i = nextReplicaPtrI; }//while + updateNodeInfo(fragPtr); } } diff --git a/storage/ndb/src/kernel/blocks/dblqh/Dblqh.hpp b/storage/ndb/src/kernel/blocks/dblqh/Dblqh.hpp index be52e06eb81..ec3042fa6dc 100644 --- a/storage/ndb/src/kernel/blocks/dblqh/Dblqh.hpp +++ b/storage/ndb/src/kernel/blocks/dblqh/Dblqh.hpp @@ -29,6 +29,7 @@ #include <signaldata/LCP.hpp> #include <signaldata/LqhTransConf.hpp> #include <signaldata/LqhFrag.hpp> +#include <signaldata/FsOpenReq.hpp> // primary key is stored in TUP #include "../dbtup/Dbtup.hpp" @@ -1515,11 +1516,6 @@ public: /** * This variable contains the last word written in the last page. */ - UintR logFilePagesToDiskWithoutSynch; - /** - * This variable keeps track of the number of pages written since - * last synch on this log file. - */ LogFileStatus logFileStatus; /** * A reference to page zero in this file. @@ -2798,10 +2794,10 @@ private: /*THIS VARIABLE IS THE HEAD OF A LINKED LIST OF FRAGMENTS WAITING TO BE */ /*RESTORED FROM DISK. 
*/ /* ------------------------------------------------------------------------- */ - DLList<Fragrecord> c_lcp_waiting_fragments; // StartFragReq'ed - DLList<Fragrecord> c_lcp_restoring_fragments; // Restoring as we speek - DLList<Fragrecord> c_lcp_complete_fragments; // Restored - DLList<Fragrecord> c_redo_complete_fragments; // Redo'ed + DLFifoList<Fragrecord> c_lcp_waiting_fragments; // StartFragReq'ed + DLFifoList<Fragrecord> c_lcp_restoring_fragments; // Restoring as we speak + DLFifoList<Fragrecord> c_lcp_complete_fragments; // Restored + DLFifoList<Fragrecord> c_redo_complete_fragments; // Redo'ed /* ------------------------------------------------------------------------- */ /*USED DURING SYSTEM RESTART, INDICATES THE OLDEST GCI THAT CAN BE RESTARTED */ diff --git a/storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp b/storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp index 695580d556c..98f46ac8c44 100644 --- a/storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp +++ b/storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp @@ -427,17 +427,20 @@ void Dblqh::execCONTINUEB(Signal* signal) signal->theData[0] = fragptr.p->tabRef; signal->theData[1] = fragptr.p->fragId; sendSignal(DBACC_REF, GSN_EXPANDCHECK2, signal, 2, JBB); + Ptr<Fragrecord> save = fragptr; c_redo_complete_fragments.next(fragptr); signal->theData[0] = ZENABLE_EXPAND_CHECK; signal->theData[1] = fragptr.i; sendSignal(DBLQH_REF, GSN_CONTINUEB, signal, 2, JBB); + + c_redo_complete_fragments.remove(save); return; } else { jam(); - c_redo_complete_fragments.remove(); + ndbrequire(c_redo_complete_fragments.isEmpty()); StartRecConf * conf = (StartRecConf*)signal->getDataPtrSend(); conf->startingNodeId = getOwnNodeId(); sendSignal(cmasterDihBlockref, GSN_START_RECCONF, signal, @@ -3828,6 +3831,7 @@ void Dblqh::prepareContinueAfterBlockedLab(Signal* signal) } else if (activeCreat == Fragrecord::AC_NR_COPY) { + regTcPtr->totSendlenAi = regTcPtr->totReclenAi; handle_nr_copy(signal, tcConnectptr); } else @@ -3842,7 +3846,8 @@ void Dblqh::prepareContinueAfterBlockedLab(Signal* signal) signal->theData[0] = regTcPtr->tupConnectrec; EXECUTE_DIRECT(DBTUP, GSN_TUP_ABORTREQ, signal, 1); jamEntry(); - + + regTcPtr->totSendlenAi = regTcPtr->totReclenAi; packLqhkeyreqLab(signal); } } @@ -11269,8 +11274,22 @@ void Dblqh::execLCP_PREPARE_CONF(Signal* signal) else #endif { - sendSignal(BACKUP_REF, GSN_BACKUP_FRAGMENT_REQ, signal, - BackupFragmentReq::SignalLength, JBB); + if (ERROR_INSERTED(5044) && + (fragptr.p->tabRef == c_error_insert_table_id) && + fragptr.p->fragId) // Not first frag + { + /** + * Force CRASH_INSERTION in 10s + */ + ndbout_c("table: %d frag: %d", fragptr.p->tabRef, fragptr.p->fragId); + SET_ERROR_INSERT_VALUE(5027); + sendSignalWithDelay(reference(), GSN_START_RECREQ, signal, 10000, 1); + } + else + { + sendSignal(BACKUP_REF, GSN_BACKUP_FRAGMENT_REQ, signal, + BackupFragmentReq::SignalLength, JBB); + } } } } @@ -12415,7 +12434,6 @@ void Dblqh::initFsopenconf(Signal* signal) ptrCheckGuard(logPartPtr, clogPartFileSize, logPartRecord); logFilePtr.p->currentMbyte = 0; logFilePtr.p->filePosition = 0; - logFilePtr.p->logFilePagesToDiskWithoutSynch = 0; }//Dblqh::initFsopenconf() /* ========================================================================= */ @@ -12796,19 +12814,17 @@ void Dblqh::lastWriteInFileLab(Signal* signal) void Dblqh::writePageZeroLab(Signal* signal) { - if (false && logPartPtr.p->logPartState == LogPartRecord::FILE_CHANGE_PROBLEM) + if (logPartPtr.p->logPartState == LogPartRecord::FILE_CHANGE_PROBLEM) { if 
(logPartPtr.p->firstLogQueue == RNIL) { jam(); logPartPtr.p->logPartState = LogPartRecord::IDLE; - ndbout_c("resetting logPartState to IDLE"); } else { jam(); logPartPtr.p->logPartState = LogPartRecord::ACTIVE; - ndbout_c("resetting logPartState to ACTIVE"); } } @@ -13064,14 +13080,16 @@ void Dblqh::initLogpage(Signal* signal) /* ------------------------------------------------------------------------- */ void Dblqh::openFileRw(Signal* signal, LogFileRecordPtr olfLogFilePtr) { + FsOpenReq* req = (FsOpenReq*)signal->getDataPtrSend(); signal->theData[0] = cownref; signal->theData[1] = olfLogFilePtr.i; signal->theData[2] = olfLogFilePtr.p->fileName[0]; signal->theData[3] = olfLogFilePtr.p->fileName[1]; signal->theData[4] = olfLogFilePtr.p->fileName[2]; signal->theData[5] = olfLogFilePtr.p->fileName[3]; - signal->theData[6] = ZOPEN_READ_WRITE; - sendSignal(NDBFS_REF, GSN_FSOPENREQ, signal, 7, JBA); + signal->theData[6] = ZOPEN_READ_WRITE | FsOpenReq::OM_AUTOSYNC; + req->auto_sync_size = MAX_REDO_PAGES_WITHOUT_SYNCH * sizeof(LogPageRecord); + sendSignal(NDBFS_REF, GSN_FSOPENREQ, signal, FsOpenReq::SignalLength, JBA); }//Dblqh::openFileRw() /* ------------------------------------------------------------------------- */ @@ -13082,14 +13100,16 @@ void Dblqh::openFileRw(Signal* signal, LogFileRecordPtr olfLogFilePtr) void Dblqh::openLogfileInit(Signal* signal) { logFilePtr.p->logFileStatus = LogFileRecord::OPENING_INIT; + FsOpenReq* req = (FsOpenReq*)signal->getDataPtrSend(); signal->theData[0] = cownref; signal->theData[1] = logFilePtr.i; signal->theData[2] = logFilePtr.p->fileName[0]; signal->theData[3] = logFilePtr.p->fileName[1]; signal->theData[4] = logFilePtr.p->fileName[2]; signal->theData[5] = logFilePtr.p->fileName[3]; - signal->theData[6] = 0x302; - sendSignal(NDBFS_REF, GSN_FSOPENREQ, signal, 7, JBA); + signal->theData[6] = 0x302 | FsOpenReq::OM_AUTOSYNC; + req->auto_sync_size = MAX_REDO_PAGES_WITHOUT_SYNCH * sizeof(LogPageRecord); + sendSignal(NDBFS_REF, GSN_FSOPENREQ, signal, FsOpenReq::SignalLength, JBA); }//Dblqh::openLogfileInit() /* OPEN FOR READ/WRITE, DO CREATE AND DO TRUNCATE FILE */ @@ -13116,14 +13136,16 @@ void Dblqh::openNextLogfile(Signal* signal) return; }//if onlLogFilePtr.p->logFileStatus = LogFileRecord::OPENING_WRITE_LOG; + FsOpenReq* req = (FsOpenReq*)signal->getDataPtrSend(); signal->theData[0] = cownref; signal->theData[1] = onlLogFilePtr.i; signal->theData[2] = onlLogFilePtr.p->fileName[0]; signal->theData[3] = onlLogFilePtr.p->fileName[1]; signal->theData[4] = onlLogFilePtr.p->fileName[2]; signal->theData[5] = onlLogFilePtr.p->fileName[3]; - signal->theData[6] = 2; - sendSignal(NDBFS_REF, GSN_FSOPENREQ, signal, 7, JBA); + signal->theData[6] = 2 | FsOpenReq::OM_AUTOSYNC; + req->auto_sync_size = MAX_REDO_PAGES_WITHOUT_SYNCH * sizeof(LogPageRecord); + sendSignal(NDBFS_REF, GSN_FSOPENREQ, signal, FsOpenReq::SignalLength, JBA); }//if }//Dblqh::openNextLogfile() @@ -13742,7 +13764,7 @@ void Dblqh::execSTART_FRAGREQ(Signal* signal) fragptr.p->newestGci = cnewestGci; }//if - if (lcpNo == ZNIL || fragptr.i != tabptr.p->fragrec[0]) + if (lcpNo == ZNIL) { jam(); /** @@ -13757,15 +13779,9 @@ void Dblqh::execSTART_FRAGREQ(Signal* signal) if(lcpNo == ZNIL) { - for (Uint32 i = 0; i<MAX_FRAG_PER_NODE; i++) - { - if (tabptr.p->fragrec[i] != RNIL) - { - signal->theData[0] = tabptr.i; - signal->theData[1] = tabptr.p->fragid[i]; - sendSignal(DBACC_REF, GSN_EXPANDCHECK2, signal, 2, JBB); - } - } + signal->theData[0] = tabptr.i; + signal->theData[1] = fragId; + sendSignal(DBACC_REF, 
GSN_EXPANDCHECK2, signal, 2, JBB); } return; @@ -13832,21 +13848,17 @@ void Dblqh::execRESTORE_LCP_CONF(Signal* signal) */ tabptr.i = fragptr.p->tabRef; ptrCheckGuard(tabptr, ctabrecFileSize, tablerec); - for (Uint32 i = 0; i<MAX_FRAG_PER_NODE; i++) - { - if (tabptr.p->fragrec[i] != RNIL) - { - signal->theData[0] = tabptr.i; - signal->theData[1] = tabptr.p->fragid[i]; - sendSignal(DBACC_REF, GSN_EXPANDCHECK2, signal, 2, JBB); - } - } + + signal->theData[0] = fragptr.p->tabRef; + signal->theData[1] = fragptr.p->fragId; + sendSignal(DBACC_REF, GSN_EXPANDCHECK2, signal, 2, JBB); if (!c_lcp_waiting_fragments.isEmpty()) { send_restore_lcp(signal); return; } + if (c_lcp_restoring_fragments.isEmpty() && cstartRecReq == ZTRUE) { jam(); @@ -14694,6 +14706,8 @@ void Dblqh::execSr(Signal* signal) LogFileRecordPtr nextLogFilePtr; LogPageRecordPtr tmpLogPagePtr; Uint32 logWord; + Uint32 line; + const char * crash_msg = 0; jamEntry(); logPartPtr.i = signal->theData[0]; @@ -14904,8 +14918,14 @@ void Dblqh::execSr(Signal* signal) /* PLACE THAN IN THE FIRST PAGE OF A NEW FILE IN THE FIRST POSITION AFTER THE*/ /* HEADER. */ /*---------------------------------------------------------------------------*/ - ndbrequire(logPagePtr.p->logPageWord[ZCURR_PAGE_INDEX] == - (ZPAGE_HEADER_SIZE + ZPOS_NO_FD)); + if (unlikely(logPagePtr.p->logPageWord[ZCURR_PAGE_INDEX] != + (ZPAGE_HEADER_SIZE + ZPOS_NO_FD))) + { + line = __LINE__; + logWord = logPagePtr.p->logPageWord[ZCURR_PAGE_INDEX]; + crash_msg = "ZFD_TYPE at incorrect position!"; + goto crash; + } { Uint32 noFdDescriptors = logPagePtr.p->logPageWord[ZPAGE_HEADER_SIZE + ZPOS_NO_FD]; @@ -14942,19 +14962,10 @@ void Dblqh::execSr(Signal* signal) /*---------------------------------------------------------------------------*/ /* SEND A SIGNAL TO THE SIGNAL LOG AND THEN CRASH THE SYSTEM. */ /*---------------------------------------------------------------------------*/ - signal->theData[0] = RNIL; - signal->theData[1] = logPartPtr.i; - Uint32 tmp = logFilePtr.p->fileName[3]; - tmp = (tmp >> 8) & 0xff;// To get the Directory, DXX. - signal->theData[2] = tmp; - signal->theData[3] = logFilePtr.p->fileNo; - signal->theData[4] = logFilePtr.p->currentFilepage; - signal->theData[5] = logFilePtr.p->currentMbyte; - signal->theData[6] = logPagePtr.p->logPageWord[ZCURR_PAGE_INDEX]; - signal->theData[7] = ~0; - signal->theData[8] = __LINE__; - sendSignal(cownref, GSN_DEBUG_SIG, signal, 9, JBA); - return; + line = __LINE__; + logWord = ZNEXT_MBYTE_TYPE; + crash_msg = "end of log wo/ having found last GCI"; + goto crash; }//if }//if /*---------------------------------------------------------------------------*/ @@ -15009,19 +15020,9 @@ void Dblqh::execSr(Signal* signal) /*---------------------------------------------------------------------------*/ /* SEND A SIGNAL TO THE SIGNAL LOG AND THEN CRASH THE SYSTEM. */ /*---------------------------------------------------------------------------*/ - signal->theData[0] = RNIL; - signal->theData[1] = logPartPtr.i; - Uint32 tmp = logFilePtr.p->fileName[3]; - tmp = (tmp >> 8) & 0xff;// To get the Directory, DXX. 
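(The scattered DEBUG_SIG exits removed here are funneled by this patch into a single crash: label, shown in the +crash: hunk that follows. A condensed sketch of that pattern, with simplified fields and an illustrative consistency check; progError() is stood in for by fputs/abort.)

#include <cstdio>
#include <cstdlib>

// Condensed sketch of the execSr() error funnel: record what went wrong,
// jump to one exit point, and emit a single formatted fatal message.
static void parse_log(unsigned pageIndex, unsigned logWordIn) {
  unsigned line = 0, logWord = logWordIn;
  const char *crash_msg = 0;

  if (pageIndex != 42) {            // stand-in for a consistency check
    line = __LINE__;
    crash_msg = "ZFD_TYPE at incorrect position!";
    goto crash;
  }
  return;

crash:
  char buf[255];
  snprintf(buf, sizeof(buf),
           "Error while reading REDO log, from line %u: W=%u : %s",
           line, logWord, crash_msg ? crash_msg : "");
  fputs(buf, stderr);               // progError() in the real block
  abort();
}

int main() { parse_log(42, 0); return 0; }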
- signal->theData[2] = tmp; - signal->theData[3] = logFilePtr.p->fileNo; - signal->theData[4] = logFilePtr.p->currentMbyte; - signal->theData[5] = logFilePtr.p->currentFilepage; - signal->theData[6] = logPagePtr.p->logPageWord[ZCURR_PAGE_INDEX]; - signal->theData[7] = logWord; - signal->theData[8] = __LINE__; - sendSignal(cownref, GSN_DEBUG_SIG, signal, 9, JBA); - return; + line = __LINE__; + crash_msg = "Invalid logword"; + goto crash; break; }//switch /*---------------------------------------------------------------------------*/ @@ -15029,6 +15030,35 @@ void Dblqh::execSr(Signal* signal) // that we reach a new page. /*---------------------------------------------------------------------------*/ } while (1); + return; + +crash: + signal->theData[0] = RNIL; + signal->theData[1] = logPartPtr.i; + Uint32 tmp = logFilePtr.p->fileName[3]; + tmp = (tmp >> 8) & 0xff;// To get the Directory, DXX. + signal->theData[2] = tmp; + signal->theData[3] = logFilePtr.p->fileNo; + signal->theData[4] = logFilePtr.p->currentMbyte; + signal->theData[5] = logFilePtr.p->currentFilepage; + signal->theData[6] = logPagePtr.p->logPageWord[ZCURR_PAGE_INDEX]; + signal->theData[7] = logWord; + signal->theData[8] = line; + + char buf[255]; + BaseString::snprintf(buf, sizeof(buf), + "Error while reading REDO log. from %d\n" + "D=%d, F=%d Mb=%d FP=%d W1=%d W2=%d : %s", + signal->theData[8], + signal->theData[2], + signal->theData[3], + signal->theData[4], + signal->theData[5], + signal->theData[6], + signal->theData[7], + crash_msg ? crash_msg : ""); + + progError(__LINE__, NDBD_EXIT_SR_REDOLOG, buf); }//Dblqh::execSr() /*---------------------------------------------------------------------------*/ @@ -15044,8 +15074,8 @@ void Dblqh::execDEBUG_SIG(Signal* signal) UintR tdebug; jamEntry(); - logPagePtr.i = signal->theData[0]; - tdebug = logPagePtr.p->logPageWord[0]; + //logPagePtr.i = signal->theData[0]; + //tdebug = logPagePtr.p->logPageWord[0]; char buf[100]; BaseString::snprintf(buf, 100, @@ -16140,15 +16170,8 @@ void Dblqh::completedLogPage(Signal* signal, Uint32 clpType, Uint32 place) signal->theData[0] = logFilePtr.p->fileRef; signal->theData[1] = cownref; signal->theData[2] = lfoPtr.i; - logFilePtr.p->logFilePagesToDiskWithoutSynch += twlpNoPages; if (twlpType == ZLAST_WRITE_IN_FILE) { jam(); - logFilePtr.p->logFilePagesToDiskWithoutSynch = 0; - signal->theData[3] = ZLIST_OF_MEM_PAGES_SYNCH; - } else if (logFilePtr.p->logFilePagesToDiskWithoutSynch > - MAX_REDO_PAGES_WITHOUT_SYNCH) { - jam(); - logFilePtr.p->logFilePagesToDiskWithoutSynch = 0; signal->theData[3] = ZLIST_OF_MEM_PAGES_SYNCH; } else { jam(); @@ -18474,10 +18497,17 @@ Dblqh::execDUMP_STATE_ORD(Signal* signal) } } - if (dumpState->args[0] == DumpStateOrd::LqhErrorInsert5042 && signal->getLength() == 2) + if (dumpState->args[0] == DumpStateOrd::LqhErrorInsert5042 && (signal->getLength() >= 2)) { c_error_insert_table_id = dumpState->args[1]; - SET_ERROR_INSERT_VALUE(5042); + if (signal->getLength() == 2) + { + SET_ERROR_INSERT_VALUE(5042); + } + else + { + SET_ERROR_INSERT_VALUE(dumpState->args[2]); + } } TcConnectionrec *regTcConnectionrec = tcConnectionrec; diff --git a/storage/ndb/src/kernel/blocks/dbtc/Dbtc.hpp b/storage/ndb/src/kernel/blocks/dbtc/Dbtc.hpp index 419754e2d08..6091b8f1aee 100644 --- a/storage/ndb/src/kernel/blocks/dbtc/Dbtc.hpp +++ b/storage/ndb/src/kernel/blocks/dbtc/Dbtc.hpp @@ -727,7 +727,7 @@ public: // Index data - bool isIndexOp; // Used to mark on-going TcKeyReq as indx table access + Uint8 isIndexOp; // Used to mark on-going 
TcKeyReq as indx table access + Uint8 isIndexOp; // Used to mark on-going TcKeyReq as indx table access bool indexOpReturn; UintR noIndexOp; // No outstanding index ops @@ -815,7 +815,7 @@ public: UintR savedState[LqhKeyConf::SignalLength]; // Index data - bool isIndexOp; // Used to mark on-going TcKeyReq as index table access + Uint8 isIndexOp; // Used to mark on-going TcKeyReq as index table access UintR indexOp; UintR currentIndexId; UintR attrInfoLen; diff --git a/storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp b/storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp index 28ff20e74ef..18cb404fc8e 100644 --- a/storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp +++ b/storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp @@ -1776,8 +1776,7 @@ void Dbtc::execKEYINFO(Signal* signal) apiConnectptr.i = signal->theData[0]; tmaxData = 20; if (apiConnectptr.i >= capiConnectFilesize) { - jam(); - warningHandlerLab(signal, __LINE__); + TCKEY_abort(signal, 18); return; }//if ptrAss(apiConnectptr, apiConnectRecord); @@ -1786,9 +1785,7 @@ void Dbtc::execKEYINFO(Signal* signal) compare_transid2 = apiConnectptr.p->transid[1] ^ signal->theData[2]; compare_transid1 = compare_transid1 | compare_transid2; if (compare_transid1 != 0) { - jam(); - printState(signal, 10); - sendSignalErrorRefuseLab(signal); + TCKEY_abort(signal, 19); return; }//if switch (apiConnectptr.p->apiConnectstate) { @@ -2533,7 +2530,7 @@ void Dbtc::execTCKEYREQ(Signal* signal) Uint32 TstartFlag = tcKeyReq->getStartFlag(Treqinfo); Uint32 TexecFlag = TcKeyReq::getExecuteFlag(Treqinfo); - bool isIndexOp = regApiPtr->isIndexOp; + Uint8 isIndexOp = regApiPtr->isIndexOp; bool isIndexOpReturn = regApiPtr->indexOpReturn; regApiPtr->isIndexOp = false; // Reset marker regApiPtr->m_exec_flag |= TexecFlag; @@ -3267,7 +3264,7 @@ void Dbtc::sendlqhkeyreq(Signal* signal, sig1 = regCachePtr->fragmentid + (regTcPtr->tcNodedata[1] << 16); sig2 = regApiPtr->transid[0]; sig3 = regApiPtr->transid[1]; - sig4 = regApiPtr->ndbapiBlockref; + sig4 = (regTcPtr->isIndexOp == 2) ? reference() : regApiPtr->ndbapiBlockref; sig5 = regTcPtr->clientData; sig6 = regCachePtr->scanInfo; @@ -8629,6 +8626,7 @@ void Dbtc::execSCAN_TABREQ(Signal* signal) // left over from simple/dirty read } else { jam(); + jamLine(transP->apiConnectstate); errCode = ZSTATE_ERROR; goto SCAN_TAB_error_no_state_change; } @@ -12053,14 +12051,18 @@ void Dbtc::readIndexTable(Signal* signal, opType == ZREAD ? 
ZREAD : ZREAD_EX); TcKeyReq::setAIInTcKeyReq(tcKeyRequestInfo, 1); // Allways send one AttrInfo TcKeyReq::setExecutingTrigger(tcKeyRequestInfo, 0); - BlockReference originalReceiver = regApiPtr->ndbapiBlockref; - regApiPtr->ndbapiBlockref = reference(); // Send result to me tcKeyReq->senderData = indexOp->indexOpId; indexOp->indexOpState = IOS_INDEX_ACCESS; regApiPtr->executingIndexOp = regApiPtr->accumulatingIndexOp; regApiPtr->accumulatingIndexOp = RNIL; - regApiPtr->isIndexOp = true; + regApiPtr->isIndexOp = 2; + if (ERROR_INSERTED(8037)) + { + ndbout_c("shifting index version"); + tcKeyReq->tableSchemaVersion = ~(Uint32)indexOp->tcIndxReq.tableSchemaVersion; + } + Uint32 remainingKey = indexOp->keyInfo.getSize(); bool moreKeyData = indexOp->keyInfo.first(keyIter); // *********** KEYINFO in TCKEYREQ *********** @@ -12079,21 +12081,13 @@ ndbassert(TcKeyReq::getDirtyFlag(tcKeyRequestInfo) == 0); ndbassert(TcKeyReq::getSimpleFlag(tcKeyRequestInfo) == 0); EXECUTE_DIRECT(DBTC, GSN_TCKEYREQ, signal, tcKeyLength); - - /** - * "Fool" TC not to start commiting transaction since it always will - * have one outstanding lqhkeyreq - * This is later decreased when the index read is complete - */ - regApiPtr->lqhkeyreqrec++; + jamEntry(); - /** - * Remember ptr to index read operation - * (used to set correct save point id on index operation later) - */ - indexOp->indexReadTcConnect = regApiPtr->lastTcConnect; + if (unlikely(regApiPtr->apiConnectstate == CS_ABORTING)) + { + goto err; + } - jamEntry(); // *********** KEYINFO *********** if (moreKeyData) { jam(); @@ -12113,6 +12107,10 @@ void Dbtc::readIndexTable(Signal* signal, EXECUTE_DIRECT(DBTC, GSN_KEYINFO, signal, KeyInfo::HeaderLength + KeyInfo::DataLength); jamEntry(); + if (unlikely(regApiPtr->apiConnectstate == CS_ABORTING)) + { + goto err; + } dataPos = 0; dataPtr = (Uint32 *) &keyInfo->keyData; } @@ -12123,10 +12121,32 @@ void Dbtc::readIndexTable(Signal* signal, EXECUTE_DIRECT(DBTC, GSN_KEYINFO, signal, KeyInfo::HeaderLength + dataPos); jamEntry(); + if (unlikely(regApiPtr->apiConnectstate == CS_ABORTING)) + { + goto err; + } } } - regApiPtr->ndbapiBlockref = originalReceiver; // reset original receiver + /** + * "Fool" TC not to start committing the transaction since it always will + * have one outstanding lqhkeyreq + * This is later decreased when the index read is complete + */ + regApiPtr->lqhkeyreqrec++; + + /** + * Remember ptr to index read operation + * (used to set correct save point id on index operation later) + */ + indexOp->indexReadTcConnect = regApiPtr->lastTcConnect; + +done: + return; + +err: + jam(); + goto done; } /** @@ -12181,7 +12201,7 @@ void Dbtc::executeIndexOperation(Signal* signal, tcKeyReq->transId2 = regApiPtr->transid[1]; tcKeyReq->senderData = tcIndxReq->senderData; // Needed for TRANSID_AI to API indexOp->indexOpState = IOS_INDEX_OPERATION; - regApiPtr->isIndexOp = true; + regApiPtr->isIndexOp = 1; regApiPtr->executingIndexOp = indexOp->indexOpId;; regApiPtr->noIndexOp++; // Increase count @@ -12262,9 +12282,16 @@ void Dbtc::executeIndexOperation(Signal* signal, const Uint32 currSavePointId = regApiPtr->currSavePointId; regApiPtr->currSavePointId = tmp.p->savePointId; EXECUTE_DIRECT(DBTC, GSN_TCKEYREQ, signal, tcKeyLength); + jamEntry(); + + if (unlikely(regApiPtr->apiConnectstate == CS_ABORTING)) + { + jam(); + return; + } + regApiPtr->currSavePointId = currSavePointId; - jamEntry(); // *********** KEYINFO *********** if (moreKeyData) { jam(); @@ -12285,6 +12312,13 @@ 
void Dbtc::executeIndexOperation(Signal* signal, EXECUTE_DIRECT(DBTC, GSN_KEYINFO, signal, KeyInfo::HeaderLength + KeyInfo::DataLength); jamEntry(); + + if (unlikely(regApiPtr->apiConnectstate == CS_ABORTING)) + { + jam(); + return; + } + dataPos = 0; dataPtr = (Uint32 *) &keyInfo->keyData; } @@ -12295,6 +12329,12 @@ void Dbtc::executeIndexOperation(Signal* signal, EXECUTE_DIRECT(DBTC, GSN_KEYINFO, signal, KeyInfo::HeaderLength + dataPos); jamEntry(); + + if (unlikely(regApiPtr->apiConnectstate == CS_ABORTING)) + { + jam(); + return; + } } } @@ -12324,6 +12364,13 @@ void Dbtc::executeIndexOperation(Signal* signal, EXECUTE_DIRECT(DBTC, GSN_ATTRINFO, signal, AttrInfo::HeaderLength + AttrInfo::DataLength); jamEntry(); + + if (unlikely(regApiPtr->apiConnectstate == CS_ABORTING)) + { + jam(); + return; + } + attrInfoPos = 0; dataPtr = (Uint32 *) &attrInfo->attrData; } @@ -12731,9 +12778,16 @@ void Dbtc::insertIntoIndexTable(Signal* signal, const Uint32 currSavePointId = regApiPtr->currSavePointId; regApiPtr->currSavePointId = opRecord->savePointId; EXECUTE_DIRECT(DBTC, GSN_TCKEYREQ, signal, tcKeyLength); + jamEntry(); + + if (unlikely(regApiPtr->apiConnectstate == CS_ABORTING)) + { + jam(); + return; + } + regApiPtr->currSavePointId = currSavePointId; tcConnectptr.p->currentIndexId = indexData->indexId; - jamEntry(); // *********** KEYINFO *********** if (moreKeyData) { @@ -12763,6 +12817,12 @@ void Dbtc::insertIntoIndexTable(Signal* signal, KeyInfo::HeaderLength + KeyInfo::DataLength); jamEntry(); #endif + if (unlikely(regApiPtr->apiConnectstate == CS_ABORTING)) + { + jam(); + return; + } + dataPtr = (Uint32 *) &keyInfo->keyData; dataPos = 0; } @@ -12798,6 +12858,13 @@ void Dbtc::insertIntoIndexTable(Signal* signal, KeyInfo::HeaderLength + KeyInfo::DataLength); jamEntry(); #endif + + if (unlikely(regApiPtr->apiConnectstate == CS_ABORTING)) + { + jam(); + return; + } + dataPtr = (Uint32 *) &keyInfo->keyData; dataPos = 0; } @@ -12815,6 +12882,11 @@ void Dbtc::insertIntoIndexTable(Signal* signal, KeyInfo::HeaderLength + dataPos); jamEntry(); #endif + if (unlikely(regApiPtr->apiConnectstate == CS_ABORTING)) + { + jam(); + return; + } } } @@ -12850,6 +12922,12 @@ void Dbtc::insertIntoIndexTable(Signal* signal, AttrInfo::HeaderLength + AttrInfo::DataLength); jamEntry(); #endif + if (unlikely(regApiPtr->apiConnectstate == CS_ABORTING)) + { + jam(); + return; + } + dataPtr = (Uint32 *) &attrInfo->attrData; attrInfoPos = 0; } @@ -12909,6 +12987,12 @@ void Dbtc::insertIntoIndexTable(Signal* signal, AttrInfo::HeaderLength + AttrInfo::DataLength); jamEntry(); #endif + if (unlikely(regApiPtr->apiConnectstate == CS_ABORTING)) + { + jam(); + return; + } + dataPtr = (Uint32 *) &attrInfo->attrData; attrInfoPos = 0; } @@ -13054,9 +13138,16 @@ void Dbtc::deleteFromIndexTable(Signal* signal, const Uint32 currSavePointId = regApiPtr->currSavePointId; regApiPtr->currSavePointId = opRecord->savePointId; EXECUTE_DIRECT(DBTC, GSN_TCKEYREQ, signal, tcKeyLength); + jamEntry(); + + if (unlikely(regApiPtr->apiConnectstate == CS_ABORTING)) + { + jam(); + return; + } + regApiPtr->currSavePointId = currSavePointId; tcConnectptr.p->currentIndexId = indexData->indexId; - jamEntry(); // *********** KEYINFO *********** if (moreKeyData) { @@ -13087,6 +13178,12 @@ void Dbtc::deleteFromIndexTable(Signal* signal, KeyInfo::HeaderLength + KeyInfo::DataLength); jamEntry(); #endif + if (unlikely(regApiPtr->apiConnectstate == CS_ABORTING)) + { + jam(); + return; + } + dataPtr = (Uint32 *) &keyInfo->keyData; dataPos = 0; } @@ -13123,6 
+13220,12 @@ void Dbtc::deleteFromIndexTable(Signal* signal, KeyInfo::HeaderLength + KeyInfo::DataLength); jamEntry(); #endif + if (unlikely(regApiPtr->apiConnectstate == CS_ABORTING)) + { + jam(); + return; + } + dataPtr = (Uint32 *) &keyInfo->keyData; dataPos = 0; } diff --git a/storage/ndb/src/kernel/blocks/dbtup/DbtupDebug.cpp b/storage/ndb/src/kernel/blocks/dbtup/DbtupDebug.cpp index e25e329ea9a..c433337e8b8 100644 --- a/storage/ndb/src/kernel/blocks/dbtup/DbtupDebug.cpp +++ b/storage/ndb/src/kernel/blocks/dbtup/DbtupDebug.cpp @@ -81,7 +81,7 @@ void Dbtup::execDUMP_STATE_ORD(Signal* signal) { Uint32 type = signal->theData[0]; - if(type == DumpStateOrd::DumpPageMemory){ + if(type == DumpStateOrd::DumpPageMemory && signal->getLength() == 1){ reportMemoryUsage(signal, 0); return; } diff --git a/storage/ndb/src/kernel/blocks/dbtup/DbtupExecQuery.cpp b/storage/ndb/src/kernel/blocks/dbtup/DbtupExecQuery.cpp index 6cde3e51e5a..dc1b194eed2 100644 --- a/storage/ndb/src/kernel/blocks/dbtup/DbtupExecQuery.cpp +++ b/storage/ndb/src/kernel/blocks/dbtup/DbtupExecQuery.cpp @@ -938,7 +938,7 @@ int Dbtup::handleReadReq(Signal* signal, dst, dstLen, false); - if (ret != -1) { + if (likely(ret != -1)) { /* ------------------------------------------------------------------------- */ // We have read all data into coutBuffer. Now send it to the API. /* ------------------------------------------------------------------------- */ @@ -948,16 +948,17 @@ int Dbtup::handleReadReq(Signal* signal, sendReadAttrinfo(signal, req_struct, TnoOfDataRead, regOperPtr); return 0; } - jam(); - tupkeyErrorLab(signal); - return -1; } else { jam(); - if (interpreterStartLab(signal, req_struct) != -1) { + if (likely(interpreterStartLab(signal, req_struct) != -1)) { return 0; } return -1; } + + jam(); + tupkeyErrorLab(signal); + return -1; } /* ---------------------------------------------------------------- */ @@ -1046,9 +1047,10 @@ int Dbtup::handleUpdateReq(Signal* signal, req_struct->attrinfo_len); } else { jam(); - retValue= interpreterStartLab(signal, req_struct); + if (unlikely(interpreterStartLab(signal, req_struct) == -1)) + return -1; } - + if (retValue == -1) { goto error; } @@ -1675,6 +1677,9 @@ int Dbtup::interpreterStartLab(Signal* signal, RlogSize= TnoDataRW; } else { jam(); + /** + * TUPKEY REF is sent from within interpreter + */ return -1; } } diff --git a/storage/ndb/src/kernel/blocks/dbtup/DbtupPagMan.cpp b/storage/ndb/src/kernel/blocks/dbtup/DbtupPagMan.cpp index 487c55c0d49..7478c0c20b8 100644 --- a/storage/ndb/src/kernel/blocks/dbtup/DbtupPagMan.cpp +++ b/storage/ndb/src/kernel/blocks/dbtup/DbtupPagMan.cpp @@ -181,24 +181,28 @@ void Dbtup::allocConsPages(Uint32 noOfPagesToAllocate, /* PROPER AMOUNT OF PAGES WERE NOT FOUND. FIND AS MUCH AS */ /* POSSIBLE. */ /* ---------------------------------------------------------------- */ - for (Uint32 j = firstListToCheck; (Uint32)~j; j--) { + if (firstListToCheck) + { ljam(); - if (cfreepageList[j] != RNIL) { + for (Uint32 j = firstListToCheck - 1; (Uint32)~j; j--) { ljam(); + if (cfreepageList[j] != RNIL) { + ljam(); /* ---------------------------------------------------------------- */ /* SOME AREA WAS FOUND, ALLOCATE ALL OF IT. 
*/ /* ---------------------------------------------------------------- */ - allocPageRef = cfreepageList[j]; - removeCommonArea(allocPageRef, j); - noOfPagesAllocated = 1 << j; - findFreeLeftNeighbours(allocPageRef, noOfPagesAllocated, - noOfPagesToAllocate); - findFreeRightNeighbours(allocPageRef, noOfPagesAllocated, - noOfPagesToAllocate); - - return; - }//if - }//for + allocPageRef = cfreepageList[j]; + removeCommonArea(allocPageRef, j); + noOfPagesAllocated = 1 << j; + findFreeLeftNeighbours(allocPageRef, noOfPagesAllocated, + noOfPagesToAllocate); + findFreeRightNeighbours(allocPageRef, noOfPagesAllocated, + noOfPagesToAllocate); + + return; + }//if + }//for + } /* ---------------------------------------------------------------- */ /* NO FREE AREA AT ALL EXISTED. RETURN ZERO PAGES */ /* ---------------------------------------------------------------- */ diff --git a/storage/ndb/src/kernel/blocks/dbtup/DbtupPageMap.cpp b/storage/ndb/src/kernel/blocks/dbtup/DbtupPageMap.cpp index 82bac432545..c7c1845e076 100644 --- a/storage/ndb/src/kernel/blocks/dbtup/DbtupPageMap.cpp +++ b/storage/ndb/src/kernel/blocks/dbtup/DbtupPageMap.cpp @@ -431,12 +431,12 @@ void Dbtup::allocMoreFragPages(Fragrecord* const regFragPtr) Uint32 noAllocPages = regFragPtr->noOfPagesToGrow >> 3; // 12.5% noAllocPages += regFragPtr->noOfPagesToGrow >> 4; // 6.25% noAllocPages += 2; - regFragPtr->noOfPagesToGrow += noAllocPages; /* -----------------------------------------------------------------*/ // We will grow by 18.75% plus two more additional pages to grow // a little bit quicker in the beginning. /* -----------------------------------------------------------------*/ - allocFragPages(regFragPtr, noAllocPages); + Uint32 allocated = allocFragPages(regFragPtr, noAllocPages); + regFragPtr->noOfPagesToGrow += allocated; }//Dbtup::allocMoreFragPages() Uint32 Dbtup::leafPageRangeFull(Fragrecord* const regFragPtr, PageRangePtr currPageRangePtr) diff --git a/storage/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp b/storage/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp index c3140bea25b..249a0177b3f 100644 --- a/storage/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp +++ b/storage/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp @@ -199,10 +199,9 @@ void Ndbcntr::execSYSTEM_ERROR(Signal* signal) case SystemError::CopyFragRefError: BaseString::snprintf(buf, sizeof(buf), - "Node %d killed this node because " - "it could not copy a fragment during node restart. 
" - "Copy fragment error code: %u.", - killingNode, data1); + "Killed by node %d as " + "copyfrag failed, error: %u", + killingNode, data1); break; case SystemError::StartFragRefError: @@ -2086,6 +2085,11 @@ void Ndbcntr::execSET_VAR_REQ(Signal* signal) { void Ndbcntr::updateNodeState(Signal* signal, const NodeState& newState) const{ NodeStateRep * const stateRep = (NodeStateRep *)&signal->theData[0]; + if (newState.startLevel == NodeState::SL_STARTED) + { + CRASH_INSERTION(1000); + } + stateRep->nodeState = newState; stateRep->nodeState.masterNodeId = cmasterNodeId; stateRep->nodeState.setNodeGroup(c_nodeGroup); @@ -2890,7 +2894,7 @@ void Ndbcntr::Missra::sendNextSTTOR(Signal* signal){ cntr.sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 3, JBB); } } - + signal->theData[0] = NDB_LE_NDBStartCompleted; signal->theData[1] = NDB_VERSION; cntr.sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB); diff --git a/storage/ndb/src/kernel/blocks/ndbfs/AsyncFile.cpp b/storage/ndb/src/kernel/blocks/ndbfs/AsyncFile.cpp index a3b6104a059..82554746a0f 100644 --- a/storage/ndb/src/kernel/blocks/ndbfs/AsyncFile.cpp +++ b/storage/ndb/src/kernel/blocks/ndbfs/AsyncFile.cpp @@ -219,6 +219,10 @@ AsyncFile::run() case Request:: append: appendReq(request); break; + case Request:: append_synch: + appendReq(request); + syncReq(request); + break; case Request::rmrf: rmrfReq(request, (char*)theFileName.c_str(), request->par.rmrf.own_directory); break; @@ -240,15 +244,14 @@ AsyncFile::run() }//while }//AsyncFile::run() -extern bool Global_useO_SYNC; -extern bool Global_unlinkO_CREAT; -extern Uint32 Global_syncFreq; +#ifdef O_DIRECT +static char g_odirect_readbuf[2*GLOBAL_PAGE_SIZE -1]; +#endif void AsyncFile::openReq(Request* request) { - m_openedWithSync = false; - m_syncFrequency = 0; - m_syncCount= 0; + m_auto_sync_freq = 0; + m_write_wo_sync = 0; // for open.flags, see signal FSOPENREQ #ifdef NDB_WIN32 @@ -256,7 +259,7 @@ void AsyncFile::openReq(Request* request) DWORD dwDesiredAccess = 0; DWORD dwShareMode = FILE_SHARE_READ | FILE_SHARE_WRITE; DWORD dwFlagsAndAttributes = FILE_ATTRIBUTE_NORMAL | FILE_FLAG_RANDOM_ACCESS | FILE_FLAG_NO_BUFFERING; - const Uint32 flags = request->par.open.flags; + Uint32 flags = request->par.open.flags; // Convert file open flags from Solaris to Windows if ((flags & FsOpenReq::OM_CREATE) && (flags & FsOpenReq::OM_TRUNCATE)){ @@ -309,7 +312,7 @@ void AsyncFile::openReq(Request* request) return; } #else - const Uint32 flags = request->par.open.flags; + Uint32 flags = request->par.open.flags; Uint32 new_flags = 0; // Convert file open flags from Solaris to Liux @@ -329,29 +332,31 @@ void AsyncFile::openReq(Request* request) if (flags & FsOpenReq::OM_AUTOSYNC) { - m_syncFrequency = 1024*1024; // Hard coded to 1M + m_auto_sync_freq = request->par.open.auto_sync_size; } if (flags & FsOpenReq::OM_APPEND){ new_flags |= O_APPEND; } - if ((flags & FsOpenReq::OM_SYNC) && ! (flags & FsOpenReq::OM_INIT)) - { -#ifdef O_SYNC - new_flags |= O_SYNC; -#endif - } - -//#ifndef NDB_NO_O_DIRECT /* to allow tmpfs */ -#ifdef O_DIRECT if (flags & FsOpenReq::OM_DIRECT) +#ifdef O_DIRECT { new_flags |= O_DIRECT; } +#elif defined O_SYNC + { + flags |= FsOpenReq::OM_SYNC; + } #endif -//#endif + if ((flags & FsOpenReq::OM_SYNC) && ! 
(flags & FsOpenReq::OM_INIT)) + { +#ifdef O_SYNC + new_flags |= O_SYNC; +#endif + } + switch(flags & 0x3){ case FsOpenReq::OM_READONLY: new_flags |= O_RDONLY; @@ -400,6 +405,11 @@ no_odirect: if (new_flags & O_DIRECT) { new_flags &= ~O_DIRECT; + flags |= FsOpenReq::OM_SYNC; +#ifdef O_SYNC + if (! (flags & FsOpenReq::OM_INIT)) + new_flags |= O_SYNC; +#endif goto no_odirect; } #endif @@ -412,6 +422,11 @@ no_odirect: else if (new_flags & O_DIRECT) { new_flags &= ~O_DIRECT; + flags |= FsOpenReq::OM_SYNC; +#ifdef O_SYNC + if (! (flags & FsOpenReq::OM_INIT)) + new_flags |= O_SYNC; +#endif goto no_odirect; } #endif @@ -429,7 +444,7 @@ no_odirect: { request->error = errno; } - else if(buf.st_size != request->par.open.file_size) + else if((Uint64)buf.st_size != request->par.open.file_size) { request->error = FsRef::fsErrInvalidFileSize; } @@ -486,6 +501,7 @@ no_odirect: { ndbout_c("error on first write(%d), disable O_DIRECT", err); new_flags &= ~O_DIRECT; + flags |= FsOpenReq::OM_SYNC; close(theFd); theFd = ::open(theFileName.c_str(), new_flags, mode); if (theFd != -1) @@ -502,18 +518,40 @@ no_odirect: if(lseek(theFd, 0, SEEK_SET) != 0) request->error = errno; } + else if (flags & FsOpenReq::OM_DIRECT) + { +#ifdef O_DIRECT + do { + int ret; + char * bufptr = (char*)((UintPtr(g_odirect_readbuf)+(GLOBAL_PAGE_SIZE - 1)) & ~(GLOBAL_PAGE_SIZE - 1)); + while (((ret = ::read(theFd, bufptr, GLOBAL_PAGE_SIZE)) == -1) && (errno == EINTR)); + if (ret == -1) + { + ndbout_c("%s Failed to read using O_DIRECT, disabling", theFileName.c_str()); + flags |= FsOpenReq::OM_SYNC; + flags |= FsOpenReq::OM_INIT; + break; + } + if(lseek(theFd, 0, SEEK_SET) != 0) + { + request->error = errno; + return; + } + } while (0); +#endif + } if ((flags & FsOpenReq::OM_SYNC) && (flags & FsOpenReq::OM_INIT)) { +#ifdef O_SYNC /** * reopen file with O_SYNC */ close(theFd); new_flags &= ~(O_CREAT | O_TRUNC); -#ifdef O_SYNC new_flags |= O_SYNC; -#endif theFd = ::open(theFileName.c_str(), new_flags, mode); +#endif } #endif } @@ -737,6 +775,10 @@ AsyncFile::writeReq( Request * request) return; } } // while(write_not_complete) + + if(m_auto_sync_freq && m_write_wo_sync > m_auto_sync_freq){ + syncReq(request); + } } int @@ -746,6 +788,8 @@ AsyncFile::writeBuffer(const char * buf, size_t size, off_t offset, size_t bytes_to_write = chunk_size; int return_value; + m_write_wo_sync += size; + #ifdef NDB_WIN32 DWORD dwSFP = SetFilePointer(hFile, offset, 0, FILE_BEGIN); if(dwSFP != offset) { @@ -805,7 +849,6 @@ AsyncFile::writeBuffer(const char * buf, size_t size, off_t offset, } #endif - m_syncCount+= bytes_written; buf += bytes_written; size -= bytes_written; offset += bytes_written; @@ -856,8 +899,7 @@ bool AsyncFile::isOpen(){ void AsyncFile::syncReq(Request * request) { - if(m_openedWithSync || - m_syncCount == 0){ + if(m_auto_sync_freq && m_write_wo_sync == 0){ return; } #ifdef NDB_WIN32 @@ -871,7 +913,7 @@ AsyncFile::syncReq(Request * request) return; } #endif - m_syncCount = 0; + m_write_wo_sync = 0; } void @@ -880,7 +922,7 @@ AsyncFile::appendReq(Request * request){ const char * buf = request->par.append.buf; Uint32 size = request->par.append.size; - m_syncCount += size; + m_write_wo_sync += size; #ifdef NDB_WIN32 DWORD dwWritten = 0; @@ -912,7 +954,7 @@ AsyncFile::appendReq(Request * request){ } #endif - if(m_syncFrequency != 0 && m_syncCount > m_syncFrequency){ + if(m_auto_sync_freq && m_write_wo_sync > m_auto_sync_freq){ syncReq(request); } } diff --git a/storage/ndb/src/kernel/blocks/ndbfs/AsyncFile.hpp 
b/storage/ndb/src/kernel/blocks/ndbfs/AsyncFile.hpp index 22364ae32d7..007560de7c6 100644 --- a/storage/ndb/src/kernel/blocks/ndbfs/AsyncFile.hpp +++ b/storage/ndb/src/kernel/blocks/ndbfs/AsyncFile.hpp @@ -123,6 +123,7 @@ public: sync, end, append, + append_synch, rmrf, readPartial }; @@ -132,6 +133,7 @@ public: Uint32 flags; Uint32 page_size; Uint64 file_size; + Uint32 auto_sync_size; } open; struct { int numberOfPages; @@ -232,9 +234,8 @@ private: int theWriteBufferSize; char* theWriteBuffer; - bool m_openedWithSync; - Uint32 m_syncCount; - Uint32 m_syncFrequency; + size_t m_write_wo_sync; // Writes wo/ sync + size_t m_auto_sync_freq; // Auto sync freq in bytes public: SimulatedBlock& m_fs; Ptr<GlobalPage> m_page_ptr; diff --git a/storage/ndb/src/kernel/blocks/ndbfs/Filename.cpp b/storage/ndb/src/kernel/blocks/ndbfs/Filename.cpp index 91038175e3c..06d68e7ad77 100644 --- a/storage/ndb/src/kernel/blocks/ndbfs/Filename.cpp +++ b/storage/ndb/src/kernel/blocks/ndbfs/Filename.cpp @@ -149,7 +149,8 @@ Filename::set(Filename::NameSpec& spec, { Uint32 tableId = FsOpenReq::v5_getTableId(filenumber); Uint32 lcpNo = FsOpenReq::v5_getLcpNo(filenumber); - BaseString::snprintf(buf, sizeof(buf), "LCP/%d/T%d", lcpNo, tableId); + Uint32 fragId = FsOpenReq::v5_getFragmentId(filenumber); + BaseString::snprintf(buf, sizeof(buf), "LCP/%d/T%dF%d", lcpNo, tableId, fragId); strcat(theName, buf); break; } diff --git a/storage/ndb/src/kernel/blocks/ndbfs/Ndbfs.cpp b/storage/ndb/src/kernel/blocks/ndbfs/Ndbfs.cpp index da2efab5792..362a462b081 100644 --- a/storage/ndb/src/kernel/blocks/ndbfs/Ndbfs.cpp +++ b/storage/ndb/src/kernel/blocks/ndbfs/Ndbfs.cpp @@ -230,6 +230,7 @@ Ndbfs::execFSOPENREQ(Signal* signal) request->par.open.file_size = fsOpenReq->file_size_hi; request->par.open.file_size <<= 32; request->par.open.file_size |= fsOpenReq->file_size_lo; + request->par.open.auto_sync_size = fsOpenReq->auto_sync_size; ndbrequire(forward(file, request)); } @@ -567,6 +568,7 @@ Ndbfs::execFSAPPENDREQ(Signal * signal) const Uint32 tSz = myBaseAddrRef->nrr; const Uint32 offset = fsReq->offset; const Uint32 size = fsReq->size; + const Uint32 synch_flag = fsReq->synch_flag; Request *request = theRequestPool->get(); if (openFile == NULL) { @@ -596,12 +598,15 @@ Ndbfs::execFSAPPENDREQ(Signal * signal) request->error = 0; request->set(userRef, userPointer, filePointer); request->file = openFile; - request->action = Request::append; request->theTrace = signal->getTrace(); request->par.append.buf = (const char *)(tWA + offset); request->par.append.size = size << 2; - + + if (!synch_flag) + request->action = Request::append; + else + request->action = Request::append_synch; ndbrequire(forward(openFile, request)); return; @@ -744,7 +749,9 @@ Ndbfs::report(Request * request, Signal* signal) sendSignal(ref, GSN_FSSYNCREF, signal, FsRef::SignalLength, JBB); break; } - case Request::append: { + case Request::append: + case Request::append_synch: + { jam(); sendSignal(ref, GSN_FSAPPENDREF, signal, FsRef::SignalLength, JBB); break; @@ -814,7 +821,9 @@ Ndbfs::report(Request * request, Signal* signal) sendSignal(ref, GSN_FSSYNCCONF, signal, 1, JBB); break; }//case - case Request::append: { + case Request::append: + case Request::append_synch: + { jam(); signal->theData[1] = request->par.append.size; sendSignal(ref, GSN_FSAPPENDCONF, signal, 2, JBB); @@ -1053,27 +1062,10 @@ Ndbfs::execCONTINUEB(Signal* signal) return; } -bool Global_useO_SYNC = true; -bool Global_unlinkO_CREAT = false; -Uint32 Global_syncFreq = 1024 * 1024; - void 
Ndbfs::execDUMP_STATE_ORD(Signal* signal) { if(signal->theData[0] == 19){ - if(signal->length() > 1){ - Global_useO_SYNC = signal->theData[1]; - } - if(signal->length() > 2){ - Global_syncFreq = signal->theData[2] * 1024 * 1024; - } - if(signal->length() > 3){ - Global_unlinkO_CREAT = signal->theData[3]; - } - ndbout_c("useO_SYNC = %d syncFreq = %d unlinkO_CREATE = %d", - Global_useO_SYNC, - Global_syncFreq, - Global_unlinkO_CREAT); return; } if(signal->theData[0] == DumpStateOrd::NdbfsDumpFileStat){ diff --git a/storage/ndb/src/kernel/blocks/pgman.cpp b/storage/ndb/src/kernel/blocks/pgman.cpp index addbd5e4ba2..15f056f70a9 100644 --- a/storage/ndb/src/kernel/blocks/pgman.cpp +++ b/storage/ndb/src/kernel/blocks/pgman.cpp @@ -442,13 +442,15 @@ Pgman::release_page_entry(Ptr<Page_entry>& ptr) #endif Page_state state = ptr.p->m_state; - ndbrequire(! (state & Page_entry::REQUEST)); ndbrequire(ptr.p->m_requests.isEmpty()); ndbrequire(! (state & Page_entry::ONSTACK)); ndbrequire(! (state & Page_entry::ONQUEUE)); ndbrequire(ptr.p->m_real_page_i == RNIL); + if (! (state & Page_entry::LOCKED)) + ndbrequire(! (state & Page_entry::REQUEST)); + set_page_state(ptr, 0); m_page_hashlist.remove(ptr); m_page_entry_pool.release(ptr); @@ -1476,7 +1478,7 @@ Pgman::fsreadreq(Signal* signal, Ptr<Page_entry> ptr) FsReadWriteReq::fsFormatGlobalPage); req->data.pageData[0] = ptr.p->m_real_page_i; sendSignal(NDBFS_REF, GSN_FSREADREQ, signal, - FsReadWriteReq::FixedLength + 1, JBB); + FsReadWriteReq::FixedLength + 1, JBA); } void @@ -1518,8 +1520,19 @@ Pgman::fswritereq(Signal* signal, Ptr<Page_entry> ptr) FsReadWriteReq::setFormatFlag(req->operationFlag, FsReadWriteReq::fsFormatGlobalPage); req->data.pageData[0] = ptr.p->m_real_page_i; + +#if ERROR_INSERT_CODE + if (ptr.p->m_state & Page_entry::LOCKED) + { + sendSignalWithDelay(NDBFS_REF, GSN_FSWRITEREQ, signal, + 3000, FsReadWriteReq::FixedLength + 1); + ndbout_c("pageout locked (3s)"); + return; + } +#endif + sendSignal(NDBFS_REF, GSN_FSWRITEREQ, signal, - FsReadWriteReq::FixedLength + 1, JBB); + FsReadWriteReq::FixedLength + 1, JBA); } void @@ -1635,8 +1648,8 @@ Pgman::get_page(Signal* signal, Ptr<Page_entry> ptr, Page_request page_req) return ptr.p->m_real_page_i; } } - - if (! (req_flags & Page_request::LOCK_PAGE)) + + if (! (req_flags & (Page_request::LOCK_PAGE | Page_request::UNLOCK_PAGE))) { ndbrequire(! 
(state & Page_entry::LOCKED)); } @@ -1675,7 +1688,7 @@ Pgman::get_page(Signal* signal, Ptr<Page_entry> ptr, Page_request page_req) if (req_flags & Page_request::UNLOCK_PAGE) { - state &= ~ Page_entry::LOCKED; + // keep it locked } ptr.p->m_busy_count += busy_count; diff --git a/storage/ndb/src/kernel/blocks/restore.cpp b/storage/ndb/src/kernel/blocks/restore.cpp index a8b3ebb17f4..cf9dd5937fc 100644 --- a/storage/ndb/src/kernel/blocks/restore.cpp +++ b/storage/ndb/src/kernel/blocks/restore.cpp @@ -264,7 +264,7 @@ Restore::init_file(const RestoreLcpReq* req, FilePtr file_ptr) file_ptr.p->m_status = File::FIRST_READ; file_ptr.p->m_table_id = req->tableId; - file_ptr.p->m_fragment_id = RNIL; + file_ptr.p->m_fragment_id = req->fragmentId; file_ptr.p->m_table_version = RNIL; file_ptr.p->m_bytes_left = 0; // Bytes read from FS @@ -361,6 +361,7 @@ Restore::open_file(Signal* signal, FilePtr file_ptr, Uint32 lcpNo) FsOpenReq::setSuffix(req->fileNumber, FsOpenReq::S_DATA); FsOpenReq::v5_setLcpNo(req->fileNumber, lcpNo); FsOpenReq::v5_setTableId(req->fileNumber, file_ptr.p->m_table_id); + FsOpenReq::v5_setFragmentId(req->fileNumber, file_ptr.p->m_fragment_id); sendSignal(NDBFS_REF, GSN_FSOPENREQ, signal, FsOpenReq::SignalLength, JBA); } @@ -475,6 +476,11 @@ Restore::restore_next(Signal* signal, FilePtr file_ptr) */ ndbout_c("records: %d len: %x left: %d", status & File::READING_RECORDS, 4*len, left); + + if (unlikely((status & File:: FILE_THREAD_RUNNING) == 0)) + { + ndbrequire(false); + } len= 0; break; } diff --git a/storage/ndb/src/kernel/blocks/suma/Suma.cpp b/storage/ndb/src/kernel/blocks/suma/Suma.cpp index 2b746fdbdd8..4b38ac0f5ff 100644 --- a/storage/ndb/src/kernel/blocks/suma/Suma.cpp +++ b/storage/ndb/src/kernel/blocks/suma/Suma.cpp @@ -2642,12 +2642,16 @@ Suma::sendSubStopComplete(Signal* signal, SubscriberPtr subbPtr) SubTableData * data = (SubTableData*)signal->getDataPtrSend(); data->gci = m_last_complete_gci + 1; // XXX ??? 
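// [Editorial aside -- not part of the changeset] The Suma.cpp hunks that
// follow replace three separate SubTableData members (operation,
// ndbd_nodeid, req_nodeid) with a single packed requestInfo word driven
// through static set/get helpers. A minimal standalone sketch of that
// packing style is below; the bit layout chosen here is illustrative
// only, not the actual SubTableData wire format:

typedef unsigned int Uint32;              // stand-in for ndb_types.hpp

struct SubTableDataSketch {
  // bits 0-7: operation, bits 8-15: requesting node, bits 16-23: ndbd node
  static void setOperation(Uint32& ri, Uint32 op) {
    ri = (ri & ~(Uint32)0xFF) | (op & 0xFF);
  }
  static Uint32 getOperation(Uint32 ri) { return ri & 0xFF; }

  static void setReqNodeId(Uint32& ri, Uint32 node) {
    ri = (ri & ~(Uint32)(0xFF << 8)) | ((node & 0xFF) << 8);
  }
  static Uint32 getReqNodeId(Uint32 ri) { return (ri >> 8) & 0xFF; }

  static void setNdbdNodeId(Uint32& ri, Uint32 node) {
    ri = (ri & ~(Uint32)(0xFF << 16)) | ((node & 0xFF) << 16);
  }
  static Uint32 getNdbdNodeId(Uint32 ri) { return (ri >> 16) & 0xFF; }
};
// Packing spares signal words and lets sub-fields be added later without
// growing SignalLength; the receiver decodes with the matching getters.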
data->tableId = 0; - data->operation = NdbDictionary::Event::_TE_STOP; + data->requestInfo = 0; + SubTableData::setOperation(data->requestInfo, + NdbDictionary::Event::_TE_STOP); + SubTableData::setNdbdNodeId(data->requestInfo, + getOwnNodeId()); data->senderData = subbPtr.p->m_senderData; sendSignal(subbPtr.p->m_senderRef, GSN_SUB_TABLE_DATA, signal, SubTableData::SignalLength, JBB); } - + SubStopConf * const conf = (SubStopConf*)signal->getDataPtrSend(); conf->senderRef= reference(); @@ -2681,11 +2685,14 @@ Suma::reportAllSubscribers(Signal *signal, { data->gci = m_last_complete_gci + 1; data->tableId = subPtr.p->m_tableId; - data->operation = NdbDictionary::Event::_TE_ACTIVE; - data->ndbd_nodeid = refToNode(reference()); + data->requestInfo = 0; + SubTableData::setOperation(data->requestInfo, + NdbDictionary::Event::_TE_ACTIVE); + SubTableData::setNdbdNodeId(data->requestInfo, getOwnNodeId()); + SubTableData::setReqNodeId(data->requestInfo, + refToNode(subbPtr.p->m_senderRef)); data->changeMask = 0; data->totalLen = 0; - data->req_nodeid = refToNode(subbPtr.p->m_senderRef); data->senderData = subbPtr.p->m_senderData; sendSignal(subbPtr.p->m_senderRef, GSN_SUB_TABLE_DATA, signal, SubTableData::SignalLength, JBB); @@ -2707,8 +2714,9 @@ Suma::reportAllSubscribers(Signal *signal, //#endif data->gci = m_last_complete_gci + 1; data->tableId = subPtr.p->m_tableId; - data->operation = table_event; - data->ndbd_nodeid = refToNode(reference()); + data->requestInfo = 0; + SubTableData::setOperation(data->requestInfo, table_event); + SubTableData::setNdbdNodeId(data->requestInfo, getOwnNodeId()); data->changeMask = 0; data->totalLen = 0; @@ -2720,7 +2728,8 @@ Suma::reportAllSubscribers(Signal *signal, { if (i_subbPtr.p->m_subPtrI == subPtr.i) { - data->req_nodeid = refToNode(subbPtr.p->m_senderRef); + SubTableData::setReqNodeId(data->requestInfo, + refToNode(subbPtr.p->m_senderRef)); data->senderData = i_subbPtr.p->m_senderData; sendSignal(i_subbPtr.p->m_senderRef, GSN_SUB_TABLE_DATA, signal, SubTableData::SignalLength, JBB); @@ -2729,12 +2738,14 @@ Suma::reportAllSubscribers(Signal *signal, table_event == NdbDictionary::Event::_TE_SUBSCRIBE ? "SUBSCRIBE" : "UNSUBSCRIBE", (int) table_event, refToNode(i_subbPtr.p->m_senderRef), - data->req_nodeid, data->senderData + refToNode(subbPtr.p->m_senderRef), data->senderData ); //#endif if (i_subbPtr.i != subbPtr.i) { - data->req_nodeid = refToNode(i_subbPtr.p->m_senderRef); + SubTableData::setReqNodeId(data->requestInfo, + refToNode(i_subbPtr.p->m_senderRef)); + data->senderData = subbPtr.p->m_senderData; sendSignal(subbPtr.p->m_senderRef, GSN_SUB_TABLE_DATA, signal, SubTableData::SignalLength, JBB); @@ -2743,7 +2754,7 @@ Suma::reportAllSubscribers(Signal *signal, table_event == NdbDictionary::Event::_TE_SUBSCRIBE ? 
"SUBSCRIBE" : "UNSUBSCRIBE", (int) table_event, refToNode(subbPtr.p->m_senderRef), - data->req_nodeid, data->senderData + refToNode(i_subbPtr.p->m_senderRef), data->senderData ); //#endif } @@ -3146,7 +3157,9 @@ Suma::execTRANSID_AI(Signal* signal) Uint32 ref = subPtr.p->m_senderRef; sdata->tableId = syncPtr.p->m_currentTableId; sdata->senderData = subPtr.p->m_senderData; - sdata->operation = NdbDictionary::Event::_TE_SCAN; // Scan + sdata->requestInfo = 0; + SubTableData::setOperation(sdata->requestInfo, + NdbDictionary::Event::_TE_SCAN); // Scan sdata->gci = 0; // Undefined #if PRINT_ONLY ndbout_c("GSN_SUB_TABLE_DATA (scan) #attr: %d len: %d", attribs, sum); @@ -3362,7 +3375,8 @@ Suma::execFIRE_TRIG_ORD(Signal* signal) SubTableData * data = (SubTableData*)signal->getDataPtrSend();//trg; data->gci = gci; data->tableId = tabPtr.p->m_tableId; - data->operation = event; + data->requestInfo = 0; + SubTableData::setOperation(data->requestInfo, event); data->logType = 0; data->changeMask = 0; data->totalLen = ptrLen; @@ -3588,8 +3602,9 @@ Suma::execDROP_TAB_CONF(Signal *signal) SubTableData * data = (SubTableData*)signal->getDataPtrSend(); data->gci = m_last_complete_gci+1; data->tableId = tableId; - data->operation = NdbDictionary::Event::_TE_DROP; - data->req_nodeid = refToNode(senderRef); + data->requestInfo = 0; + SubTableData::setOperation(data->requestInfo,NdbDictionary::Event::_TE_DROP); + SubTableData::setReqNodeId(data->requestInfo, refToNode(senderRef)); { LocalDLList<Subscriber> subbs(c_subscriberPool,tabPtr.p->c_subscribers); @@ -3667,8 +3682,10 @@ Suma::execALTER_TAB_REQ(Signal *signal) SubTableData * data = (SubTableData*)signal->getDataPtrSend(); data->gci = m_last_complete_gci+1; data->tableId = tableId; - data->operation = NdbDictionary::Event::_TE_ALTER; - data->req_nodeid = refToNode(senderRef); + data->requestInfo = 0; + SubTableData::setOperation(data->requestInfo, + NdbDictionary::Event::_TE_ALTER); + SubTableData::setReqNodeId(data->requestInfo, refToNode(senderRef)); data->logType = 0; data->changeMask = changeMask; data->totalLen = tabInfoPtr.sz; @@ -4898,7 +4915,8 @@ Suma::resend_bucket(Signal* signal, Uint32 buck, Uint32 min_gci, SubTableData * data = (SubTableData*)signal->getDataPtrSend();//trg; data->gci = last_gci; data->tableId = tabPtr.p->m_tableId; - data->operation = event; + data->requestInfo = 0; + SubTableData::setOperation(data->requestInfo, event); data->logType = 0; data->changeMask = 0; data->totalLen = ptrLen; diff --git a/storage/ndb/src/kernel/vm/DLHashTable2.hpp b/storage/ndb/src/kernel/vm/DLHashTable2.hpp index 5df0e4d5356..ff7c50f8446 100644 --- a/storage/ndb/src/kernel/vm/DLHashTable2.hpp +++ b/storage/ndb/src/kernel/vm/DLHashTable2.hpp @@ -146,6 +146,8 @@ public: * @param iter - An "uninitialized" iterator */ bool next(Uint32 bucket, Iterator & iter) const; + + inline bool isEmpty() const { Iterator iter; return ! first(iter); } private: Uint32 mask; diff --git a/storage/ndb/src/kernel/vm/DynArr256.cpp b/storage/ndb/src/kernel/vm/DynArr256.cpp new file mode 100644 index 00000000000..12e1b9ec40a --- /dev/null +++ b/storage/ndb/src/kernel/vm/DynArr256.cpp @@ -0,0 +1,1015 @@ +/* Copyright (C) 2003 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include "DynArr256.hpp" +#include <stdio.h> +#include <assert.h> +#include <NdbOut.hpp> + +#define DA256_BITS 5 +#define DA256_MASK 31 + +struct DA256CL +{ + Uint32 m_magic; + Uint32 m_data[15]; +}; + +struct DA256Free +{ + Uint32 m_magic; + Uint32 m_next_free; +}; + +struct DA256Node +{ + struct DA256CL m_lines[17]; +}; + +struct DA256Page +{ + struct DA256CL m_header[2]; + struct DA256Node m_nodes[30]; +}; + +#define require(x) require_impl(x, __LINE__) +//#define DA256_USE_PX +//#define DA256_USE_PREFETCH +#define DA256_EXTRA_SAFE + + +#ifdef UNIT_TEST +#ifdef USE_CALLGRIND +#include <valgrind/callgrind.h> +#else +#define CALLGRIND_TOGGLE_COLLECT() +#endif +Uint32 allocatedpages = 0; +Uint32 allocatednodes = 0; +Uint32 releasednodes = 0; +#endif + +inline +void +require_impl(bool x, int line) +{ + if (!x) + { + ndbout_c("LINE: %d", line); + abort(); + } +} + +DynArr256Pool::DynArr256Pool() +{ + m_type_id = RNIL; + m_first_free = RNIL; + m_memroot = 0; +} + +void +DynArr256Pool::init(Uint32 type_id, const Pool_context & pc) +{ + m_ctx = pc; + m_type_id = type_id; + m_memroot = (DA256Page*)m_ctx.get_memroot(); +} + +static const Uint32 g_max_sizes[5] = { 0, 256, 65536, 16777216, ~0 }; + +/** + * sz = 0 = 1 - 0 level + * sz = 1 = 256^1 - 1 level + * sz = 2 = 256^2 - 2 level + * sz = 3 = 256^3 - 3 level + * sz = 4 = 256^4 - 4 level + */ +Uint32 * +DynArr256::get(Uint32 pos) const +{ + Uint32 sz = m_head.m_sz; + Uint32 ptrI = m_head.m_ptr_i; + DA256Page * memroot = m_pool.m_memroot; + Uint32 type_id = (~m_pool.m_type_id) & 0xFFFF; + + if (unlikely(pos >= g_max_sizes[sz])) + { + return 0; + } + +#ifdef DA256_USE_PX + Uint32 px[4] = { (pos >> 24) & 255, + (pos >> 16) & 255, + (pos >> 8) & 255, + (pos >> 0) & 255 }; +#endif + + Uint32* retVal = &m_head.m_ptr_i; + for(; sz --;) + { + if (unlikely(ptrI == RNIL)) + { + return 0; + } +#ifdef DA256_USE_PX + Uint32 p0 = px[sz]; +#else + Uint32 shr = sz << 3; + Uint32 p0 = (pos >> shr) & 255; +#endif + Uint32 page_no = ptrI >> DA256_BITS; + Uint32 page_idx = ptrI & DA256_MASK; + DA256Page * page = memroot + page_no; + + Uint32 *magic_ptr, p; + if (p0 != 255) + { + Uint32 line = ((p0 << 8) + (p0 << 4) + p0 + 255) >> 12; + Uint32 * ptr = (Uint32*)(page->m_nodes + page_idx); + + p = 0; + retVal = (ptr + 1 + p0 + line); + magic_ptr =(ptr + (p0 & ~15)); + } + else + { + Uint32 b = (page_idx + 1) >> 4; + Uint32 * ptr = (Uint32*)(page->m_header+b); + + p = page_idx - (b << 4) + b; + retVal = (ptr + 1 + p); + magic_ptr = ptr; + } + + ptrI = *retVal; + Uint32 magic = *magic_ptr; + + if (unlikely(! 
((magic & (1 << p)) && (magic >> 16) == type_id))) + goto err; + } + + return retVal; +err: + require(false); + return 0; +} + +Uint32 * +DynArr256::set(Uint32 pos) +{ + Uint32 sz = m_head.m_sz; + Uint32 type_id = (~m_pool.m_type_id) & 0xFFFF; + DA256Page * memroot = m_pool.m_memroot; + + if (unlikely(pos >= g_max_sizes[sz])) + { + if (unlikely(!expand(pos))) + { + return 0; + } + sz = m_head.m_sz; + } + +#ifdef DA256_USE_PX + Uint32 px[4] = { (pos >> 24) & 255, + (pos >> 16) & 255, + (pos >> 8) & 255, + (pos >> 0) & 255 }; +#endif + + Uint32 ptrI = m_head.m_ptr_i; + Uint32 *retVal = &m_head.m_ptr_i; + for(; sz --;) + { +#ifdef DA256_USE_PX + Uint32 p0 = px[sz]; +#else + Uint32 shr = sz << 3; + Uint32 p0 = (pos >> shr) & 255; +#endif + if (ptrI == RNIL) + { + if (unlikely((ptrI = m_pool.seize()) == RNIL)) + { + return 0; + } + * retVal = ptrI; + } + + Uint32 page_no = ptrI >> DA256_BITS; + Uint32 page_idx = ptrI & DA256_MASK; + DA256Page * page = memroot + page_no; + + Uint32 *magic_ptr, p; + if (p0 != 255) + { + Uint32 line = ((p0 << 8) + (p0 << 4) + p0 + 255) >> 12; + Uint32 * ptr = (Uint32*)(page->m_nodes + page_idx); + + p = 0; + magic_ptr = (ptr + (p0 & ~15)); + retVal = (ptr + 1 + p0 + line); + } + else + { + Uint32 b = (page_idx + 1) >> 4; + Uint32 * ptr = (Uint32*)(page->m_header+b); + + p = page_idx - (b << 4) + b; + magic_ptr = ptr; + retVal = (ptr + 1 + p); + } + + ptrI = * retVal; + Uint32 magic = *magic_ptr; + + if (unlikely(! ((magic & (1 << p)) && (magic >> 16) == type_id))) + goto err; + } + + return retVal; + +err: + require(false); + return 0; +} + +static +inline +void +initpage(DA256Page* p, Uint32 page_no, Uint32 type_id) +{ + Uint32 i, j; +#ifdef DA256_USE_PREFETCH +#if defined(__GNUC__) && !(__GNUC__ == 2 && __GNUC_MINOR__ < 96) +#ifdef DA256_EXTRA_SAFE + for (i = 0; i<(30 * 17 + 2); i++) + { + __builtin_prefetch (p->m_header + i, 1); + } +#else + { + __builtin_prefetch (p->m_header + 0, 1); + __builtin_prefetch (p->m_header + 1, 1); + for (i = 0; i<30; i++) + { + __builtin_prefetch (p->m_nodes + i, 1); + } + } +#endif +#endif +#endif + DA256CL* cl; + for (i = 0; i<2; i++) + { + cl = p->m_header + i; + cl->m_magic = (~type_id << 16); + } + + DA256Free* free; + + for (i = 0; i<30; i++) + { + free = (DA256Free*)(p->m_nodes+i); + free->m_magic = type_id; + free->m_next_free = (page_no << DA256_BITS) + (i + 1); +#ifdef DA256_EXTRA_SAFE + DA256Node* node = p->m_nodes+i; + for (j = 0; j<17; j++) + node->m_lines[j].m_magic = type_id; +#endif + } + + free = (DA256Free*)(p->m_nodes+29); + free->m_next_free = RNIL; +} + +bool +DynArr256::expand(Uint32 pos) +{ + Uint32 i; + Uint32 idx = 0; + Uint32 alloc[5]; + Uint32 sz = m_head.m_sz; + Uint32 shl = 0; + + for (; pos >= g_max_sizes[sz]; sz++); + + if (m_head.m_sz == 0) + { + m_head.m_sz = sz; + return true; + } + + sz = m_head.m_sz; + for (; pos >= g_max_sizes[sz]; sz++) + { + Uint32 ptrI = m_pool.seize(); + if (unlikely(ptrI == RNIL)) + goto err; + alloc[idx++] = ptrI; + } + + alloc[idx] = m_head.m_ptr_i; + m_head.m_sz = 1; + for (Uint32 i = 0; i<idx; i++) + { + m_head.m_ptr_i = alloc[i]; + Uint32 * ptr = get(0); + * ptr = alloc[i + 1]; + } + + m_head.m_sz = sz; + m_head.m_ptr_i = alloc[0]; + + return true; + +err: + for (i = 0; i<idx; i++) + m_pool.release(alloc[i]); + return false; +} + +void +DynArr256::init(ReleaseIterator &iter) +{ + iter.m_sz = 0; + iter.m_pos = 0; + iter.m_ptr_i[0] = m_head.m_ptr_i; + iter.m_ptr_i[1] = RNIL; + iter.m_ptr_i[2] = RNIL; + iter.m_ptr_i[3] = RNIL; +} + +bool 
+DynArr256::release(ReleaseIterator &iter) +{ + Uint32 ptrI = iter.m_ptr_i[iter.m_sz]; + Uint32 page_no = ptrI >> DA256_BITS; + Uint32 page_idx = ptrI & DA256_MASK; + Uint32 type_id = (~m_pool.m_type_id) & 0xFFFF; + DA256Page * memroot = m_pool.m_memroot; + DA256Page * page = memroot + page_no; + + if (ptrI != RNIL) + { + Uint32 tmp = iter.m_pos & 255; + Uint32 p0 = tmp; + for (; p0<256 && p0 < tmp + 16; p0++) + { + Uint32 *retVal, *magic_ptr, p; + if (p0 != 255) + { + Uint32 line = ((p0 << 8) + (p0 << 4) + p0 + 255) >> 12; + Uint32 * ptr = (Uint32*)(page->m_nodes + page_idx); + + p = 0; + retVal = (ptr + 1 + p0 + line); + magic_ptr =(ptr + (p0 & ~15)); + } + else + { + Uint32 b = (page_idx + 1) >> 4; + Uint32 * ptr = (Uint32*)(page->m_header+b); + + p = page_idx - (b << 4) + b; + retVal = (ptr + 1 + p); + magic_ptr = ptr; + } + + Uint32 magic = *magic_ptr; + if (unlikely(! ((magic & (1 << p)) && (magic >> 16) == type_id))) + goto err; + + Uint32 val = * retVal; + if (val != RNIL) + { + if (iter.m_sz + 2 == m_head.m_sz) + { + * retVal = RNIL; + m_pool.release(val); + iter.m_pos = (iter.m_pos & ~255) + p0; + return false; + } + else + { + * retVal = RNIL; + iter.m_sz++; + iter.m_ptr_i[iter.m_sz] = val; + iter.m_pos = (p0 << 8); + return false; + } + } + } + + if (p0 == 256) + { + if (iter.m_sz == 0) + goto done; + iter.m_sz--; + iter.m_pos >>= 8; + + m_pool.release(ptrI); + return false; + } + else + { + iter.m_pos = (iter.m_pos & ~255) + p0; + return false; + } + } + +done: + if (m_head.m_ptr_i != RNIL) + { + m_pool.release(m_head.m_ptr_i); + } + + new (&m_head) Head(); + return true; + +err: + require(false); + return false; +} + +static +inline +bool +seizenode(DA256Page* page, Uint32 idx, Uint32 type_id) +{ + Uint32 i; + Uint32 b = (idx + 1) >> 4; + Uint32 p = idx - (b << 4) + b; + + DA256Node * ptr = (DA256Node*)(page->m_nodes + idx); + +#ifdef DA256_USE_PREFETCH +#if defined(__GNUC__) && !(__GNUC__ == 2 && __GNUC_MINOR__ < 96) + __builtin_prefetch (page->m_header + b, 1); + for (i = 0; i<17; i++) + { + __builtin_prefetch (ptr->m_lines+i, 1); + } +#endif +#endif + +#ifdef DA256_EXTRA_SAFE + Uint32 check = type_id; +#endif + type_id = ((~type_id) << 16) | 0xFFFF; + +#ifdef DA256_EXTRA_SAFE + if (unlikely(((page->m_header + b)->m_magic & (1 << p)) != 0)) + { + return false; + } +#endif + + (page->m_header + b)->m_magic |= (1 << p); + (page->m_header + b)->m_data[p] = RNIL; + for (i = 0; i<17; i++) + { + DA256CL * line = ptr->m_lines + i; +#ifdef DA256_EXTRA_SAFE + if (unlikely(line->m_magic != check)) + { + return false; + } +#endif + line->m_magic = type_id; + for (Uint32 j = 0; j<15; j++) + line->m_data[j] = RNIL; + } + +#ifdef UNIT_TEST + allocatednodes++; +#endif + return true; +} + +static +bool +releasenode(DA256Page* page, Uint32 idx, Uint32 type_id) +{ + Uint32 i; + Uint32 b = (idx + 1) >> 4; + Uint32 p = idx - (b << 4) + b; + + DA256Node * ptr = (DA256Node*)(page->m_nodes + idx); + +#ifdef DA256_USE_PREFETCH +#if defined(__GNUC__) && !(__GNUC__ == 2 && __GNUC_MINOR__ < 96) + __builtin_prefetch (page->m_header + b, 1); + for (i = 0; i<17; i++) + { + __builtin_prefetch (ptr->m_lines+i, 1); + } +#endif +#endif + +#ifdef DA256_EXTRA_SAFE + Uint32 check = ((~type_id) << 16) | 0xFFFF; +#endif + +#ifdef DA256_EXTRA_SAFE + if (unlikely((((page->m_header + b)->m_magic & (1 << p)) == 0))) + { + return false; + } +#endif + + (page->m_header + b)->m_magic ^= (1 << p); + for (i = 0; i<17; i++) + { + DA256CL * line = ptr->m_lines + i; +#ifdef DA256_EXTRA_SAFE + if (unlikely(line->m_magic != 
check)) + { + return false; + } +#endif + line->m_magic = type_id; + } + +#ifdef UNIT_TEST + releasednodes++; +#endif + + return true; +} + +Uint32 +DynArr256Pool::seize() +{ + Uint32 ff = m_first_free; + Uint32 type_id = m_type_id; + + DA256Page* page; + DA256Page * memroot = m_memroot; + if (ff == RNIL) + { + Uint32 page_no; + if (likely((page = (DA256Page*)m_ctx.alloc_page(type_id, &page_no)) != 0)) + { + initpage(page, page_no, type_id); +#ifdef UNIT_TEST + allocatedpages++; +#endif + } + else + { + return RNIL; + } + ff = (page_no << DA256_BITS); + } + else + { + page = memroot + (ff >> DA256_BITS); + } + + Uint32 idx = ff & DA256_MASK; + DA256Free * ptr = (DA256Free*)(page->m_nodes + idx); + if (likely(ptr->m_magic == type_id)) + { + Uint32 next = ptr->m_next_free; + if (likely(seizenode(page, idx, type_id))) + { + m_first_free = next; + return ff; + } + } + +//error: + require(false); + return 0; +} + +void +DynArr256Pool::release(Uint32 ptrI) +{ + Uint32 ff = m_first_free; + Uint32 type_id = m_type_id; + + Uint32 page_no = ptrI >> DA256_BITS; + Uint32 page_idx = ptrI & DA256_MASK; + DA256Page * memroot = m_memroot; + DA256Page * page = memroot + page_no; + + DA256Free * ptr = (DA256Free*)(page->m_nodes + page_idx); + if (likely(releasenode(page, page_idx, type_id))) + { + ptr->m_next_free = ff; + ptr->m_magic = type_id; + m_first_free = ptrI; + return; + } + require(false); +} + +#ifdef UNIT_TEST + +#include <NdbTick.h> +#include "ndbd_malloc_impl.hpp" +#include "SimulatedBlock.hpp" + +Ndbd_mem_manager mm; +Configuration cfg; +Block_context ctx(cfg, mm); +struct BB : public SimulatedBlock +{ + BB(int no, Block_context& ctx) : SimulatedBlock(no, ctx) {} +}; + +BB block(DBACC, ctx); + +static +void +simple(DynArr256 & arr, int argc, char* argv[]) +{ + for (Uint32 i = 1; i<(Uint32)argc; i++) + { + Uint32 * s = arr.set(atoi(argv[i])); + { + bool found = false; + for (Uint32 j = 1; j<i; j++) + { + if (atoi(argv[i]) == atoi(argv[j])) + { + found = true; + break; + } + } + if (!found) + * s = i; + } + + Uint32 * g = arr.get(atoi(argv[i])); + Uint32 v = g ? 
*g : ~0; + ndbout_c("p: %p %p %d", s, g, v); + } +} + +static +void +basic(DynArr256& arr, int argc, char* argv[]) +{ +#define MAXLEN 65536 + + Uint32 len = 0; + Uint32 save[2*MAXLEN]; + for (Uint32 i = 0; i<MAXLEN; i++) + { + int op = (rand() % 100) > 50; + if (len == 0) + op = 1; + if (len == MAXLEN) + op = 0; + switch(op){ + case 0:{ // get + Uint32 item = (rand() % len) << 1; + Uint32 idx = save[item]; + Uint32 val = save[item+1]; + //ndbout_c("get(%d)", idx); + Uint32 *p = arr.get(idx); + assert(p); + assert(* p == val); + break; + } + case 1:{ // set + Uint32 item = len << 1; + Uint32 idx = i; //rand() & 0xFFFFF; // & 0xFFFFF; //rand(); //(65536*i) / 10000; + Uint32 val = rand(); +#if 0 + for(Uint32 j = 0; j < item; j += 2) + { + if (save[j] == idx) + { + item = j; + break; + } + } +#endif + //ndbout_c("set(%d, %x)", idx, val); + Uint32 *p = arr.set(idx); + assert(* p); + if (item == (len << 1)) + { + *p = val; + len++; + } + else + { + assert(* p == save[item+1]); + * p = val; + } + save[item] = idx; + save[item+1] = val; + } + } + } +} + +unsigned long long +micro() +{ + struct timeval tv; + gettimeofday(&tv, 0); + unsigned long long ret = tv.tv_sec; + ret *= 1000000; + ret += tv.tv_usec; + return ret; +} + +static +void +read(DynArr256& arr, int argc, char ** argv) +{ + Uint32 cnt = 100000; + Uint64 mbytes = 16*1024; + Uint32 seed = time(0); + Uint32 seq = 0, seqmask = 0; + + for (Uint32 i = 2; i<argc; i++) + { + if (strncmp(argv[i], "--mbytes=", sizeof("--mbytes=")-1) == 0) + { + mbytes = atoi(argv[i]+sizeof("--mbytes=")-1); + if (argv[i][strlen(argv[i])-1] == 'g' || + argv[i][strlen(argv[i])-1] == 'G') + mbytes *= 1024; + } + else if (strncmp(argv[i], "--cnt=", sizeof("--cnt=")-1) == 0) + { + cnt = atoi(argv[i]+sizeof("--cnt=")-1); + } + else if (strncmp(argv[i], "--seq", sizeof("--seq")-1) == 0) + { + seq = 1; + } + } + + /** + * Populate with 5Mb + */ + Uint32 maxidx = (1024*mbytes+31) / 32; + Uint32 nodes = (maxidx+255) / 256; + Uint32 pages = (nodes + 29)/ 30; + ndbout_c("%lldmb data -> %d entries (%dkb)", + mbytes, maxidx, 32*pages); + + for (Uint32 i = 0; i<maxidx; i++) + { + Uint32 *ptr = arr.set(i); + assert(ptr); + * ptr = i; + } + + srand(seed); + + if (seq) + { + seq = rand(); + seqmask = ~(Uint32)0; + } + + ndbout_c("Timing %d %s reads (seed: %u)", cnt, + seq ? 
"sequential" : "random", seed); + + for (Uint32 i = 0; i<10; i++) + { + Uint32 sum0 = 0, sum1 = 0; + Uint64 start = micro(); + for (Uint32 i = 0; i<cnt; i++) + { + Uint32 idx = ((rand() & (~seqmask)) + ((i + seq) & seqmask)) % maxidx; + Uint32 *ptr = arr.get(idx); + sum0 += idx; + sum1 += *ptr; + } + start = micro() - start; + float uspg = start; uspg /= cnt; + ndbout_c("Elapsed %lldus diff: %d -> %f us/get", start, sum0 - sum1, uspg); + } +} + +static +void +write(DynArr256& arr, int argc, char ** argv) +{ + Uint32 seq = 0, seqmask = 0; + Uint32 cnt = 100000; + Uint64 mbytes = 16*1024; + Uint32 seed = time(0); + + for (Uint32 i = 2; i<argc; i++) + { + if (strncmp(argv[i], "--mbytes=", sizeof("--mbytes=")-1) == 0) + { + mbytes = atoi(argv[i]+sizeof("--mbytes=")-1); + if (argv[i][strlen(argv[i])-1] == 'g' || + argv[i][strlen(argv[i])-1] == 'G') + mbytes *= 1024; + } + else if (strncmp(argv[i], "--cnt=", sizeof("--cnt=")-1) == 0) + { + cnt = atoi(argv[i]+sizeof("--cnt=")-1); + } + else if (strncmp(argv[i], "--seq", sizeof("--seq")-1) == 0) + { + seq = 1; + } + } + + /** + * Populate with 5Mb + */ + Uint32 maxidx = (1024*mbytes+31) / 32; + Uint32 nodes = (maxidx+255) / 256; + Uint32 pages = (nodes + 29)/ 30; + ndbout_c("%lldmb data -> %d entries (%dkb)", + mbytes, maxidx, 32*pages); + + srand(seed); + + if (seq) + { + seq = rand(); + seqmask = ~(Uint32)0; + } + + ndbout_c("Timing %d %s writes (seed: %u)", cnt, + seq ? "sequential" : "random", seed); + for (Uint32 i = 0; i<10; i++) + { + Uint64 start = micro(); + for (Uint32 i = 0; i<cnt; i++) + { + Uint32 idx = ((rand() & (~seqmask)) + ((i + seq) & seqmask)) % maxidx; + Uint32 *ptr = arr.set(idx); + *ptr = i; + } + start = micro() - start; + float uspg = start; uspg /= cnt; + ndbout_c("Elapsed %lldus -> %f us/set", start, uspg); + DynArr256::ReleaseIterator iter; + arr.init(iter); + while(!arr.release(iter)); + } +} + +int +main(int argc, char** argv) +{ + if (0) + { + for (Uint32 i = 0; i<30; i++) + { + Uint32 b = (i + 1) >> 4; + Uint32 p = i - (b << 4) + b; + printf("[ %d %d %d ]\n", i, b, p); + } + return 0; + } + + Pool_context pc; + pc.m_block = █ + + Resource_limit rl; + rl.m_min = 0; + rl.m_max = 10000; + rl.m_resource_id = 0; + mm.set_resource_limit(rl); + if(!mm.init()) + { + abort(); + } + + DynArr256Pool pool; + pool.init(0x2001, pc); + + DynArr256::Head head; + DynArr256 arr(pool, head); + + if (strcmp(argv[1], "--args") == 0) + simple(arr, argc, argv); + else if (strcmp(argv[1], "--basic") == 0) + basic(arr, argc, argv); + else if (strcmp(argv[1], "--read") == 0) + read(arr, argc, argv); + else if (strcmp(argv[1], "--write") == 0) + write(arr, argc, argv); + + DynArr256::ReleaseIterator iter; + arr.init(iter); + Uint32 cnt = 0; + while (!arr.release(iter)) cnt++; + + ndbout_c("allocatedpages: %d allocatednodes: %d releasednodes: %d" + " releasecnt: %d", + allocatedpages, + allocatednodes, + releasednodes, + cnt); + + return 0; +#if 0 + printf("sizeof(DA256Page): %d\n", sizeof(DA256Page)); + + DA256Page page; + + for (Uint32 i = 0; i<10000; i++) + { + Uint32 arg = rand() & 255; + Uint32 base = 0; + Uint32 idx = arg & 256; + printf("%d\n", arg); + + assert(base <= 30); + + if (idx == 255) + { + Uint32 b = (base + 1) >> 4; + Uint32 p = base - (b << 4) + b; + Uint32 magic = page.m_header[b].m_magic; + Uint32 retVal = page.m_header[b].m_data[p]; + + require(magic & (1 << p)); + return retVal; + } + else + { + // 4 bit extra offset per idx + Uint32 line = idx / 15; + Uint32 off = idx % 15; + + { + Uint32 pos = 1 + idx + line; + Uint32 
magic = pos & ~15; + + Uint32 * ptr = (Uint32*)&page.m_nodes[base]; + assert((ptr + pos) == &page.m_nodes[base].m_lines[line].m_data[off]); + assert((ptr + magic) == &page.m_nodes[base].m_lines[line].m_magic); + } + } + } +#endif +} + +Uint32 g_currentStartPhase; +Uint32 g_start_type; +NdbNodeBitmask g_nowait_nodes; + +void childExit(int code, Uint32 currentStartPhase) +{ + abort(); +} + +void childAbort(int code, Uint32 currentStartPhase) +{ + abort(); +} + +void childReportError(int error) +{ + abort(); +} + +void +UpgradeStartup::sendCmAppChg(Ndbcntr& cntr, Signal* signal, Uint32 startLevel){ +} + +void +UpgradeStartup::execCM_APPCHG(SimulatedBlock & block, Signal* signal){ +} + +void +UpgradeStartup::sendCntrMasterReq(Ndbcntr& cntr, Signal* signal, Uint32 n){ +} + +void +UpgradeStartup::execCNTR_MASTER_REPLY(SimulatedBlock & block, Signal* signal){ +} + +#include <SimBlockList.hpp> + +void +SimBlockList::unload() +{ + +} + +#endif diff --git a/storage/ndb/src/kernel/vm/DynArr256.hpp b/storage/ndb/src/kernel/vm/DynArr256.hpp new file mode 100644 index 00000000000..d70b126e8c2 --- /dev/null +++ b/storage/ndb/src/kernel/vm/DynArr256.hpp @@ -0,0 +1,79 @@ +/* Copyright (C) 2003 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#ifndef DYNARR256_HPP +#define DYNARR256_HPP + +#include "Pool.hpp" + +class DynArr256; +struct DA256Page; + +class DynArr256Pool +{ + friend class DynArr256; +public: + DynArr256Pool(); + + void init(Uint32 type_id, const Pool_context& pc); + +protected: + Uint32 m_type_id; + Uint32 m_first_free; + Pool_context m_ctx; + struct DA256Page* m_memroot; + +private: + Uint32 seize(); + void release(Uint32); +}; + +class DynArr256 +{ +public: + struct Head + { + Head() { m_ptr_i = RNIL; m_sz = 0;} + + Uint32 m_ptr_i; + Uint32 m_sz; + }; + + DynArr256(DynArr256Pool & pool, Head& head) : + m_head(head), m_pool(pool){} + + Uint32* set(Uint32 pos); + Uint32* get(Uint32 pos) const ; + + struct ReleaseIterator + { + Uint32 m_sz; + Uint32 m_pos; + Uint32 m_ptr_i[4]; + }; + + void init(ReleaseIterator&); + bool release(ReleaseIterator&); + +protected: + Head & m_head; + DynArr256Pool & m_pool; + + bool expand(Uint32 pos); + void handle_invalid_ptr(Uint32 pos, Uint32 ptrI, Uint32 p0); +}; + +#endif diff --git a/storage/ndb/src/kernel/vm/Makefile.am b/storage/ndb/src/kernel/vm/Makefile.am index 71fede97da3..5182e42e561 100644 --- a/storage/ndb/src/kernel/vm/Makefile.am +++ b/storage/ndb/src/kernel/vm/Makefile.am @@ -20,7 +20,8 @@ libkernel_a_SOURCES = \ Mutex.cpp SafeCounter.cpp \ Rope.cpp \ ndbd_malloc.cpp ndbd_malloc_impl.cpp \ - Pool.cpp WOPool.cpp RWPool.cpp + Pool.cpp WOPool.cpp RWPool.cpp \ + DynArr256.cpp INCLUDES_LOC = -I$(top_srcdir)/storage/ndb/src/mgmapi @@ -44,7 +45,7 @@ libkernel.dsp: Makefile \ @$(top_srcdir)/storage/ndb/config/win-sources $@ $(libkernel_a_SOURCES) @$(top_srcdir)/storage/ndb/config/win-libraries $@ LIB 
$(LDADD) -EXTRA_PROGRAMS = ndbd_malloc_impl_test bench_pool +EXTRA_PROGRAMS = ndbd_malloc_impl_test bench_pool testDynArr256 ndbd_malloc_impl_test_CXXFLAGS = -DUNIT_TEST ndbd_malloc_impl_test_SOURCES = ndbd_malloc_impl.cpp ndbd_malloc_impl_test_LDFLAGS = @ndb_bin_am_ldflags@ \ @@ -54,9 +55,19 @@ ndbd_malloc_impl_test_LDFLAGS = @ndb_bin_am_ldflags@ \ $(top_builddir)/strings/libmystrings.a bench_pool_SOURCES = bench_pool.cpp -bench_pool_LDFLAGS = @ndb_bin_am_ldflags@ ../SimBlockList.o \ +bench_pool_LDFLAGS = @ndb_bin_am_ldflags@\ libkernel.a ../error/liberror.a \ $(top_builddir)/storage/ndb/src/libndbclient.la \ $(top_builddir)/mysys/libmysys.a \ $(top_builddir)/dbug/libdbug.a \ $(top_builddir)/strings/libmystrings.a + +testDynArr256_CXXFLAGS = -DUNIT_TEST +testDynArr256_SOURCES = DynArr256.cpp +testDynArr256_LDFLAGS = @ndb_bin_am_ldflags@ \ + libkernel.a ../error/liberror.a \ + $(top_builddir)/storage/ndb/src/libndbclient.la \ + $(top_builddir)/mysys/libmysys.a \ + $(top_builddir)/dbug/libdbug.a \ + $(top_builddir)/strings/libmystrings.a + diff --git a/storage/ndb/src/kernel/vm/bench_pool.cpp b/storage/ndb/src/kernel/vm/bench_pool.cpp index d728710da61..31f8ea78e99 100644 --- a/storage/ndb/src/kernel/vm/bench_pool.cpp +++ b/storage/ndb/src/kernel/vm/bench_pool.cpp @@ -60,7 +60,7 @@ Uint32 sizes = 7; unsigned int seed; Ndbd_mem_manager mm; Configuration cfg; -Block_context ctx = { cfg, mm }; +Block_context ctx(cfg, mm); struct BB : public SimulatedBlock { BB(int no, Block_context& ctx) : SimulatedBlock(no, ctx) {} @@ -548,6 +548,8 @@ main(int argc, char **argv) } Uint32 g_currentStartPhase; +Uint32 g_start_type; +NdbNodeBitmask g_nowait_nodes; void childExit(int code, Uint32 currentStartPhase) { diff --git a/storage/ndb/src/kernel/vm/pc.hpp b/storage/ndb/src/kernel/vm/pc.hpp index 6b9b563aa27..37684fa4ca6 100644 --- a/storage/ndb/src/kernel/vm/pc.hpp +++ b/storage/ndb/src/kernel/vm/pc.hpp @@ -163,17 +163,10 @@ #define NO_OF_FRAGS_PER_CHUNK 4 #define LOG_NO_OF_FRAGS_PER_CHUNK 2 -/** -* Page Header Size for pages -*/ -#define ZPAGE_HEADER_SIZE 32 -#define ZPAGE_FRAG_PAGE_ID_POS 4 /* POSITION OF FRAG PAGE ID WHEN USED*/ - /* ---------------------------------------------------------------- */ // To avoid synching too big chunks at a time we synch after writing // a certain number of data/UNDO pages. (e.g. 2 MBytes). 
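// [Editorial aside -- not part of the changeset] Elsewhere in this
// changeset (AsyncFile.cpp/.hpp above) the fixed synch-every-N-pages
// scheme gives way to byte-based accounting: m_write_wo_sync counts bytes
// written since the last sync and m_auto_sync_freq carries the per-file
// threshold taken from the new FSOPENREQ auto_sync_size field (the
// DiskSyncSize parameter). A standalone sketch of that pattern, with
// illustrative names and a caller-supplied sync action:

#include <cstddef>

class AutoSyncAccounting {
  size_t m_bytes_wo_sync;   // analogous to AsyncFile::m_write_wo_sync
  size_t m_auto_sync_freq;  // threshold in bytes; 0 disables auto-sync
  void (*m_do_sync)();      // e.g. a wrapper around fsync(fd)
public:
  AutoSyncAccounting(size_t threshold, void (*do_sync)())
    : m_bytes_wo_sync(0), m_auto_sync_freq(threshold), m_do_sync(do_sync) {}

  void account_write(size_t bytes) {   // call after each completed write
    m_bytes_wo_sync += bytes;
    if (m_auto_sync_freq != 0 && m_bytes_wo_sync > m_auto_sync_freq)
      sync();
  }
  void sync() {                        // explicit FSSYNCREQ-style sync
    if (m_bytes_wo_sync == 0)
      return;                          // nothing outstanding: skip the sync
    m_do_sync();
    m_bytes_wo_sync = 0;
  }
};
// Thresholding by bytes rather than page count keeps sync cost bounded
// regardless of page size, which is why the page-based constant above is
// being retired.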
/* ---------------------------------------------------------------- */ -#define MAX_PAGES_WITHOUT_SYNCH 64 #define MAX_REDO_PAGES_WITHOUT_SYNCH 32 /* ------------------------------------------------------------------ */ diff --git a/storage/ndb/src/mgmapi/mgmapi.cpp b/storage/ndb/src/mgmapi/mgmapi.cpp index f9eb2abd003..11d81fa0fdd 100644 --- a/storage/ndb/src/mgmapi/mgmapi.cpp +++ b/storage/ndb/src/mgmapi/mgmapi.cpp @@ -1395,7 +1395,7 @@ ndb_mgm_listen_event_internal(NdbMgmHandle handle, const int filter[], MGM_END() }; CHECK_HANDLE(handle, -1); - + const char *hostname= ndb_mgm_get_connected_host(handle); int port= ndb_mgm_get_connected_port(handle); SocketClient s(hostname, port); @@ -1417,19 +1417,20 @@ ndb_mgm_listen_event_internal(NdbMgmHandle handle, const int filter[], } args.put("filter", tmp.c_str()); } - + int tmp = handle->socket; handle->socket = sockfd; - + const Properties *reply; reply = ndb_mgm_call(handle, stat_reply, "listen event", &args); - + handle->socket = tmp; - + if(reply == NULL) { close(sockfd); CHECK_REPLY(reply, -1); } + delete reply; return sockfd; } @@ -1442,7 +1443,7 @@ ndb_mgm_listen_event(NdbMgmHandle handle, const int filter[]) extern "C" int -ndb_mgm_dump_state(NdbMgmHandle handle, int nodeId, int* _args, +ndb_mgm_dump_state(NdbMgmHandle handle, int nodeId, const int * _args, int _num_args, struct ndb_mgm_reply* /* reply */) { SET_ERROR(handle, NDB_MGM_NO_ERROR, "Executing: ndb_mgm_dump_state"); diff --git a/storage/ndb/src/mgmapi/ndb_logevent.cpp b/storage/ndb/src/mgmapi/ndb_logevent.cpp index 50766e09134..3195fd1db42 100644 --- a/storage/ndb/src/mgmapi/ndb_logevent.cpp +++ b/storage/ndb/src/mgmapi/ndb_logevent.cpp @@ -68,6 +68,13 @@ ndb_mgm_create_logevent_handle(NdbMgmHandle mh, } extern "C" +int +ndb_logevent_get_fd(const NdbLogEventHandle h) +{ + return h->socket; +} + +extern "C" void ndb_mgm_destroy_logevent_handle(NdbLogEventHandle * h) { if( !h ) diff --git a/storage/ndb/src/mgmclient/CommandInterpreter.cpp b/storage/ndb/src/mgmclient/CommandInterpreter.cpp index c31efebbf1e..948bad9c48f 100644 --- a/storage/ndb/src/mgmclient/CommandInterpreter.cpp +++ b/storage/ndb/src/mgmclient/CommandInterpreter.cpp @@ -161,8 +161,15 @@ private: int try_reconnect; int m_error; struct NdbThread* m_event_thread; + NdbMutex *m_print_mutex; }; +struct event_thread_param { + NdbMgmHandle *m; + NdbMutex **p; +}; + +NdbMutex* print_mutex; /* * Facade object for CommandInterpreter @@ -340,6 +347,7 @@ CommandInterpreter::CommandInterpreter(const char *_host,int verbose) m_connected= false; m_event_thread= 0; try_reconnect = 0; + m_print_mutex= NdbMutex_Create(); } /* @@ -348,6 +356,7 @@ CommandInterpreter::CommandInterpreter(const char *_host,int verbose) CommandInterpreter::~CommandInterpreter() { disconnect(); + NdbMutex_Destroy(m_print_mutex); } static bool @@ -384,11 +393,13 @@ CommandInterpreter::printError() static int do_event_thread; static void* -event_thread_run(void* m) +event_thread_run(void* p) { DBUG_ENTER("event_thread_run"); - NdbMgmHandle handle= *(NdbMgmHandle*)m; + struct event_thread_param param= *(struct event_thread_param*)p; + NdbMgmHandle handle= *(param.m); + NdbMutex* printmutex= *(param.p); int filter[] = { 15, NDB_MGM_EVENT_CATEGORY_BACKUP, 1, NDB_MGM_EVENT_CATEGORY_STARTUP, @@ -406,7 +417,11 @@ event_thread_run(void* m) { const char ping_token[]= "<PING>"; if (memcmp(ping_token,tmp,sizeof(ping_token)-1)) - ndbout << tmp; + if(tmp && strlen(tmp)) + { + Guard g(printmutex); + ndbout << tmp; + } } } while(do_event_thread); 
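// [Editorial aside -- not part of the changeset] The new
// ndb_logevent_get_fd() accessor added in ndb_logevent.cpp above exposes
// the event socket, so a client can multiplex log events with other
// descriptors instead of blocking in ndb_logevent_get_next(). A hedged
// usage sketch (error handling trimmed; the filter array follows the
// usual {level, category, ..., 0} convention assumed from the thread
// code above):

#include <sys/select.h>
#include <mgmapi.h>
#include <ndb_logevent.h>

static void wait_for_one_event(NdbMgmHandle mgm)
{
  int filter[] = { 15, NDB_MGM_EVENT_CATEGORY_BACKUP, 0 };
  NdbLogEventHandle le = ndb_mgm_create_logevent_handle(mgm, filter);
  if (le == 0)
    return;

  int fd = ndb_logevent_get_fd(le);
  fd_set rset;
  FD_ZERO(&rset);
  FD_SET(fd, &rset);
  if (select(fd + 1, &rset, 0, 0, 0) > 0)   // event data is pending
  {
    struct ndb_logevent ev;
    ndb_logevent_get_next(le, &ev, 0);      // socket is readable here
  }
  ndb_mgm_destroy_logevent_handle(&le);
}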
       NDB_CLOSE_SOCKET(fd);
@@ -459,8 +474,11 @@ CommandInterpreter::connect()
     assert(m_event_thread == 0);
     assert(do_event_thread == 0);
     do_event_thread= 0;
+    struct event_thread_param p;
+    p.m= &m_mgmsrv2;
+    p.p= &m_print_mutex;
     m_event_thread = NdbThread_Create(event_thread_run,
-                                      (void**)&m_mgmsrv2,
+                                      (void**)&p,
                                       32768,
                                       "CommandInterpreted_event_thread",
                                       NDB_THREAD_PRIO_LOW);
@@ -547,6 +565,7 @@ CommandInterpreter::execute(const char *_line, int _try_reconnect,
   int result= execute_impl(_line);
   if (error)
     *error= m_error;
+
   return result;
 }
 
@@ -626,6 +645,7 @@ CommandInterpreter::execute_impl(const char *_line)
     DBUG_RETURN(true);
   if (strcasecmp(firstToken, "SHOW") == 0) {
+    Guard g(m_print_mutex);
     executeShow(allAfterFirstToken);
     DBUG_RETURN(true);
   }
@@ -853,6 +873,7 @@ CommandInterpreter::executeForAll(const char * cmd, ExecuteFunction fun,
     ndbout_c("Trying to start all nodes of system.");
     ndbout_c("Use ALL STATUS to see the system start-up phases.");
   } else {
+    Guard g(m_print_mutex);
     struct ndb_mgm_cluster_state *cl= ndb_mgm_get_status(m_mgmsrv);
     if(cl == 0){
       ndbout_c("Unable get status from management server");
@@ -1152,6 +1173,7 @@ CommandInterpreter::executeShow(char* parameters)
 
     if(it == 0){
       ndbout_c("Unable to create config iterator");
+      ndb_mgm_destroy_configuration(conf);
       return;
     }
     NdbAutoPtr<ndb_mgm_configuration_iterator> ptr(it);
@@ -1196,6 +1218,7 @@ CommandInterpreter::executeShow(char* parameters)
     print_nodes(state, it, "ndb_mgmd", mgm_nodes, NDB_MGM_NODE_TYPE_MGM, 0);
     print_nodes(state, it, "mysqld", api_nodes, NDB_MGM_NODE_TYPE_API, 0);
     //    ndbout << helpTextShow;
+    ndb_mgm_destroy_configuration(conf);
     return;
   } else if (strcasecmp(parameters, "PROPERTIES") == 0 ||
              strcasecmp(parameters, "PROP") == 0) {
diff --git a/storage/ndb/src/mgmsrv/ConfigInfo.cpp b/storage/ndb/src/mgmsrv/ConfigInfo.cpp
index 6c172a29819..fb15e35ecc7 100644
--- a/storage/ndb/src/mgmsrv/ConfigInfo.cpp
+++ b/storage/ndb/src/mgmsrv/ConfigInfo.cpp
@@ -877,7 +877,7 @@ const ConfigInfo::ParamInfo ConfigInfo::m_ParamInfo[] = {
     ConfigInfo::CI_USED,
     false,
     ConfigInfo::CI_INT,
-    "8",
+    "16",
     "3",
     STR_VALUE(MAX_INT_RNIL) },
 
@@ -952,8 +952,8 @@ const ConfigInfo::ParamInfo ConfigInfo::m_ParamInfo[] = {
     CFG_DB_LCP_DISC_PAGES_TUP_SR,
     "NoOfDiskPagesToDiskDuringRestartTUP",
     DB_TOKEN,
-    "?",
-    ConfigInfo::CI_USED,
+    "DiskCheckpointSpeedSr",
+    ConfigInfo::CI_DEPRICATED,
     true,
     ConfigInfo::CI_INT,
     "40",
@@ -964,8 +964,8 @@ const ConfigInfo::ParamInfo ConfigInfo::m_ParamInfo[] = {
     CFG_DB_LCP_DISC_PAGES_TUP,
     "NoOfDiskPagesToDiskAfterRestartTUP",
     DB_TOKEN,
-    "?",
-    ConfigInfo::CI_USED,
+    "DiskCheckpointSpeed",
+    ConfigInfo::CI_DEPRICATED,
     true,
     ConfigInfo::CI_INT,
     "40",
@@ -976,8 +976,8 @@ const ConfigInfo::ParamInfo ConfigInfo::m_ParamInfo[] = {
     CFG_DB_LCP_DISC_PAGES_ACC_SR,
     "NoOfDiskPagesToDiskDuringRestartACC",
     DB_TOKEN,
-    "?",
-    ConfigInfo::CI_USED,
+    "DiskCheckpointSpeedSr",
+    ConfigInfo::CI_DEPRICATED,
     true,
     ConfigInfo::CI_INT,
     "20",
@@ -988,8 +988,8 @@ const ConfigInfo::ParamInfo ConfigInfo::m_ParamInfo[] = {
     CFG_DB_LCP_DISC_PAGES_ACC,
     "NoOfDiskPagesToDiskAfterRestartACC",
     DB_TOKEN,
-    "?",
-    ConfigInfo::CI_USED,
+    "DiskCheckpointSpeed",
+    ConfigInfo::CI_DEPRICATED,
     true,
     ConfigInfo::CI_INT,
     "20",
@@ -1192,6 +1192,42 @@ const ConfigInfo::ParamInfo ConfigInfo::m_ParamInfo[] = {
     0, 0 },
 
   {
+    CFG_DB_DISK_SYNCH_SIZE,
+    "DiskSyncSize",
+    DB_TOKEN,
+    "Data written to a file before a synch is forced",
+    ConfigInfo::CI_USED,
+    false,
+    ConfigInfo::CI_INT,
+    "4M",
+    "32k",
+    STR_VALUE(MAX_INT_RNIL) },
+
+  {
+    CFG_DB_CHECKPOINT_SPEED,
+    "DiskCheckpointSpeed",
+    DB_TOKEN,
+    "Bytes per second allowed to be written by checkpoint",
+    ConfigInfo::CI_USED,
+    false,
+    ConfigInfo::CI_INT,
+    "10M",
+    "1M",
+    STR_VALUE(MAX_INT_RNIL) },
+
+  {
+    CFG_DB_CHECKPOINT_SPEED_SR,
+    "DiskCheckpointSpeedInRestart",
+    DB_TOKEN,
+    "Bytes per second allowed to be written by checkpoint during restart",
+    ConfigInfo::CI_USED,
+    false,
+    ConfigInfo::CI_INT,
+    "100M",
+    "1M",
+    STR_VALUE(MAX_INT_RNIL) },
+
+  {
     CFG_DB_BACKUP_MEM,
     "BackupMemory",
     DB_TOKEN,
diff --git a/storage/ndb/src/mgmsrv/InitConfigFileParser.cpp b/storage/ndb/src/mgmsrv/InitConfigFileParser.cpp
index 68a5f02f4c5..bf5cb9d726e 100644
--- a/storage/ndb/src/mgmsrv/InitConfigFileParser.cpp
+++ b/storage/ndb/src/mgmsrv/InitConfigFileParser.cpp
@@ -655,6 +655,18 @@ InitConfigFileParser::store_in_properties(Vector<struct my_option>& options,
                        m_info->getMax(ctx.m_currentInfo, fname));
       return false;
     }
+
+    ConfigInfo::Status status = m_info->getStatus(ctx.m_currentInfo, fname);
+    if (status == ConfigInfo::CI_DEPRICATED) {
+      const char * desc = m_info->getDescription(ctx.m_currentInfo, fname);
+      if(desc && desc[0]){
+        ctx.reportWarning("[%s] %s is depricated, use %s instead",
+                          ctx.fname, fname, desc);
+      } else if (desc == 0){
+        ctx.reportWarning("[%s] %s is depricated", ctx.fname, fname);
+      }
+    }
+
     if (options[i].var_type == GET_INT)
       ctx.m_currentSection->put(options[i].name, (Uint32)value_int);
     else
diff --git a/storage/ndb/src/mgmsrv/MgmtSrvr.cpp b/storage/ndb/src/mgmsrv/MgmtSrvr.cpp
index 50a623920d2..a2a56905392 100644
--- a/storage/ndb/src/mgmsrv/MgmtSrvr.cpp
+++ b/storage/ndb/src/mgmsrv/MgmtSrvr.cpp
@@ -77,7 +77,6 @@
   }\
 }
 
-extern int global_flag_send_heartbeat_now;
 extern int g_no_nodeid_checks;
 extern my_bool opt_core;
 
@@ -1450,6 +1449,12 @@ MgmtSrvr::exitSingleUser(int * stopCount, bool abort)
 
 #include <ClusterMgr.hpp>
 
+void
+MgmtSrvr::updateStatus()
+{
+  theFacade->theClusterMgr->forceHB();
+}
+
 int
 MgmtSrvr::status(int nodeId,
                  ndb_mgm_node_status * _status,
@@ -2260,7 +2265,7 @@ MgmtSrvr::alloc_node_id(NodeId * nodeId,
   if (found_matching_type && !found_free_node) {
     // we have a temporary error which might be due to that
     // we have got the latest connect status from db-nodes.  Force update.
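With the ConfigInfo changes above, the four NoOfDiskPagesToDisk* parameters are marked deprecated and InitConfigFileParser now emits a warning that points at their replacements. A minimal config.ini fragment showing the migration; the values are the defaults from the parameter table above, and the [ndbd default] section layout is the usual cluster-config convention, not part of this patch:

  [ndbd default]
  # Deprecated, now warned about at parse time:
  # NoOfDiskPagesToDiskAfterRestartTUP=40
  # NoOfDiskPagesToDiskAfterRestartACC=20

  # Replacements introduced above:
  DiskSyncSize=4M
  DiskCheckpointSpeed=10M
  DiskCheckpointSpeedInRestart=100M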
-    global_flag_send_heartbeat_now= 1;
+    updateStatus();
   }
 
   BaseString type_string, type_c_string;
@@ -2603,7 +2608,7 @@ MgmtSrvr::Allocated_resources::~Allocated_resources()
   if (!m_reserved_nodes.isclear()) {
     m_mgmsrv.m_reserved_nodes.bitANDC(m_reserved_nodes);
     // node has been reserved, force update signal to ndb nodes
-    global_flag_send_heartbeat_now= 1;
+    m_mgmsrv.updateStatus();
 
     char tmp_str[128];
     m_mgmsrv.m_reserved_nodes.getText(tmp_str);
diff --git a/storage/ndb/src/mgmsrv/MgmtSrvr.hpp b/storage/ndb/src/mgmsrv/MgmtSrvr.hpp
index 1473ec90c33..5eee7447c98 100644
--- a/storage/ndb/src/mgmsrv/MgmtSrvr.hpp
+++ b/storage/ndb/src/mgmsrv/MgmtSrvr.hpp
@@ -485,6 +485,8 @@ public:
   void get_connected_nodes(NodeBitmask &connected_nodes) const;
   SocketServer *get_socket_server() { return m_socket_server; }
 
+  void updateStatus();
+
   //**************************************************************************
 private:
   //**************************************************************************
diff --git a/storage/ndb/src/mgmsrv/Services.cpp b/storage/ndb/src/mgmsrv/Services.cpp
index cc7892f8b36..5132b343fbd 100644
--- a/storage/ndb/src/mgmsrv/Services.cpp
+++ b/storage/ndb/src/mgmsrv/Services.cpp
@@ -972,6 +972,7 @@ printNodeStatus(OutputStream *output,
                 MgmtSrvr &mgmsrv,
                 enum ndb_mgm_node_type type) {
   NodeId nodeId = 0;
+  mgmsrv.updateStatus();
   while(mgmsrv.getNextNodeId(&nodeId, type)) {
     enum ndb_mgm_node_status status;
     Uint32 startPhase = 0,
diff --git a/storage/ndb/src/ndbapi/ClusterMgr.cpp b/storage/ndb/src/ndbapi/ClusterMgr.cpp
index 49815ae6c13..b171457c2a9 100644
--- a/storage/ndb/src/ndbapi/ClusterMgr.cpp
+++ b/storage/ndb/src/ndbapi/ClusterMgr.cpp
@@ -37,8 +37,8 @@
 #include <mgmapi_configuration.hpp>
 #include <mgmapi_config_parameters.h>
 
-int global_flag_send_heartbeat_now= 0;
 int global_flag_skip_invalidate_cache = 0;
+//#define DEBUG_REG
 
 // Just a C wrapper for threadMain
 extern "C"
@@ -68,6 +68,8 @@ ClusterMgr::ClusterMgr(TransporterFacade & _facade):
   DBUG_ENTER("ClusterMgr::ClusterMgr");
   ndbSetOwnVersion();
   clusterMgrThreadMutex = NdbMutex_Create();
+  waitForHBCond= NdbCondition_Create();
+  waitingForHB= false;
   noOfAliveNodes= 0;
   noOfConnectedNodes= 0;
   theClusterMgrThread= 0;
@@ -79,7 +81,8 @@ ClusterMgr::ClusterMgr(TransporterFacade & _facade):
 ClusterMgr::~ClusterMgr()
 {
   DBUG_ENTER("ClusterMgr::~ClusterMgr");
-  doStop();
+  doStop();
+  NdbCondition_Destroy(waitForHBCond);
   NdbMutex_Destroy(clusterMgrThreadMutex);
   DBUG_VOID_RETURN;
 }
@@ -154,6 +157,70 @@ ClusterMgr::doStop( ){
 }
 
 void
+ClusterMgr::forceHB()
+{
+  theFacade.lock_mutex();
+
+  if(waitingForHB)
+  {
+    NdbCondition_WaitTimeout(waitForHBCond, theFacade.theMutexPtr, 1000);
+    theFacade.unlock_mutex();
+    return;
+  }
+
+  waitingForHB= true;
+
+  NodeBitmask ndb_nodes;
+  ndb_nodes.clear();
+  waitForHBFromNodes.clear();
+  for(Uint32 i = 0; i < MAX_NODES; i++)
+  {
+    if(!theNodes[i].defined)
+      continue;
+    if(theNodes[i].m_info.m_type == NodeInfo::DB)
+    {
+      ndb_nodes.set(i);
+      const ClusterMgr::Node &node= getNodeInfo(i);
+      waitForHBFromNodes.bitOR(node.m_state.m_connected_nodes);
+    }
+  }
+  waitForHBFromNodes.bitAND(ndb_nodes);
+
+#ifdef DEBUG_REG
+  char buf[128];
+  ndbout << "Waiting for HB from " << waitForHBFromNodes.getText(buf) << endl;
+#endif
+  NdbApiSignal signal(numberToRef(API_CLUSTERMGR, theFacade.ownId()));
+
+  signal.theVerId_signalNumber   = GSN_API_REGREQ;
+  signal.theReceiversBlockNumber = QMGR;
+  signal.theTrace                = 0;
+  signal.theLength               = ApiRegReq::SignalLength;
+
+  ApiRegReq * req = CAST_PTR(ApiRegReq, signal.getDataPtrSend());
+  req->ref = numberToRef(API_CLUSTERMGR, theFacade.ownId());
+  req->version = NDB_VERSION;
+
+  int nodeId= 0;
+  for(int i=0;
+      NodeBitmask::NotFound!=(nodeId= waitForHBFromNodes.find(i));
+      i= nodeId+1)
+  {
+#ifdef DEBUG_REG
+    ndbout << "FORCE HB to " << nodeId << endl;
+#endif
+    theFacade.sendSignalUnCond(&signal, nodeId);
+  }
+
+  NdbCondition_WaitTimeout(waitForHBCond, theFacade.theMutexPtr, 1000);
+  waitingForHB= false;
+#ifdef DEBUG_REG
+  ndbout << "Still waiting for HB from " << waitForHBFromNodes.getText(buf) << endl;
+#endif
+  theFacade.unlock_mutex();
+}
+
+void
 ClusterMgr::threadMain( ){
   NdbApiSignal signal(numberToRef(API_CLUSTERMGR, theFacade.ownId()));
 
@@ -174,9 +241,6 @@ ClusterMgr::threadMain( ){
     /**
      * Start of Secure area for use of Transporter
      */
-    int send_heartbeat_now= global_flag_send_heartbeat_now;
-    global_flag_send_heartbeat_now= 0;
-
     if (m_cluster_state == CS_waiting_for_clean_cache)
     {
       theFacade.m_globalDictCache.lock();
@@ -209,8 +273,7 @@ ClusterMgr::threadMain( ){
       }
 
       theNode.hbCounter += timeSlept;
-      if (theNode.hbCounter >= theNode.hbFrequency ||
-          send_heartbeat_now) {
+      if (theNode.hbCounter >= theNode.hbFrequency) {
         /**
          * It is now time to send a new Heartbeat
          */
@@ -219,7 +282,7 @@ ClusterMgr::threadMain( ){
           theNode.hbCounter = 0;
        }
 
-#if 0
+#ifdef DEBUG_REG
        ndbout_c("ClusterMgr: Sending API_REGREQ to node %d", (int)nodeId);
 #endif
        theFacade.sendSignalUnCond(&signal, nodeId);
@@ -272,7 +335,7 @@ ClusterMgr::execAPI_REGREQ(const Uint32 * theData){
   const ApiRegReq * const apiRegReq = (ApiRegReq *)&theData[0];
   const NodeId nodeId = refToNode(apiRegReq->ref);
 
-#if 0
+#ifdef DEBUG_REG
   ndbout_c("ClusterMgr: Recd API_REGREQ from node %d", nodeId);
 #endif
 
@@ -313,7 +376,7 @@ ClusterMgr::execAPI_REGCONF(const Uint32 * theData){
   const ApiRegConf * const apiRegConf = (ApiRegConf *)&theData[0];
   const NodeId nodeId = refToNode(apiRegConf->qmgrRef);
 
-#if 0
+#ifdef DEBUG_REG
   ndbout_c("ClusterMgr: Recd API_REGCONF from node %d", nodeId);
 #endif
 
@@ -342,6 +405,17 @@ ClusterMgr::execAPI_REGCONF(const Uint32 * theData){
   }//if
   node.m_info.m_heartbeat_cnt = 0;
   node.hbCounter = 0;
+
+  if(waitingForHB)
+  {
+    waitForHBFromNodes.clear(nodeId);
+
+    if(waitForHBFromNodes.isclear())
+    {
+      waitingForHB= false;
+      NdbCondition_Broadcast(waitForHBCond);
+    }
+  }
 
   node.hbFrequency = (apiRegConf->apiHeartbeatFrequency * 10) - 50;
 }
@@ -371,6 +445,10 @@ ClusterMgr::execAPI_REGREF(const Uint32 * theData){
   default:
     break;
   }
+
+  waitForHBFromNodes.clear(nodeId);
+  if(waitForHBFromNodes.isclear())
+    NdbCondition_Signal(waitForHBCond);
 }
 
 void
diff --git a/storage/ndb/src/ndbapi/ClusterMgr.hpp b/storage/ndb/src/ndbapi/ClusterMgr.hpp
index ca879e7948e..20912938cf3 100644
--- a/storage/ndb/src/ndbapi/ClusterMgr.hpp
+++ b/storage/ndb/src/ndbapi/ClusterMgr.hpp
@@ -49,7 +49,9 @@ public:
   void doStop();
   void startThread();
-  
+
+  void forceHB();
+
 private:
   void threadMain();
 
@@ -91,6 +93,11 @@ private:
   Uint32        noOfConnectedNodes;
   Node          theNodes[MAX_NODES];
   NdbThread*    theClusterMgrThread;
+
+  NodeBitmask   waitForHBFromNodes; // used in forcing HBs
+  NdbCondition* waitForHBCond;
+  bool          waitingForHB;
+
   enum Cluster_state m_cluster_state;
   /**
    * Used for controlling start/stop of the thread
diff --git a/storage/ndb/src/ndbapi/NdbBlob.cpp b/storage/ndb/src/ndbapi/NdbBlob.cpp
index 081c7e2f995..4c4e9328894 100644
--- a/storage/ndb/src/ndbapi/NdbBlob.cpp
+++ b/storage/ndb/src/ndbapi/NdbBlob.cpp
@@ -1894,7 +1894,8 @@ int
 NdbBlob::atNextEvent()
 {
   DBUG_ENTER("NdbBlob::atNextEvent");
-  Uint32 optype = theEventOp->m_data_item->sdata->operation;
+  Uint32 optype =
+    SubTableData::getOperation(theEventOp->m_data_item->sdata->requestInfo);
   DBUG_PRINT("info", ("this=%p op=%p blob op=%p version=%d optype=%u",
             this, theEventOp, theBlobEventOp, theEventBlobVersion, optype));
   if (theState == Invalid)
     DBUG_RETURN(-1);
diff --git a/storage/ndb/src/ndbapi/NdbDictionaryImpl.cpp b/storage/ndb/src/ndbapi/NdbDictionaryImpl.cpp
index 5de686a257f..6c8a447f627 100644
--- a/storage/ndb/src/ndbapi/NdbDictionaryImpl.cpp
+++ b/storage/ndb/src/ndbapi/NdbDictionaryImpl.cpp
@@ -2545,6 +2545,9 @@ loop:
   else
   {
     // error set by get filegroup
+    if (m_error.code == 723)
+      m_error.code = 755;
+
     NdbMem_Free((void*)tmpTab);
     DBUG_RETURN(-1);
   }
diff --git a/storage/ndb/src/ndbapi/NdbEventOperation.cpp b/storage/ndb/src/ndbapi/NdbEventOperation.cpp
index 4e96ee63565..cbcab9b2641 100644
--- a/storage/ndb/src/ndbapi/NdbEventOperation.cpp
+++ b/storage/ndb/src/ndbapi/NdbEventOperation.cpp
@@ -192,12 +192,12 @@ void * NdbEventOperation::getCustomData() const
 
 int NdbEventOperation::getReqNodeId() const
 {
-  return m_impl.m_data_item->sdata->req_nodeid;
+  return SubTableData::getReqNodeId(m_impl.m_data_item->sdata->requestInfo);
 }
 
 int NdbEventOperation::getNdbdNodeId() const
 {
-  return m_impl.m_data_item->sdata->ndbd_nodeid;
+  return SubTableData::getNdbdNodeId(m_impl.m_data_item->sdata->requestInfo);
 }
 
 /*
diff --git a/storage/ndb/src/ndbapi/NdbEventOperationImpl.cpp b/storage/ndb/src/ndbapi/NdbEventOperationImpl.cpp
index d3992981522..9f2a5844476 100644
--- a/storage/ndb/src/ndbapi/NdbEventOperationImpl.cpp
+++ b/storage/ndb/src/ndbapi/NdbEventOperationImpl.cpp
@@ -54,7 +54,8 @@ static const Uint32 ACTIVE_GCI_MASK = ACTIVE_GCI_DIRECTORY_SIZE - 1;
 static void
 print_std(const SubTableData * sdata, LinearSectionPtr ptr[3])
 {
-  printf("addr=%p gci=%d op=%d\n", (void*)sdata, sdata->gci, sdata->operation);
+  printf("addr=%p gci=%d op=%d\n", (void*)sdata, sdata->gci,
+         SubTableData::getOperation(sdata->requestInfo));
   for (int i = 0; i <= 2; i++) {
     printf("sec=%d addr=%p sz=%d\n", i, (void*)ptr[i].p, ptr[i].sz);
     for (int j = 0; j < ptr[i].sz; j++)
@@ -672,7 +673,8 @@ NdbEventOperationImpl::execSUB_TABLE_DATA(NdbApiSignal * signal,
 int
 NdbEventOperationImpl::receive_event()
 {
-  Uint32 operation= (Uint32)m_data_item->sdata->operation;
+  Uint32 operation=
+    SubTableData::getOperation(m_data_item->sdata->requestInfo);
   if (unlikely(operation >= NdbDictionary::Event::_TE_FIRST_NON_DATA_EVENT))
   {
     DBUG_ENTER("NdbEventOperationImpl::receive_event");
@@ -869,7 +871,7 @@ NdbDictionary::Event::TableEvent
 NdbEventOperationImpl::getEventType()
 {
   return (NdbDictionary::Event::TableEvent)
-    (1 << (unsigned)m_data_item->sdata->operation);
+    (1 << SubTableData::getOperation(m_data_item->sdata->requestInfo));
 }
 
 
@@ -1192,7 +1194,8 @@ NdbEventBuffer::nextEvent()
     }
     assert(gci_ops && (op->getGCI() == gci_ops->m_gci));
     // to return TE_NUL it should be made into data event
-    if (data->sdata->operation == NdbDictionary::Event::_TE_NUL)
+    if (SubTableData::getOperation(data->sdata->requestInfo) ==
+        NdbDictionary::Event::_TE_NUL)
     {
       DBUG_PRINT_EVENT("info", ("skip _TE_NUL"));
       continue;
@@ -1559,9 +1562,11 @@ NdbEventBuffer::report_node_connected(Uint32 node_id)
   bzero(ptr, sizeof(ptr));
 
   data.tableId = ~0;
-  data.operation = NdbDictionary::Event::_TE_ACTIVE;
-  data.req_nodeid = (Uint8)node_id;
-  data.ndbd_nodeid = (Uint8)node_id;
+  data.requestInfo = 0;
+  SubTableData::setOperation(data.requestInfo,
+                             NdbDictionary::Event::_TE_ACTIVE);
+  SubTableData::setReqNodeId(data.requestInfo, node_id);
+  SubTableData::setNdbdNodeId(data.requestInfo, node_id);
   data.logType = SubTableData::LOG;
   data.gci = m_latestGCI + 1;
   /**
@@ -1599,9 +1604,11 @@ NdbEventBuffer::report_node_failure(Uint32 node_id)
   bzero(ptr, sizeof(ptr));
 
   data.tableId = ~0;
-  data.operation = NdbDictionary::Event::_TE_NODE_FAILURE;
-  data.req_nodeid = (Uint8)node_id;
-  data.ndbd_nodeid = (Uint8)node_id;
+  data.requestInfo = 0;
+  SubTableData::setOperation(data.requestInfo,
+                             NdbDictionary::Event::_TE_NODE_FAILURE);
+  SubTableData::setReqNodeId(data.requestInfo, node_id);
+  SubTableData::setNdbdNodeId(data.requestInfo, node_id);
   data.logType = SubTableData::LOG;
   data.gci = m_latestGCI + 1;
   /**
@@ -1639,7 +1646,9 @@ NdbEventBuffer::completeClusterFailed()
   bzero(ptr, sizeof(ptr));
 
   data.tableId = ~0;
-  data.operation = NdbDictionary::Event::_TE_CLUSTER_FAILURE;
+  data.requestInfo = 0;
+  SubTableData::setOperation(data.requestInfo,
+                             NdbDictionary::Event::_TE_CLUSTER_FAILURE);
   data.logType = SubTableData::LOG;
   data.gci = m_latestGCI + 1;
 
@@ -1742,19 +1751,21 @@ NdbEventBuffer::insertDataL(NdbEventOperationImpl *op,
                             LinearSectionPtr ptr[3])
 {
   DBUG_ENTER_EVENT("NdbEventBuffer::insertDataL");
+  const Uint32 ri = sdata->requestInfo;
+  const Uint32 operation = SubTableData::getOperation(ri);
   Uint64 gci= sdata->gci;
-  const bool is_data_event =
-    sdata->operation < NdbDictionary::Event::_TE_FIRST_NON_DATA_EVENT;
+  const bool is_data_event =
+    operation < NdbDictionary::Event::_TE_FIRST_NON_DATA_EVENT;
 
   if (!is_data_event)
   {
-    switch (sdata->operation)
+    switch (operation)
     {
     case NdbDictionary::Event::_TE_NODE_FAILURE:
-      op->m_node_bit_mask.clear(sdata->ndbd_nodeid);
+      op->m_node_bit_mask.clear(SubTableData::getNdbdNodeId(ri));
       break;
     case NdbDictionary::Event::_TE_ACTIVE:
-      op->m_node_bit_mask.set(sdata->ndbd_nodeid);
+      op->m_node_bit_mask.set(SubTableData::getNdbdNodeId(ri));
       // internal event, do not relay to user
       DBUG_RETURN_EVENT(0);
       break;
@@ -1765,7 +1776,7 @@ NdbEventBuffer::insertDataL(NdbEventOperationImpl *op,
       DBUG_PRINT("info", ("m_ref_count: %u for op: %p", op->m_ref_count, op));
       break;
     case NdbDictionary::Event::_TE_STOP:
-      op->m_node_bit_mask.clear(sdata->ndbd_nodeid);
+      op->m_node_bit_mask.clear(SubTableData::getNdbdNodeId(ri));
       if (op->m_node_bit_mask.isclear())
       {
         DBUG_ASSERT(op->m_ref_count > 0);
@@ -1777,19 +1788,20 @@ NdbEventBuffer::insertDataL(NdbEventOperationImpl *op,
       break;
     }
   }
-  
+
-  if ( likely((Uint32)op->mi_type & (1 << (Uint32)sdata->operation)) )
+  if ( likely((Uint32)op->mi_type & (1 << operation)))
   {
     Gci_container* bucket= find_bucket(&m_active_gci, gci
 #ifdef VM_TRACE
                                        , m_flush_gci
 #endif
                                        );
-    
+
     DBUG_PRINT_EVENT("info", ("data insertion in eventId %d", op->m_eventId));
     DBUG_PRINT_EVENT("info", ("gci=%d tab=%d op=%d node=%d",
-                              sdata->gci, sdata->tableId, sdata->operation,
-                              sdata->req_nodeid));
+                              sdata->gci, sdata->tableId,
+                              SubTableData::getOperation(sdata->requestInfo),
+                              SubTableData::getReqNodeId(sdata->requestInfo)));
 
     if (unlikely(bucket == 0))
     {
@@ -1809,7 +1821,7 @@ NdbEventBuffer::insertDataL(NdbEventOperationImpl *op,
       DBUG_PRINT_EVENT("info", ("ignore non-data event on blob table"));
       DBUG_RETURN_EVENT(0);
     }
-    
+
     // find position in bucket hash table
     EventBufData* data = 0;
     EventBufData_hash::Pos hpos;
@@ -1818,7 +1830,7 @@ NdbEventBuffer::insertDataL(NdbEventOperationImpl *op,
       bucket->m_data_hash.search(hpos, op, ptr);
       data = hpos.data;
     }
-    
+
     if (data == 0)
     {
       // allocate new result buffer
@@ -1886,20 +1898,22 @@ NdbEventBuffer::insertDataL(NdbEventOperationImpl *op,
       // since the flags represent multiple ops on multiple PKs
       // XXX fix by doing merge at end of epoch (extra mem cost)
       {
-        EventBufData_list::Gci_op g = { op, (1 << sdata->operation) };
+        EventBufData_list::Gci_op g = { op, (1 << operation) };
         bucket->m_data.add_gci_op(g);
       }
       {
-        EventBufData_list::Gci_op g = { op, (1 << data->sdata->operation) };
+        EventBufData_list::Gci_op
+          g = { op,
+                (1 << SubTableData::getOperation(data->sdata->requestInfo))};
         bucket->m_data.add_gci_op(g);
       }
     }
   }
   DBUG_RETURN_EVENT(0);
 }
-  
+
 #ifdef VM_TRACE
-  if ((Uint32)op->m_eventImpl->mi_type & (1 << (Uint32)sdata->operation))
+  if ((Uint32)op->m_eventImpl->mi_type & (1 << operation))
   {
     DBUG_PRINT_EVENT("info",("Data arrived before ready eventId", op->m_eventId));
     DBUG_RETURN_EVENT(0);
@@ -2085,8 +2099,8 @@ NdbEventBuffer::merge_data(const SubTableData * const sdata,
 
   Uint32 nkey = data->m_event_op->m_eventImpl->m_tableImpl->m_noOfKeys;
 
-  int t1 = data->sdata->operation;
-  int t2 = sdata->operation;
+  int t1 = SubTableData::getOperation(data->sdata->requestInfo);
+  int t2 = SubTableData::getOperation(sdata->requestInfo);
   if (t1 == Ev_t::enum_NUL)
     DBUG_RETURN_EVENT(copy_data(sdata, ptr2, data));
 
@@ -2150,7 +2164,7 @@ NdbEventBuffer::merge_data(const SubTableData * const sdata,
     goto end;
   }
   *data->sdata = *sdata;
-  data->sdata->operation = tp->t3;
+  SubTableData::setOperation(data->sdata->requestInfo, tp->t3);
 
   ptr[0].sz = ptr[1].sz = ptr[2].sz = 0;
@@ -2342,7 +2356,7 @@ NdbEventBuffer::get_main_data(Gci_container* bucket,
     DBUG_RETURN_EVENT(-1);
   SubTableData sdata = *blob_data->sdata;
   sdata.tableId = main_op->m_eventImpl->m_tableImpl->m_id;
-  sdata.operation = NdbDictionary::Event::_TE_NUL;
+  SubTableData::setOperation(sdata.requestInfo, NdbDictionary::Event::_TE_NUL);
   if (copy_data(&sdata, ptr, main_data) != 0)
     DBUG_RETURN_EVENT(-1);
   hpos.data = main_data;
diff --git a/storage/ndb/src/ndbapi/NdbEventOperationImpl.hpp b/storage/ndb/src/ndbapi/NdbEventOperationImpl.hpp
index 1444d182042..0d6618a7365 100644
--- a/storage/ndb/src/ndbapi/NdbEventOperationImpl.hpp
+++ b/storage/ndb/src/ndbapi/NdbEventOperationImpl.hpp
@@ -225,7 +225,8 @@ void EventBufData_list::append_used_data(EventBufData *data)
 inline
 void EventBufData_list::append_data(EventBufData *data)
 {
-  Gci_op g = { data->m_event_op, 1 << (Uint32)data->sdata->operation };
+  Gci_op g = { data->m_event_op,
+               1 << SubTableData::getOperation(data->sdata->requestInfo) };
   add_gci_op(g);
 
   append_used_data(data);
diff --git a/storage/ndb/src/ndbapi/NdbScanOperation.cpp b/storage/ndb/src/ndbapi/NdbScanOperation.cpp
index 21caf8349b6..21e4f215c65 100644
--- a/storage/ndb/src/ndbapi/NdbScanOperation.cpp
+++ b/storage/ndb/src/ndbapi/NdbScanOperation.cpp
@@ -496,6 +496,7 @@ int NdbScanOperation::nextResultImpl(bool fetchAllowed, bool forceSend)
 
     idx = m_current_api_receiver;
     last = m_api_receivers_count;
+    Uint32 timeout = tp->m_waitfor_timeout;
 
     do {
       if(theError.code){
@@ -521,8 +522,7 @@ int NdbScanOperation::nextResultImpl(bool fetchAllowed, bool forceSend)
        /**
        * No completed...
        */
-       int ret_code= poll_guard.wait_scan(WAITFOR_SCAN_TIMEOUT, nodeId,
-                                          forceSend);
+       int ret_code= poll_guard.wait_scan(3*timeout, nodeId, forceSend);
        if (ret_code == 0 && seq == tp->getNodeSequence(nodeId)) {
          continue;
        } else {
@@ -1425,13 +1425,13 @@ NdbIndexScanOperation::next_result_ordered(bool fetchAllowed,
       return -1;
     Uint32 seq = theNdbCon->theNodeSequence;
     Uint32 nodeId = theNdbCon->theDBnode;
+    Uint32 timeout = tp->m_waitfor_timeout;
     if(seq == tp->getNodeSequence(nodeId) &&
       !send_next_scan_ordered(s_idx)){
       Uint32 tmp = m_sent_receivers_count;
       s_idx = m_current_api_receiver;
       while(m_sent_receivers_count > 0 && !theError.code){
-       int ret_code= poll_guard.wait_scan(WAITFOR_SCAN_TIMEOUT, nodeId,
-                                          forceSend);
+       int ret_code= poll_guard.wait_scan(3*timeout, nodeId, forceSend);
        if (ret_code == 0 && seq == tp->getNodeSequence(nodeId)) {
          continue;
        }
@@ -1574,12 +1574,13 @@ NdbScanOperation::close_impl(TransporterFacade* tp, bool forceSend,
       return -1;
   }
 
+  Uint32 timeout = tp->m_waitfor_timeout;
   /**
   * Wait for outstanding
   */
   while(theError.code == 0 && m_sent_receivers_count)
   {
-    int return_code= poll_guard->wait_scan(WAITFOR_SCAN_TIMEOUT, nodeId, forceSend);
+    int return_code= poll_guard->wait_scan(3*timeout, nodeId, forceSend);
     switch(return_code){
     case 0:
       break;
@@ -1647,8 +1648,7 @@ NdbScanOperation::close_impl(TransporterFacade* tp, bool forceSend,
   */
   while(m_sent_receivers_count+m_api_receivers_count+m_conf_receivers_count)
   {
-    int return_code= poll_guard->wait_scan(WAITFOR_SCAN_TIMEOUT, nodeId,
-                                           forceSend);
+    int return_code= poll_guard->wait_scan(3*timeout, nodeId, forceSend);
     switch(return_code){
     case 0:
       break;
diff --git a/storage/ndb/src/ndbapi/Ndbif.cpp b/storage/ndb/src/ndbapi/Ndbif.cpp
index 0527744afe1..5683ebe2e6f 100644
--- a/storage/ndb/src/ndbapi/Ndbif.cpp
+++ b/storage/ndb/src/ndbapi/Ndbif.cpp
@@ -742,22 +742,27 @@ Ndb::handleReceivedSignal(NdbApiSignal* aSignal, LinearSectionPtr ptr[3])
 
       const Uint32 oid = sdata->senderData;
       NdbEventOperationImpl *op= (NdbEventOperationImpl*)int2void(oid);
-      if (op->m_magic_number != NDB_EVENT_OP_MAGIC_NUMBER)
+      if (unlikely(op == 0 || op->m_magic_number != NDB_EVENT_OP_MAGIC_NUMBER))
+      {
        g_eventLogger.error("dropped GSN_SUB_TABLE_DATA due to wrong magic "
                            "number");
+       return ;
+      }
 
       // Accumulate DIC_TAB_INFO for TE_ALTER events
-      if (sdata->operation == NdbDictionary::Event::_TE_ALTER &&
+      if (SubTableData::getOperation(sdata->requestInfo) ==
+          NdbDictionary::Event::_TE_ALTER &&
          !op->execSUB_TABLE_DATA(aSignal, ptr))
        return;
-      
+
       for (int i= aSignal->m_noOfSections;i < 3; i++) {
        ptr[i].p = NULL;
        ptr[i].sz = 0;
       }
       DBUG_PRINT("info",("oid=senderData: %d, gci: %d, operation: %d, "
                          "tableId: %d",
-                        sdata->senderData, sdata->gci, sdata->operation,
+                        sdata->senderData, sdata->gci,
+                        SubTableData::getOperation(sdata->requestInfo),
                         sdata->tableId));
 
       theEventBuffer->insertDataL(op,sdata, ptr);
diff --git a/storage/ndb/src/ndbapi/TransporterFacade.cpp b/storage/ndb/src/ndbapi/TransporterFacade.cpp
index 2f421271e91..8d0693f17a7 100644
--- a/storage/ndb/src/ndbapi/TransporterFacade.cpp
+++ b/storage/ndb/src/ndbapi/TransporterFacade.cpp
@@ -1265,6 +1265,7 @@ TransporterFacade::get_an_alive_node()
 }
 
 TransporterFacade::ThreadData::ThreadData(Uint32 size){
+  m_use_cnt = 0;
   m_firstFree = END_OF_LIST;
   expand(size);
 }
@@ -1302,6 +1303,7 @@ TransporterFacade::ThreadData::open(void* objRef,
     nextFree = m_firstFree;
   }
 
+  m_use_cnt++;
   m_firstFree = m_statusNext[nextFree];
 
   Object_Execute oe = { objRef , fun };
@@ -1318,6 +1320,8 @@ TransporterFacade::ThreadData::close(int number){
   number= numberToIndex(number);
   assert(getInUse(number));
   m_statusNext[number] = m_firstFree;
+  assert(m_use_cnt);
+  m_use_cnt--;
   m_firstFree = number;
   Object_Execute oe = { 0, 0 };
   m_objectExecute[number] = oe;
@@ -1325,6 +1329,12 @@ TransporterFacade::ThreadData::close(int number){
   return 0;
 }
 
+Uint32
+TransporterFacade::get_active_ndb_objects() const
+{
+  return m_threads.m_use_cnt;
+}
+
 PollGuard::PollGuard(TransporterFacade *tp, NdbWaiter *aWaiter,
                      Uint32 block_no)
 {
diff --git a/storage/ndb/src/ndbapi/TransporterFacade.hpp b/storage/ndb/src/ndbapi/TransporterFacade.hpp
index 2d47a2febf8..e444b7e77bb 100644
--- a/storage/ndb/src/ndbapi/TransporterFacade.hpp
+++ b/storage/ndb/src/ndbapi/TransporterFacade.hpp
@@ -68,6 +68,7 @@ public:
 
   // Close this block number
   int close(BlockNumber blockNumber, Uint64 trans_id);
+  Uint32 get_active_ndb_objects() const;
 
   // Only sends to nodes which are alive
   int sendSignal(NdbApiSignal * signal, NodeId nodeId);
@@ -240,6 +241,7 @@ private:
       NodeStatusFunction m_statusFunction;
     };
 
+    Uint32 m_use_cnt;
     Uint32 m_firstFree;
     Vector<Uint32> m_statusNext;
     Vector<Object_Execute> m_objectExecute;
diff --git a/storage/ndb/src/ndbapi/ndb_cluster_connection.cpp b/storage/ndb/src/ndbapi/ndb_cluster_connection.cpp
index 12264a60082..b7c43bf81c9 100644
--- a/storage/ndb/src/ndbapi/ndb_cluster_connection.cpp
+++ b/storage/ndb/src/ndbapi/ndb_cluster_connection.cpp
@@ -664,5 +664,10 @@ Ndb_cluster_connection::get_next_node(Ndb_cluster_connection_node_iter &iter)
   return m_impl.get_next_node(iter);
 }
 
+unsigned
+Ndb_cluster_connection::get_active_ndb_objects() const
+{
+  return m_impl.m_transporter_facade->get_active_ndb_objects();
+}
 
 template class Vector<Ndb_cluster_connection_impl::Node>;
diff --git a/storage/ndb/src/ndbapi/ndberror.c b/storage/ndb/src/ndbapi/ndberror.c
index d0d26c19cfa..c3874cbf8eb 100644
--- a/storage/ndb/src/ndbapi/ndberror.c
+++ b/storage/ndb/src/ndbapi/ndberror.c
@@ -376,7 +376,7 @@ ErrorBundle ErrorCodes[] = {
   { 752, DMEC, SE, "Invalid file format" },
   { 753, IE, SE, "Invalid filegroup for file" },
   { 754, IE, SE, "Invalid filegroup version when creating file" },
-  { 755, DMEC, SE, "Invalid tablespace" },
+  { 755, HA_WRONG_CREATE_OPTION, SE, "Invalid tablespace" },
   { 756, DMEC, SE, "Index on disk column is not supported" },
   { 757, DMEC, SE, "Varsize bitfield not supported" },
   { 758, DMEC, SE, "Tablespace has changed" },
diff --git a/storage/ndb/test/include/HugoTransactions.hpp b/storage/ndb/test/include/HugoTransactions.hpp
index 5c987a576bc..f9acaf322f7 100644
--- a/storage/ndb/test/include/HugoTransactions.hpp
+++ b/storage/ndb/test/include/HugoTransactions.hpp
@@ -34,7 +34,8 @@ public:
                 bool allowConstraintViolation = true,
                 int doSleep = 0,
                 bool oneTrans = false,
-                int updateValue = 0);
+                int updateValue = 0,
+                bool abort = false);
 
   int scanReadRecords(Ndb*,
                       int records,
diff --git a/storage/ndb/test/include/NDBT_Tables.hpp b/storage/ndb/test/include/NDBT_Tables.hpp
index 0b4a9f8bb09..75c81c70f1d 100644
--- a/storage/ndb/test/include/NDBT_Tables.hpp
+++ b/storage/ndb/test/include/NDBT_Tables.hpp
@@ -44,6 +44,8 @@ public:
 
   static const NdbDictionary::Table* getTable(int _num);
   static int getNumTables();
 
+  static const char** getIndexes(const char* table);
+
   static int create_default_tablespace(Ndb* pNdb);
 
 private:
diff --git a/storage/ndb/test/include/NdbRestarter.hpp b/storage/ndb/test/include/NdbRestarter.hpp
index 403e21ab62e..37b03e187e0 100644
--- a/storage/ndb/test/include/NdbRestarter.hpp
+++ b/storage/ndb/test/include/NdbRestarter.hpp
@@ -38,16 +38,16 @@ public:
                     bool abort = false);
 
   int startAll();
-  int startNodes(int * _nodes, int _num_nodes);
+  int startNodes(const int * _nodes, int _num_nodes);
 
   int waitClusterStarted(unsigned int _timeout = 120);
   int waitClusterSingleUser(unsigned int _timeout = 120);
   int waitClusterStartPhase(int _startphase, unsigned int _timeout = 120);
   int waitClusterNoStart(unsigned int _timeout = 120);
 
-  int waitNodesStarted(int * _nodes, int _num_nodes,
+  int waitNodesStarted(const int * _nodes, int _num_nodes,
                        unsigned int _timeout = 120);
-  int waitNodesStartPhase(int * _nodes, int _num_nodes,
+  int waitNodesStartPhase(const int * _nodes, int _num_nodes,
                           int _startphase, unsigned int _timeout = 120);
-  int waitNodesNoStart(int * _nodes, int _num_nodes,
+  int waitNodesNoStart(const int * _nodes, int _num_nodes,
                        unsigned int _timeout = 120);
 
@@ -58,8 +58,8 @@ public:
   int enterSingleUserMode(int _nodeId);
   int exitSingleUserMode();
 
-  int dumpStateOneNode(int _nodeId, int * _args, int _num_args);
-  int dumpStateAllNodes(int * _args, int _num_args);
+  int dumpStateOneNode(int _nodeId, const int * _args, int _num_args);
+  int dumpStateAllNodes(const int * _args, int _num_args);
 
   int getMasterNodeId();
   int getRandomNodeSameNodeGroup(int nodeId, int randomNumber);
@@ -74,7 +74,7 @@ protected:
                     unsigned int _timeout,
                     int _startphase = -1);
 
-  int waitNodesState(int * _nodes, int _num_nodes,
+  int waitNodesState(const int * _nodes, int _num_nodes,
                     ndb_mgm_node_status _status,
                     unsigned int _timeout,
                     int _startphase = -1);
diff --git a/storage/ndb/test/ndbapi/testDict.cpp b/storage/ndb/test/ndbapi/testDict.cpp
index 2972123cbbe..9bc085aa48d 100644
--- a/storage/ndb/test/ndbapi/testDict.cpp
+++ b/storage/ndb/test/ndbapi/testDict.cpp
@@ -1022,8 +1022,8 @@ int verifyTablesAreEqual(const NdbDictionary::Table* pTab, const NdbDictionary::
   if (!pTab->equal(*pTab2)){
     g_err << "equal failed" << endl;
-    g_info << *pTab;
-    g_info << *pTab2;
+    g_info << *(NDBT_Table*)pTab; // gcc-4.1.2
+    g_info << *(NDBT_Table*)pTab2;
     return NDBT_FAILED;
   }
   return NDBT_OK;
@@ -1033,7 +1033,7 @@ int runGetPrimaryKey(NDBT_Context* ctx, NDBT_Step* step){
   Ndb* pNdb = GETNDB(step);
   const NdbDictionary::Table* pTab = ctx->getTab();
   ndbout << "|- " << pTab->getName() << endl;
-  g_info << *pTab;
+  g_info << *(NDBT_Table*)pTab;
 
   // Try to create table in db
   if (pTab->createTableInDb(pNdb) != 0){
     return NDBT_FAILED;
@@ -1985,7 +1985,53 @@ runDictOps(NDBT_Context* ctx, NDBT_Step* step)
     // replace by the Retrieved table
     pTab = pTab2;
 
-    int records = ctx->getNumRecords();
+    // create indexes
+    const char** indlist = NDBT_Tables::getIndexes(tabName);
+    uint indnum = 0;
+    while (*indlist != 0) {
+      uint count = 0;
+    try_create_index:
+      count++;
+      if (count == 1)
+        g_info << "2: create index " << indnum << " " << *indlist << endl;
+      NdbDictionary::Index ind;
+      char indName[200];
+      sprintf(indName, "%s_X%u", tabName, indnum);
+      ind.setName(indName);
+      ind.setTable(tabName);
+      if (strcmp(*indlist, "UNIQUE") == 0) {
+        ind.setType(NdbDictionary::Index::UniqueHashIndex);
+        ind.setLogging(pTab->getLogging());
+      } else if (strcmp(*indlist, "ORDERED") == 0) {
+        ind.setType(NdbDictionary::Index::OrderedIndex);
+        ind.setLogging(false);
+      } else {
+        assert(false);
+      }
+      const char** indtemp = indlist;
+      while (*++indtemp != 0) {
+        ind.addColumn(*indtemp);
+      }
+      if (pDic->createIndex(ind) != 0) {
+        const NdbError err = pDic->getNdbError();
+        if (count == 1)
+          g_err << "2: " << indName << ": create failed: " << err << endl;
+        if (err.code != 711) {
+          result = NDBT_FAILED;
+          break;
+        }
+        NdbSleep_MilliSleep(myRandom48(maxsleep));
+        goto try_create_index;
+      }
+      indlist = ++indtemp;
+      indnum++;
+    }
+    if (result == NDBT_FAILED)
+      break;
+
+    uint indcount = indnum;
+
+    int records = myRandom48(ctx->getNumRecords());
     g_info << "2: load " << records << " records" << endl;
     HugoTransactions hugoTrans(*pTab);
     if (hugoTrans.loadTable(pNdb, records) != 0) {
@@ -1996,6 +2042,32 @@ runDictOps(NDBT_Context* ctx, NDBT_Step* step)
     }
     NdbSleep_MilliSleep(myRandom48(maxsleep));
 
+    // drop indexes
+    indnum = 0;
+    while (indnum < indcount) {
+      uint count = 0;
+    try_drop_index:
+      count++;
+      if (count == 1)
+        g_info << "2: drop index " << indnum << endl;
+      char indName[200];
+      sprintf(indName, "%s_X%u", tabName, indnum);
+      if (pDic->dropIndex(indName, tabName) != 0) {
+        const NdbError err = pDic->getNdbError();
+        if (count == 1)
+          g_err << "2: " << indName << ": drop failed: " << err << endl;
+        if (err.code != 711) {
+          result = NDBT_FAILED;
+          break;
+        }
+        NdbSleep_MilliSleep(myRandom48(maxsleep));
+        goto try_drop_index;
+      }
+      indnum++;
+    }
+    if (result == NDBT_FAILED)
+      break;
+
     g_info << "2: drop" << endl;
     {
       uint count = 0;
diff --git a/storage/ndb/test/ndbapi/testIndex.cpp b/storage/ndb/test/ndbapi/testIndex.cpp
index 5785db232c4..c25aae55897 100644
--- a/storage/ndb/test/ndbapi/testIndex.cpp
+++ b/storage/ndb/test/ndbapi/testIndex.cpp
@@ -1199,6 +1199,48 @@ int runLQHKEYREF(NDBT_Context* ctx, NDBT_Step* step){
   return NDBT_OK;
 }
 
+int
+runBug21384(NDBT_Context* ctx, NDBT_Step* step)
+{
+  Ndb* pNdb = GETNDB(step);
+  HugoTransactions hugoTrans(*ctx->getTab());
+  NdbRestarter restarter;
+
+  int loops = ctx->getNumLoops();
+  const int rows = ctx->getNumRecords();
+  const int batchsize = ctx->getProperty("BatchSize", 50);
+
+  while (loops--)
+  {
+    if(restarter.insertErrorInAllNodes(8037) != 0)
+    {
+      g_err << "Failed to error insert(8037)" << endl;
+      return NDBT_FAILED;
+    }
+
+    if (hugoTrans.indexReadRecords(pNdb, pkIdxName, rows, batchsize) == 0)
+    {
+      g_err << "Index succeded (it should have failed" << endl;
+      return NDBT_FAILED;
+    }
+
+    if(restarter.insertErrorInAllNodes(0) != 0)
+    {
+      g_err << "Failed to error insert(0)" << endl;
+      return NDBT_FAILED;
+    }
+
+    if (hugoTrans.indexReadRecords(pNdb, pkIdxName, rows, batchsize) != 0){
+      g_err << "Index read failed" << endl;
+      return NDBT_FAILED;
+    }
+  }
+
+  return NDBT_OK;
+}
+
+
+
 NDBT_TESTSUITE(testIndex);
 TESTCASE("CreateAll",
         "Test that we can create all various indexes on each table\n"
@@ -1512,6 +1554,16 @@ TESTCASE("UniqueNull",
   FINALIZER(createPkIndex_Drop);
   FINALIZER(runClearTable);
 }
+TESTCASE("Bug21384",
+        "Test that unique indexes and nulls"){
+  TC_PROPERTY("LoggedIndexes", (unsigned)0);
+  INITIALIZER(runClearTable);
+  INITIALIZER(createPkIndex);
+  INITIALIZER(runLoadTable);
+  STEP(runBug21384);
+  FINALIZER(createPkIndex_Drop);
+  FINALIZER(runClearTable);
+}
 NDBT_TESTSUITE_END(testIndex);
 
 int main(int argc, const char** argv){
diff --git a/storage/ndb/test/ndbapi/testNodeRestart.cpp b/storage/ndb/test/ndbapi/testNodeRestart.cpp
index ad1ea5ed6f2..444ed5c9eb4 100644
--- a/storage/ndb/test/ndbapi/testNodeRestart.cpp
+++ b/storage/ndb/test/ndbapi/testNodeRestart.cpp
@@ -931,6 +931,41 @@ retry:
   return NDBT_OK;
 }
 
+int
+runBug21271(NDBT_Context* ctx, NDBT_Step* step){
+  int result = NDBT_OK;
+  int loops = ctx->getNumLoops();
+  int records = ctx->getNumRecords();
+  NdbRestarter restarter;
+  HugoOperations hugoOps(*ctx->getTab());
+  Ndb* pNdb = GETNDB(step);
+
+  const int masterNode = restarter.getMasterNodeId();
+  const int nodeId = restarter.getRandomNodeSameNodeGroup(masterNode, rand());
+
+  int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };
+  if (restarter.dumpStateOneNode(nodeId, val2, 2))
+    return NDBT_FAILED;
+
+  Uint32 tableId = ctx->getTab()->getTableId();
+  int dump[] = { DumpStateOrd::LqhErrorInsert5042, 0, 5044 };
+  dump[1] = tableId;
+
+  if (restarter.dumpStateOneNode(nodeId, dump, 3))
+    return NDBT_FAILED;
+
+  restarter.waitNodesNoStart(&nodeId, 1);
+  ctx->stopTest();
+
+  restarter.startNodes(&nodeId, 1);
+
+  if (restarter.waitClusterStarted() != 0)
+    return NDBT_FAILED;
+
+  return NDBT_OK;
+  return NDBT_OK;
+}
+
 NDBT_TESTSUITE(testNodeRestart);
 TESTCASE("NoLoad",
@@ -1244,6 +1279,13 @@ TESTCASE("Bug20185",
  STEP(runBug20185);
  FINALIZER(runClearTable);
 }
+TESTCASE("Bug21271",
+        ""){
+  INITIALIZER(runLoadTable);
+  STEP(runBug21271);
+  STEP(runPkUpdateUntilStopped);
+  FINALIZER(runClearTable);
+}
 NDBT_TESTSUITE_END(testNodeRestart);
 
 int main(int argc, const char** argv){
diff --git a/storage/ndb/test/ndbapi/testSRBank.cpp b/storage/ndb/test/ndbapi/testSRBank.cpp
index 22fd45bc255..e42a372555d 100644
--- a/storage/ndb/test/ndbapi/testSRBank.cpp
+++ b/storage/ndb/test/ndbapi/testSRBank.cpp
@@ -124,6 +124,7 @@ int runBankSum(NDBT_Context* ctx, NDBT_Step* step){
       result = NDBT_FAILED; \
       continue; }
 
+static
 int
 restart_cluster(NDBT_Context* ctx, NDBT_Step* step, NdbRestarter& restarter)
 {
@@ -177,6 +178,7 @@ restart_cluster(NDBT_Context* ctx, NDBT_Step* step, NdbRestarter& restarter)
   return result;
 }
 
+static
 ndb_mgm_node_state*
 select_node_to_stop(Vector<ndb_mgm_node_state>& nodes)
 {
@@ -215,6 +217,7 @@ select_node_to_stop(Vector<ndb_mgm_node_state>& nodes)
   }
 }
 
+static
 ndb_mgm_node_state*
 select_node_to_start(Vector<ndb_mgm_node_state>& nodes)
 {
@@ -294,23 +297,27 @@ loop:
        goto loop;
 
       if (action == AA_RestartNode)
-      {
        g_err << "Restarting " << node->node_id << endl;
-       if (restarter.restartOneDbNode(node->node_id, false, false, true))
-         return NDBT_FAILED;
-      }
-      if (action == AA_StopNode)
-      {
+      else
        g_err << "Stopping " << node->node_id << endl;
-       if (restarter.restartOneDbNode(node->node_id, false, true, true))
-         return NDBT_FAILED;
-       node->node_status = NDB_MGM_NODE_STATUS_NOT_STARTED;
-      }
-      break;
+
+      if (restarter.restartOneDbNode(node->node_id, false, true, true))
+       return NDBT_FAILED;
+
+      if (restarter.waitNodesNoStart(&node->node_id, 1))
+       return NDBT_FAILED;
+
+      node->node_status = NDB_MGM_NODE_STATUS_NOT_STARTED;
+
+      if (action == AA_StopNode)
+       break;
+      else
+       goto start;
     }
     case AA_StartNode:
       if ((node = select_node_to_start(nodes)) == 0)
       goto loop;
+  start:
      g_err << "Starting " << node->node_id << endl;
      if (restarter.startNodes(&node->node_id, 1))
       return NDBT_FAILED;
@@ -321,7 +328,26 @@ loop:
      break;
    }
  }
+
+  Vector<int> not_started;
+  {
+    ndb_mgm_node_state* node = 0;
+    while((node = select_node_to_start(nodes)))
+    {
+      not_started.push_back(node->node_id);
+      node->node_status = NDB_MGM_NODE_STATUS_STARTED;
+    }
+  }
+
+  if (not_started.size())
+  {
+    g_err << "Starting stopped nodes " << endl;
+    if (restarter.startNodes(not_started.getBase(), not_started.size()))
+      return NDBT_FAILED;
+    if (restarter.waitClusterStarted())
+      return NDBT_FAILED;
+  }
+
   ctx->stopTest();
   return NDBT_OK;
 }
diff --git a/storage/ndb/test/ndbapi/testSystemRestart.cpp b/storage/ndb/test/ndbapi/testSystemRestart.cpp
index 30f7aca9b06..8a0100ff3e4 100644
--- a/storage/ndb/test/ndbapi/testSystemRestart.cpp
+++ b/storage/ndb/test/ndbapi/testSystemRestart.cpp
@@ -1121,6 +1121,46 @@ int runClearTable(NDBT_Context* ctx, NDBT_Step* step){
   return NDBT_OK;
 }
 
+int
+runBug21536(NDBT_Context* ctx, NDBT_Step* step)
+{
+  NdbRestarter restarter;
+  const Uint32 nodeCount = restarter.getNumDbNodes();
+  if(nodeCount != 2){
+    g_info << "Bug21536 - 2 nodes to test" << endl;
+    return NDBT_OK;
+  }
+
+  int node1 = restarter.getDbNodeId(rand() % nodeCount);
+  int node2 = restarter.getRandomNodeSameNodeGroup(node1, rand());
+
+  if (node1 == -1 || node2 == -1)
+    return NDBT_OK;
+
+  int result = NDBT_OK;
+  do {
+    CHECK(restarter.restartOneDbNode(node1, false, true, true) == 0);
+    CHECK(restarter.waitNodesNoStart(&node1, 1) == 0);
+    CHECK(restarter.insertErrorInNode(node1, 1000) == 0);
+    int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };
+    CHECK(restarter.dumpStateOneNode(node1, val2, 2) == 0);
+    CHECK(restarter.startNodes(&node1, 1) == 0);
+    restarter.waitNodesStartPhase(&node1, 1, 3, 120);
+    CHECK(restarter.waitNodesNoStart(&node1, 1) == 0);
+
+    CHECK(restarter.restartOneDbNode(node2, true, true, true) == 0);
+    CHECK(restarter.waitNodesNoStart(&node2, 1) == 0);
+    CHECK(restarter.startNodes(&node1, 1) == 0);
+    CHECK(restarter.waitNodesStarted(&node1, 1) == 0);
+    CHECK(restarter.startNodes(&node2, 1) == 0);
+    CHECK(restarter.waitClusterStarted() == 0);
+
+  } while(0);
+
+  g_info << "Bug21536 finished" << endl;
+
+  return result;
+}
 
 NDBT_TESTSUITE(testSystemRestart);
 TESTCASE("SR1",
@@ -1287,6 +1327,13 @@ TESTCASE("Bug18385",
  STEP(runBug18385);
  FINALIZER(runClearTable);
 }
+TESTCASE("Bug21536",
+        "Perform partition system restart with other nodes with higher GCI"){
+  INITIALIZER(runWaitStarted);
+  INITIALIZER(runClearTable);
+  STEP(runBug21536);
+  FINALIZER(runClearTable);
+}
 NDBT_TESTSUITE_END(testSystemRestart);
 
 int main(int argc, const char** argv){
diff --git a/storage/ndb/test/run-test/daily-basic-tests.txt b/storage/ndb/test/run-test/daily-basic-tests.txt
index f58a6f050d5..ee669ba5f1c 100644
--- a/storage/ndb/test/run-test/daily-basic-tests.txt
+++ b/storage/ndb/test/run-test/daily-basic-tests.txt
@@ -489,6 +489,14 @@ max-time: 1000
 cmd: testNodeRestart
 args: -n Bug20185 T1
 
+max-time: 1000
+cmd: testNodeRestart
+args: -n Bug21271 T6
+
+max-time: 1000
+cmd: testIndex
+args: -n Bug21384
+
 #
 # DICT TESTS
 max-time: 1500
diff --git a/storage/ndb/test/src/HugoTransactions.cpp b/storage/ndb/test/src/HugoTransactions.cpp
index 74aab2aa55a..525f50f5231 100644
--- a/storage/ndb/test/src/HugoTransactions.cpp
+++ b/storage/ndb/test/src/HugoTransactions.cpp
@@ -519,7 +519,8 @@ HugoTransactions::loadTable(Ndb* pNdb,
                            bool allowConstraintViolation,
                            int doSleep,
                            bool oneTrans,
-                           int value){
+                           int value,
+                           bool abort){
  int             check, a;
  int             retryAttempt = 0;
  int             retryMax = 5;
@@ -585,10 +586,22 @@ HugoTransactions::loadTable(Ndb* pNdb,
    if (!oneTrans || (c + batch) >= records) {
      //      closeTrans = true;
      closeTrans = false;
-      check = pTrans->execute( Commit );
-      if(check != -1)
-       m_latest_gci = pTrans->getGCI();
-      pTrans->restart();
+      if (!abort)
+      {
+       check = pTrans->execute( Commit );
+       if(check != -1)
+         m_latest_gci = pTrans->getGCI();
+       pTrans->restart();
+      }
+      else
+      {
+       check = pTrans->execute( NoCommit );
+       if (check != -1)
+       {
+         check = pTrans->execute( Rollback );
+         closeTransaction(pNdb);
+       }
+      }
    } else {
      closeTrans = false;
      check = pTrans->execute( NoCommit );
diff --git a/storage/ndb/test/src/NDBT_Tables.cpp b/storage/ndb/test/src/NDBT_Tables.cpp
index 71da6cfc2d9..0f0fcf6ab36 100644
--- a/storage/ndb/test/src/NDBT_Tables.cpp
+++ b/storage/ndb/test/src/NDBT_Tables.cpp
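HugoTransactions::loadTable() gains an abort flag above: instead of committing each batch it executes NoCommit followed by Rollback, which the hugoLoad tool below drives with a configurable probability. A hedged sketch of calling the new overload from a test step; the parameter order follows the header change above, and batch size 512 matches hugoLoad's default, but the surrounding step boilerplate is assumed:

  // Roll the inserts back instead of committing them (abort path added above).
  HugoTransactions hugoTrans(*pTab);
  if (hugoTrans.loadTable(pNdb, records,
                          512,    /* batch */
                          true,   /* allowConstraintViolation */
                          0,      /* doSleep */
                          false,  /* oneTrans */
                          0,      /* updateValue */
                          true    /* abort: NoCommit + Rollback */) != 0)
    return NDBT_FAILED;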
@@ -829,6 +829,17 @@ NDBT_Tables::getNumTables(){
   return numTestTables;
 }
 
+const char**
+NDBT_Tables::getIndexes(const char* table)
+{
+  Uint32 i = 0;
+  for (i = 0; indexes[i].m_table != 0; i++) {
+    if (strcmp(indexes[i].m_table, table) == 0)
+      return indexes[i].m_indexes;
+  }
+  return 0;
+}
+
 int
 NDBT_Tables::createAllTables(Ndb* pNdb, bool _temp, bool existsOk){
 
@@ -974,7 +985,7 @@ NDBT_Tables::createTable(Ndb* pNdb, const char* _name, bool _temp,
 loop:
   r = pNdb->getDictionary()->createTable(tmpTab);
   if(r == -1){
-    if(pNdb->getDictionary()->getNdbError().code == 723)
+    if(pNdb->getDictionary()->getNdbError().code == 755)
     {
       if (create_default_tablespace(pNdb) == 0)
       {
diff --git a/storage/ndb/test/src/NdbRestarter.cpp b/storage/ndb/test/src/NdbRestarter.cpp
index b25c42ec18e..c12afd8e613 100644
--- a/storage/ndb/test/src/NdbRestarter.cpp
+++ b/storage/ndb/test/src/NdbRestarter.cpp
@@ -249,7 +249,7 @@ NdbRestarter::waitClusterState(ndb_mgm_node_status _status,
 
 
 int
-NdbRestarter::waitNodesState(int * _nodes, int _num_nodes,
+NdbRestarter::waitNodesState(const int * _nodes, int _num_nodes,
                             ndb_mgm_node_status _status,
                             unsigned int _timeout,
                             int _startphase){
@@ -367,20 +367,20 @@ NdbRestarter::waitNodesState(const int * _nodes, int _num_nodes,
   return 0;
 }
 
-int NdbRestarter::waitNodesStarted(int * _nodes, int _num_nodes,
+int NdbRestarter::waitNodesStarted(const int * _nodes, int _num_nodes,
                     unsigned int _timeout){
   return waitNodesState(_nodes, _num_nodes,
-                         NDB_MGM_NODE_STATUS_STARTED, _timeout);
+                       NDB_MGM_NODE_STATUS_STARTED, _timeout);
 }
 
-int NdbRestarter::waitNodesStartPhase(int * _nodes, int _num_nodes,
+int NdbRestarter::waitNodesStartPhase(const int * _nodes, int _num_nodes,
                        int _startphase, unsigned int _timeout){
   return waitNodesState(_nodes, _num_nodes,
                         NDB_MGM_NODE_STATUS_STARTING, _timeout,
                         _startphase);
 }
 
-int NdbRestarter::waitNodesNoStart(int * _nodes, int _num_nodes,
+int NdbRestarter::waitNodesNoStart(const int * _nodes, int _num_nodes,
                     unsigned int _timeout){
   return waitNodesState(_nodes, _num_nodes,
                         NDB_MGM_NODE_STATUS_NOT_STARTED, _timeout);
 }
@@ -549,7 +549,7 @@ int NdbRestarter::startAll(){
 
 }
 
-int NdbRestarter::startNodes(int * nodes, int num_nodes){
+int NdbRestarter::startNodes(const int * nodes, int num_nodes){
   if (!isConnected())
     return -1;
 
@@ -599,7 +599,7 @@ int NdbRestarter::insertErrorInAllNodes(int _error){
 
 
 
-int NdbRestarter::dumpStateOneNode(int _nodeId, int * _args, int _num_args){
+int NdbRestarter::dumpStateOneNode(int _nodeId, const int * _args, int _num_args){
  if (!isConnected())
    return -1;
@@ -617,7 +617,7 @@ int NdbRestarter::dumpStateOneNode(int _nodeId, int * _args, int _num_args){
  return reply.return_code;
 }
 
-int NdbRestarter::dumpStateAllNodes(int * _args, int _num_args){
+int NdbRestarter::dumpStateAllNodes(const int * _args, int _num_args){
  if (!isConnected())
    return -1;
diff --git a/storage/ndb/test/tools/hugoLoad.cpp b/storage/ndb/test/tools/hugoLoad.cpp
index 49c489e5db5..870ce8b0289 100644
--- a/storage/ndb/test/tools/hugoLoad.cpp
+++ b/storage/ndb/test/tools/hugoLoad.cpp
@@ -31,6 +31,8 @@ int main(int argc, const char** argv){
   int _batch = 512;
   int _loops = -1;
   int _rand = 0;
+  int _onetrans = 0;
+  int _abort = 0;
   const char* db = 0;
 
   struct getargs args[] = {
@@ -39,7 +41,9 @@ int main(int argc, const char** argv){
     { "loops", 'l', arg_integer, &_loops, "Number of loops", "" },
     { "database", 'd', arg_string, &db, "Database", "" },
     { "usage", '?', arg_flag, &_help, "Print help", "" },
-    { "rnd-rows", 0, arg_flag, &_rand, "Rand number of records", "recs" }
+    { "rnd-rows", 0, arg_flag, &_rand, "Rand number of records", "recs" },
+    { "one-trans", 0, arg_flag, &_onetrans, "Insert as 1 trans", "" },
+    { "abort", 0, arg_integer, &_abort, "Abort probability", "" }
   };
   int num_args = sizeof(args) / sizeof(args[0]);
   int optind = 0;
@@ -92,10 +96,13 @@ int main(int argc, const char** argv){
     HugoTransactions hugoTrans(*pTab);
   loop:
     int rows = (_rand ? rand() % _records : _records);
+    int abort = (rand() % 100) < _abort ? 1 : 0;
+    if (abort)
+      ndbout << "load+abort" << endl;
 
     if (hugoTrans.loadTable(&MyNdb,
                            rows,
                            _batch,
-                           true, 0, false, _loops) != 0){
+                           true, 0, _onetrans, _loops, abort) != 0){
 
       return NDBT_ProgramExit(NDBT_FAILED);
     }
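A recurring theme in the ndbapi changes above is that SubTableData no longer carries separate operation / req_nodeid / ndbd_nodeid fields: all three are packed into the single requestInfo word and accessed through SubTableData::getOperation/setOperation, getReqNodeId/setReqNodeId and getNdbdNodeId/setNdbdNodeId. A sketch of the pattern under assumed field widths; the authoritative masks and shifts live in the SubTableData signal header, not here:

  // Illustrative pack/unpack of three small fields in one 32-bit word.
  // Assumed layout (hypothetical): operation in bits 0-7, req node id in
  // bits 8-15, ndbd node id in bits 16-23.
  typedef unsigned int Uint32;

  static Uint32 getOperation(Uint32 ri)  { return ri & 0xFF; }
  static Uint32 getReqNodeId(Uint32 ri)  { return (ri >> 8) & 0xFF; }
  static Uint32 getNdbdNodeId(Uint32 ri) { return (ri >> 16) & 0xFF; }

  static void setOperation(Uint32 & ri, Uint32 op)
  { ri = (ri & ~(Uint32)0xFF) | (op & 0xFF); }
  static void setReqNodeId(Uint32 & ri, Uint32 n)
  { ri = (ri & ~(Uint32)(0xFF << 8)) | ((n & 0xFF) << 8); }
  static void setNdbdNodeId(Uint32 & ri, Uint32 n)
  { ri = (ri & ~(Uint32)(0xFF << 16)) | ((n & 0xFF) << 16); }

Note that callers zero requestInfo before the first set*, as report_node_connected() and friends do above, so stale bits never leak between events.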