Diffstat (limited to 'sql')
130 files changed, 30540 insertions, 7870 deletions
diff --git a/sql/Makefile.am b/sql/Makefile.am index 1437751bf2f..e2cf53c13aa 100644 --- a/sql/Makefile.am +++ b/sql/Makefile.am @@ -19,8 +19,8 @@ MYSQLDATAdir = $(localstatedir) MYSQLSHAREdir = $(pkgdatadir) MYSQLBASEdir= $(prefix) +MYSQLLIBdir= $(pkglibdir) INCLUDES = @ZLIB_INCLUDES@ \ - @bdb_includes@ @innodb_includes@ @ndbcluster_includes@ \ -I$(top_builddir)/include -I$(top_srcdir)/include \ -I$(top_srcdir)/regex -I$(srcdir) $(yassl_includes) \ $(openssl_includes) @@ -30,48 +30,46 @@ libexec_PROGRAMS = mysqld noinst_PROGRAMS = gen_lex_hash bin_PROGRAMS = mysql_tzinfo_to_sql gen_lex_hash_LDFLAGS = @NOINST_LDFLAGS@ -LDADD = $(top_builddir)/myisam/libmyisam.a \ - $(top_builddir)/myisammrg/libmyisammrg.a \ - $(top_builddir)/heap/libheap.a \ +LDADD = $(top_builddir)/storage/myisam/libmyisam.a \ + $(top_builddir)/storage/myisammrg/libmyisammrg.a \ + $(top_builddir)/storage/heap/libheap.a \ $(top_builddir)/vio/libvio.a \ $(top_builddir)/mysys/libmysys.a \ $(top_builddir)/dbug/libdbug.a \ $(top_builddir)/regex/libregex.a \ - $(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@ @NDB_SCI_LIBS@ + $(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@ mysqld_LDADD = @MYSQLD_EXTRA_LDFLAGS@ \ - @bdb_libs@ @innodb_libs@ @pstack_libs@ \ - @innodb_system_libs@ \ - @ndbcluster_libs@ @ndbcluster_system_libs@ \ + @pstack_libs@ \ + @mysql_se_objs@ @mysql_se_libs@ \ $(LDADD) $(CXXLDFLAGS) $(WRAPLIBS) @LIBDL@ \ @yassl_libs@ @openssl_libs@ noinst_HEADERS = item.h item_func.h item_sum.h item_cmpfunc.h \ item_strfunc.h item_timefunc.h item_uniq.h \ + item_xmlfunc.h \ item_create.h item_subselect.h item_row.h \ mysql_priv.h item_geofunc.h sql_bitmap.h \ procedure.h sql_class.h sql_lex.h sql_list.h \ sql_manager.h sql_map.h sql_string.h unireg.h \ sql_error.h field.h handler.h mysqld_suffix.h \ - ha_myisammrg.h\ - ha_heap.h ha_myisam.h ha_berkeley.h ha_innodb.h \ - ha_ndbcluster.h opt_range.h protocol.h \ + ha_heap.h ha_myisam.h ha_myisammrg.h ha_partition.h \ + opt_range.h protocol.h rpl_tblmap.h \ + log.h sql_show.h rpl_rli.h \ sql_select.h structs.h table.h sql_udf.h hash_filo.h\ lex.h lex_symbol.h sql_acl.h sql_crypt.h \ - log_event.h sql_repl.h slave.h \ + log_event.h sql_repl.h slave.h rpl_filter.h \ stacktrace.h sql_sort.h sql_cache.h set_var.h \ spatial.h gstream.h client_settings.h tzfile.h \ tztime.h my_decimal.h\ sp_head.h sp_pcontext.h sp_rcontext.h sp.h sp_cache.h \ parse_file.h sql_view.h sql_trigger.h \ sql_array.h sql_cursor.h \ - examples/ha_example.h ha_archive.h \ - examples/ha_tina.h ha_blackhole.h \ - ha_federated.h -mysqld_SOURCES = sql_lex.cc sql_handler.cc \ + sql_plugin.h authors.h +mysqld_SOURCES = sql_lex.cc sql_handler.cc sql_partition.cc \ item.cc item_sum.cc item_buff.cc item_func.cc \ item_cmpfunc.cc item_strfunc.cc item_timefunc.cc \ thr_malloc.cc item_create.cc item_subselect.cc \ - item_row.cc item_geofunc.cc \ + item_row.cc item_geofunc.cc item_xmlfunc.cc \ field.cc strfunc.cc key.cc sql_class.cc sql_list.cc \ net_serv.cc protocol.cc sql_state.c \ lock.cc my_lock.c \ @@ -86,13 +84,12 @@ mysqld_SOURCES = sql_lex.cc sql_handler.cc \ unireg.cc des_key_file.cc \ discover.cc time.cc opt_range.cc opt_sum.cc \ records.cc filesort.cc handler.cc \ - ha_heap.cc ha_myisam.cc ha_myisammrg.cc \ - ha_berkeley.cc ha_innodb.cc \ - ha_ndbcluster.cc \ + ha_heap.cc ha_myisam.cc ha_myisammrg.cc \ sql_db.cc sql_table.cc sql_rename.cc sql_crypt.cc \ sql_load.cc mf_iocache.cc field_conv.cc sql_show.cc \ sql_udf.cc sql_analyse.cc sql_analyse.h sql_cache.cc \ - slave.cc sql_repl.cc sql_union.cc 
sql_derived.cc \ + slave.cc sql_repl.cc rpl_filter.cc rpl_tblmap.cc \ + sql_union.cc sql_derived.cc \ client.c sql_client.cc mini_client_errors.c pack.c\ stacktrace.c repl_failsafe.h repl_failsafe.cc \ sql_olap.cc sql_view.cc \ @@ -100,10 +97,14 @@ mysqld_SOURCES = sql_lex.cc sql_handler.cc \ tztime.cc my_time.c my_decimal.cc\ sp_head.cc sp_pcontext.cc sp_rcontext.cc sp.cc \ sp_cache.cc parse_file.cc sql_trigger.cc \ - examples/ha_example.cc ha_archive.cc \ - examples/ha_tina.cc ha_blackhole.cc \ - ha_federated.cc - + sql_plugin.cc sql_binlog.cc \ + handlerton.cc +EXTRA_mysqld_SOURCES = ha_innodb.cc ha_berkeley.cc ha_archive.cc \ + ha_innodb.h ha_berkeley.h ha_archive.h \ + ha_blackhole.cc ha_federated.cc ha_ndbcluster.cc \ + ha_blackhole.h ha_federated.h ha_ndbcluster.h \ + ha_partition.cc ha_partition.h +mysqld_DEPENDENCIES = @mysql_se_objs@ gen_lex_hash_SOURCES = gen_lex_hash.cc gen_lex_hash_LDADD = $(LDADD) $(CXXLDFLAGS) mysql_tzinfo_to_sql_SOURCES = mysql_tzinfo_to_sql.cc @@ -113,10 +114,11 @@ DEFS = -DMYSQL_SERVER \ -DDEFAULT_MYSQL_HOME="\"$(MYSQLBASEdir)\"" \ -DDATADIR="\"$(MYSQLDATAdir)\"" \ -DSHAREDIR="\"$(MYSQLSHAREdir)\"" \ + -DLIBDIR="\"$(MYSQLLIBdir)\"" \ @DEFS@ BUILT_SOURCES = sql_yacc.cc sql_yacc.h lex_hash.h -EXTRA_DIST = udf_example.cc $(BUILT_SOURCES) +EXTRA_DIST = udf_example.cc handlerton-win.cc $(BUILT_SOURCES) DISTCLEANFILES = lex_hash.h AM_YFLAGS = -d @@ -151,6 +153,16 @@ sql_yacc.o: sql_yacc.cc sql_yacc.h $(HEADERS) lex_hash.h: gen_lex_hash$(EXEEXT) ./gen_lex_hash$(EXEEXT) > $@ +ha_berkeley.o: ha_berkeley.cc ha_berkeley.h + $(CXXCOMPILE) @bdb_includes@ $(LM_CFLAGS) -c $< + +ha_ndbcluster.o:ha_ndbcluster.cc ha_ndbcluster.h + $(CXXCOMPILE) @ndbcluster_includes@ $(LM_CFLAGS) -c $< + +#Until we can get rid of dependencies on ha_ndbcluster.h +handler.o: handler.cc ha_ndbcluster.h + $(CXXCOMPILE) @ndbcluster_includes@ $(CXXFLAGS) -c $< + # For testing of udf_example.so; Works on platforms with gcc # (This is not part of our build process but only provided as an example) udf_example.so: udf_example.cc diff --git a/sql/authors.h b/sql/authors.h new file mode 100644 index 00000000000..fde1806f4be --- /dev/null +++ b/sql/authors.h @@ -0,0 +1,146 @@ +/* Copyright (C) 2005 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Structure of the name list */ + +struct show_table_authors_st { + const char *name; + const char *location; + const char *comment; +}; + +/* + Output from "SHOW AUTHORS" + + If you can update it, you get to be in it :) + + Don't be offended if your name is not in here, just add it! + + IMPORTANT: Names should be added in alphabetical order (by last name). + + Names should be encoded using UTF-8. 
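The author list that follows is terminated by a NULL sentinel entry rather than carrying a length. A minimal standalone sketch of walking such a list (the entries below are hypothetical; the real SHOW AUTHORS plumbing lives in sql_show.cc, which is not part of this diff):

/* Sketch: iterating a sentinel-terminated show_table_authors_st array.
   Standalone demo with made-up entries, not the server's SHOW AUTHORS code. */
#include <cstdio>

struct show_table_authors_st {
  const char *name;
  const char *location;
  const char *comment;
};

static show_table_authors_st demo_authors[]= {
  { "Jane Doe", "Somewhere", "Hypothetical entry" },
  { NULL, NULL, NULL }                  /* sentinel terminates the list */
};

int main()
{
  for (show_table_authors_st *a= demo_authors; a->name; a++)
    printf("%s (%s): %s\n", a->name, a->location, a->comment);
  return 0;
}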
+*/ + +struct show_table_authors_st show_table_authors[]= { + { "Brian (Krow) Aker", "Seattle, WA, USA", + "Architecture, archive, federated, bunch of little stuff :)" }, + { "Venu Anuganti", "", "Client/server protocol (4.1)" }, + { "David Axmark", "Uppsala, Sweden", + "Small stuff long time ago, Monty ripped it out!" }, + { "Alexander (Bar) Barkov", "Izhevsk, Russia", + "Unicode and character sets (4.1)" }, + { "Omer BarNir", "Sunnyvale, CA, USA", + "Testing (sometimes) and general QA stuff" }, + { "Guilhem Bichot", "Bordeaux, France", "Replication (since 4.0)" }, + { "John Birrell", "", "Emulation of pthread_mutex() for OS/2" }, + { "Andreas F. Bobak", "", "AGGREGATE extension to user-defined functions" }, + { "Alexey Botchkov (Holyfoot)", "Izhevsk, Russia", + "GIS extensions (4.1), embedded server (4.1), precision math (5.0)"}, + { "Reggie Burnett", "Nashville, TN, USA", "Windows development, Connectors" }, + { "Oleksandr Byelkin", "Lugansk, Ukraine", + "Query Cache (4.0), Subqueries (4.1), Views (5.0)" }, + { "Kent Boortz", "Orebro, Sweden", "Test platform, and general build stuff" }, + { "Tim Bunce", "", "mysqlhotcopy" }, + { "Yves Carlier", "", "mysqlaccess" }, + { "Joshua Chamas", "Cupertino, CA, USA", + "Concurrent insert, extended date syntax" }, + { "Petr Chardin", "Moscow, Russia", "Instance Manager (5.0)" }, + { "Wei-Jou Chen", "", "Chinese (Big5) character set" }, + { "Albert Chin-A-Young", "", + "Tru64 port, large file support, better TCP wrappers support" }, + { "Jorge del Conde", "Mexico City, Mexico", "Windows development" }, + { "Antony T. Curtis", "Norwalk, CA, USA", + "Parser, port to OS/2, storage engines and some random stuff" }, + { "Yuri Dario", "", "OS/2 port" }, + { "Sergei Golubchik", "Kerpen, Germany", + "Full-text search, precision math" }, + { "Lenz Grimmer", "Hamburg, Germany", + "Production (build and release) engineering" }, + { "Nikolay Grishakin", "Austin, TX, USA", "Testing - Server" }, + { "Wei He", "", "Chinese (GBK) character set" }, + { "Eric Herman", "Amsterdam, Netherlands", "Bug fixing - federated" }, + { "Alexander (Alexi) Ivanov", "St. 
Petersburg, Russia", "Replication" }, + { "Alexander (Salle) Keremidarski", "Sofia, Bulgaria", + "Bug fixing" }, + { "Mats Kindahl", "Storvreta, Sweden", "Replication" }, + { "Serge Kozlov", "Velikie Luki, Russia", "Testing - Cluster" }, + { "Greg (Groggy) Lehey", "Uchunga, SA, Australia", "Backup" }, + { "Matthias Leich", "Berlin, Germany", "Testing - Server" }, + { "Dmitri Lenev", "Moscow, Russia", + "Time zones support (4.1), Triggers (5.0)" }, + { "Marc Liyanage", "", "Created Mac OS X packages" }, + { "Zarko Mocnik", "", "Sorting for Slovenian language" }, + { "Per-Erik Martin", "Uppsala, Sweden", "Stored Procedures (5.0)" }, + { "Alexis Mikhailov", "", "User-defined functions" }, + { "Sinisa Milivojevic", "Larnaca, Cyprus", + "UNION (4.0), Subqueries in FROM clause (4.1), many other features" }, + { "Jonathan (Jeb) Miller", "Kyle, TX, USA", + "Testing - Cluster, Replication" }, + { "Elliot Murphy", "Cocoa, FL, USA", "Replication and backup" }, + { "Kristian Nielsen", "Copenhagen, Denmark", + "General build stuff" }, + { "Pekka Nouisiainen", "Stockholm, Sweden", + "NDB Cluster: BLOB support, character set support, ordered indexes" }, + { "Alexander Nozdrin", "Moscow, Russia", + "Bug fixing (Stored Procedures, 5.0)" }, + { "Per Eric Olsson", "", "Testing of dynamic record format" }, + { "Jonas Oreland", "Stockholm, Sweden", + "NDB Cluster, Online Backup, lots of other things" }, + { "Konstantin Osipov", "Moscow, Russia", + "Prepared statements (4.1), Cursors (5.0)" }, + { "Alexander (Sasha) Pachev", "Provo, UT, USA", + "Statement-based replication, SHOW CREATE TABLE, mysql-bench" }, + { "Irena Pancirov", "", "Port to Windows with Borland compiler" }, + { "Jan Pazdziora", "", "Czech sorting order" }, + { "Benjamin Pflugmann", "", + "Extended MERGE storage engine to handle INSERT" }, + { "Igor Romanenko", "", + "mysqldump" }, + { "Mikael Ronström", "Stockholm, Sweden", + "NDB Cluster, Partitioning (5.1), Optimizations" }, + { "Tõnu Samuel", "", + "VIO interface, other miscellaneous features" }, + { "Carsten Segieth (Pino)", "Fredersdorf, Germany", "Testing - Server"}, + { "Martin Sköld", "Stockholm, Sweden", + "NDB Cluster: Unique indexes, integration into MySQL" }, + { "Timothy Smith", "Auckland, New Zealand", + "Dynamic character sets, parts of the build system, libmysqld"}, + { "Miguel Solorzano", "Florianopolis, Santa Catarina, Brazil", + "Windows development, Windows NT service"}, + { "Punita Srivastava", "Austin, TX, USA", "Testing - Merlin"}, + { "Alexey Stroganov (Ranger)", "Lugansk, Ukraine", "Testing - Benchmarks"}, + { "Ingo Strüwing", "Berlin, Germany", "Bug fixing" }, + { "Magnus Svensson", "Öregrund, Sweden", + "NDB Cluster: Integration into MySQL, test framework" }, + { "Zeev Suraski", "", "FROM_UNIXTIME(), ENCRYPT()" }, + { "TAMITO", "", + "The _MB character set macros and UJIS and SJIS character sets" }, + { "Jani Tolonen", "Helsinki, Finland", + "mysqlimport, extensions to command-line clients, PROCEDURE ANALYSE()" }, + { "Lars Thalmann", "Stockholm, Sweden", + "Replication and cluster development" }, + { "Tomas Ulin", "Stockholm, Sweden", + "NDB Cluster: Configuration, installation" }, + { "Gianmassimo Vigazzola", "", "Initial Windows port" }, + { "Sergey Vojtovich", "Izhevsk, Russia", "Plugins infrastructure (5.1)" }, + { "Matt Wagner", "Northfield, MN, USA", "Bug fixing" }, + { "Jim Winstead Jr.", "Los Angeles, CA, USA", "Bug fixing" }, + { "Michael (Monty) Widenius", "Tusby, Finland", + "Lead developer and main author" }, + { "Peter Zaitsev", "Tacoma, WA, USA", + 
"SHA1(), AES_ENCRYPT(), AES_DECRYPT(), bug fixing" }, + {NULL, NULL, NULL} +}; diff --git a/sql/discover.cc b/sql/discover.cc index 1251055c70e..2a3da55f154 100644 --- a/sql/discover.cc +++ b/sql/discover.cc @@ -55,7 +55,8 @@ int readfrm(const char *name, *frmdata= NULL; // In case of errors *len= 0; error= 1; - if ((file=my_open(fn_format(index_file,name,"",reg_ext,4), + if ((file=my_open(fn_format(index_file,name,"",reg_ext, + MY_UNPACK_FILENAME|MY_APPEND_EXT), O_RDONLY | O_SHARE, MYF(0))) < 0) goto err_end; @@ -112,7 +113,8 @@ int writefrm(const char *name, const void *frmdata, uint len) //DBUG_DUMP("frmdata", (char*)frmdata, len); error= 0; - if ((file=my_create(fn_format(index_file,name,"",reg_ext,4), + if ((file=my_create(fn_format(index_file,name,"",reg_ext, + MY_UNPACK_FILENAME|MY_APPEND_EXT), CREATE_MODE,O_RDWR | O_TRUNC,MYF(MY_WME))) >= 0) { if (my_write(file,(byte*)frmdata,len,MYF(MY_WME | MY_NABP))) diff --git a/sql/examples/ha_example.cc b/sql/examples/ha_example.cc deleted file mode 100644 index 471ece77490..00000000000 --- a/sql/examples/ha_example.cc +++ /dev/null @@ -1,701 +0,0 @@ -/* Copyright (C) 2003 MySQL AB - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ - -/* - ha_example is a stubbed storage engine. It does nothing at this point. It - will let you create/open/delete tables but that is all. You can enable it - in your buld by doing the following during your build process: - ./configure --with-example-storage-engine - - Once this is done mysql will let you create tables with: - CREATE TABLE A (...) ENGINE=EXAMPLE; - - The example is setup to use table locks. It implements an example "SHARE" - that is inserted into a hash by table name. You can use this to store - information of state that any example handler object will be able to see - if it is using the same table. - - Please read the object definition in ha_example.h before reading the rest - if this file. - - To get an idea of what occurs here is an example select that would do a - scan of an entire table: - ha_example::store_lock - ha_example::external_lock - ha_example::info - ha_example::rnd_init - ha_example::extra - ENUM HA_EXTRA_CACHE Cash record in HA_rrnd() - ha_example::rnd_next - ha_example::rnd_next - ha_example::rnd_next - ha_example::rnd_next - ha_example::rnd_next - ha_example::rnd_next - ha_example::rnd_next - ha_example::rnd_next - ha_example::rnd_next - ha_example::extra - ENUM HA_EXTRA_NO_CACHE End cacheing of records (def) - ha_example::external_lock - ha_example::extra - ENUM HA_EXTRA_RESET Reset database to after open - - In the above example has 9 row called before rnd_next signalled that it was - at the end of its data. In the above example the table was already opened - (or you would have seen a call to ha_example::open(). Calls to - ha_example::extra() are hints as to what will be occuring to the request. - - Happy coding! 
- -Brian -*/ - -#ifdef USE_PRAGMA_IMPLEMENTATION -#pragma implementation // gcc: Class implementation -#endif - -#include "../mysql_priv.h" - -#ifdef HAVE_EXAMPLE_DB -#include "ha_example.h" - - -handlerton example_hton= { - "EXAMPLE", - SHOW_OPTION_YES, - "Example storage engine", - DB_TYPE_EXAMPLE_DB, - NULL, /* We do need to write one! */ - 0, /* slot */ - 0, /* savepoint size. */ - NULL, /* close_connection */ - NULL, /* savepoint */ - NULL, /* rollback to savepoint */ - NULL, /* release savepoint */ - NULL, /* commit */ - NULL, /* rollback */ - NULL, /* prepare */ - NULL, /* recover */ - NULL, /* commit_by_xid */ - NULL, /* rollback_by_xid */ - NULL, /* create_cursor_read_view */ - NULL, /* set_cursor_read_view */ - NULL, /* close_cursor_read_view */ - HTON_CAN_RECREATE -}; - -/* Variables for example share methods */ -static HASH example_open_tables; // Hash used to track open tables -pthread_mutex_t example_mutex; // This is the mutex we use to init the hash -static int example_init= 0; // Variable for checking the init state of hash - - -/* - Function we use in the creation of our hash to get key. -*/ -static byte* example_get_key(EXAMPLE_SHARE *share,uint *length, - my_bool not_used __attribute__((unused))) -{ - *length=share->table_name_length; - return (byte*) share->table_name; -} - - -/* - Example of simple lock controls. The "share" it creates is structure we will - pass to each example handler. Do you have to have one of these? Well, you have - pieces that are used for locking, and they are needed to function. -*/ -static EXAMPLE_SHARE *get_share(const char *table_name, TABLE *table) -{ - EXAMPLE_SHARE *share; - uint length; - char *tmp_name; - - /* - So why does this exist? There is no way currently to init a storage engine. - Innodb and BDB both have modifications to the server to allow them to - do this. Since you will not want to do this, this is probably the next - best method. - */ - if (!example_init) - { - /* Hijack a mutex for init'ing the storage engine */ - pthread_mutex_lock(&LOCK_mysql_create_db); - if (!example_init) - { - example_init++; - VOID(pthread_mutex_init(&example_mutex,MY_MUTEX_INIT_FAST)); - (void) hash_init(&example_open_tables,system_charset_info,32,0,0, - (hash_get_key) example_get_key,0,0); - } - pthread_mutex_unlock(&LOCK_mysql_create_db); - } - pthread_mutex_lock(&example_mutex); - length=(uint) strlen(table_name); - - if (!(share=(EXAMPLE_SHARE*) hash_search(&example_open_tables, - (byte*) table_name, - length))) - { - if (!(share=(EXAMPLE_SHARE *) - my_multi_malloc(MYF(MY_WME | MY_ZEROFILL), - &share, sizeof(*share), - &tmp_name, length+1, - NullS))) - { - pthread_mutex_unlock(&example_mutex); - return NULL; - } - - share->use_count=0; - share->table_name_length=length; - share->table_name=tmp_name; - strmov(share->table_name,table_name); - if (my_hash_insert(&example_open_tables, (byte*) share)) - goto error; - thr_lock_init(&share->lock); - pthread_mutex_init(&share->mutex,MY_MUTEX_INIT_FAST); - } - share->use_count++; - pthread_mutex_unlock(&example_mutex); - - return share; - -error: - pthread_mutex_destroy(&share->mutex); - pthread_mutex_unlock(&example_mutex); - my_free((gptr) share, MYF(0)); - - return NULL; -} - - -/* - Free lock controls. We call this whenever we close a table. If the table had - the last reference to the share then we free memory associated with it. 
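get_share() above and the free_share() that follows implement the per-table "share" pattern: one reference-counted object per open table name, looked up in a hash under a mutex. A standalone sketch of the same shape, with the standard library standing in for mysys HASH and pthread primitives:

/* Minimal stand-in for the get_share()/free_share() pattern. Names are
   hypothetical; the point is the shape: one refcounted object per open
   table name, created on first open and dropped on last close. */
#include <map>
#include <mutex>
#include <string>

struct ExampleShare {
  unsigned use_count= 0;
  /* per-table lock state would live here (THR_LOCK in the real code) */
};

static std::mutex open_tables_mutex;
static std::map<std::string, ExampleShare> open_tables;

static ExampleShare *get_share(const std::string &table_name)
{
  std::lock_guard<std::mutex> guard(open_tables_mutex);
  ExampleShare &share= open_tables[table_name];  /* created on first open */
  share.use_count++;
  return &share;
}

static void free_share(const std::string &table_name)
{
  std::lock_guard<std::mutex> guard(open_tables_mutex);
  auto it= open_tables.find(table_name);
  if (it != open_tables.end() && --it->second.use_count == 0)
    open_tables.erase(it);                       /* last reference: drop it */
}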
-*/ -static int free_share(EXAMPLE_SHARE *share) -{ - pthread_mutex_lock(&example_mutex); - if (!--share->use_count) - { - hash_delete(&example_open_tables, (byte*) share); - thr_lock_delete(&share->lock); - pthread_mutex_destroy(&share->mutex); - my_free((gptr) share, MYF(0)); - } - pthread_mutex_unlock(&example_mutex); - - return 0; -} - - -ha_example::ha_example(TABLE *table_arg) - :handler(&example_hton, table_arg) -{} - -/* - If frm_error() is called then we will use this to to find out what file extentions - exist for the storage engine. This is also used by the default rename_table and - delete_table method in handler.cc. -*/ -static const char *ha_example_exts[] = { - NullS -}; - -const char **ha_example::bas_ext() const -{ - return ha_example_exts; -} - - -/* - Used for opening tables. The name will be the name of the file. - A table is opened when it needs to be opened. For instance - when a request comes in for a select on the table (tables are not - open and closed for each request, they are cached). - - Called from handler.cc by handler::ha_open(). The server opens all tables by - calling ha_open() which then calls the handler specific open(). -*/ -int ha_example::open(const char *name, int mode, uint test_if_locked) -{ - DBUG_ENTER("ha_example::open"); - - if (!(share = get_share(name, table))) - DBUG_RETURN(1); - thr_lock_data_init(&share->lock,&lock,NULL); - - DBUG_RETURN(0); -} - - -/* - Closes a table. We call the free_share() function to free any resources - that we have allocated in the "shared" structure. - - Called from sql_base.cc, sql_select.cc, and table.cc. - In sql_select.cc it is only used to close up temporary tables or during - the process where a temporary table is converted over to being a - myisam table. - For sql_base.cc look at close_data_tables(). -*/ -int ha_example::close(void) -{ - DBUG_ENTER("ha_example::close"); - DBUG_RETURN(free_share(share)); -} - - -/* - write_row() inserts a row. No extra() hint is given currently if a bulk load - is happeneding. buf() is a byte array of data. You can use the field - information to extract the data from the native byte array type. - Example of this would be: - for (Field **field=table->field ; *field ; field++) - { - ... - } - - See ha_tina.cc for an example of extracting all of the data as strings. - ha_berekly.cc has an example of how to store it intact by "packing" it - for ha_berkeley's own native storage type. - - See the note for update_row() on auto_increments and timestamps. This - case also applied to write_row(). - - Called from item_sum.cc, item_sum.cc, sql_acl.cc, sql_insert.cc, - sql_insert.cc, sql_select.cc, sql_table.cc, sql_udf.cc, and sql_update.cc. -*/ -int ha_example::write_row(byte * buf) -{ - DBUG_ENTER("ha_example::write_row"); - DBUG_RETURN(HA_ERR_WRONG_COMMAND); -} - - -/* - Yes, update_row() does what you expect, it updates a row. old_data will have - the previous row record in it, while new_data will have the newest data in - it. - Keep in mind that the server can do updates based on ordering if an ORDER BY - clause was used. Consecutive ordering is not guarenteed. - Currently new_data will not have an updated auto_increament record, or - and updated timestamp field. You can do these for example by doing these: - if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_UPDATE) - table->timestamp_field->set_time(); - if (table->next_number_field && record == table->record[0]) - update_auto_increment(); - - Called from sql_select.cc, sql_acl.cc, sql_update.cc, and sql_insert.cc. 
-*/ -int ha_example::update_row(const byte * old_data, byte * new_data) -{ - - DBUG_ENTER("ha_example::update_row"); - DBUG_RETURN(HA_ERR_WRONG_COMMAND); -} - - -/* - This will delete a row. buf will contain a copy of the row to be deleted. - The server will call this right after the current row has been called (from - either a previous rnd_nexT() or index call). - If you keep a pointer to the last row or can access a primary key it will - make doing the deletion quite a bit easier. - Keep in mind that the server does no guarentee consecutive deletions. ORDER BY - clauses can be used. - - Called in sql_acl.cc and sql_udf.cc to manage internal table information. - Called in sql_delete.cc, sql_insert.cc, and sql_select.cc. In sql_select it is - used for removing duplicates while in insert it is used for REPLACE calls. -*/ -int ha_example::delete_row(const byte * buf) -{ - DBUG_ENTER("ha_example::delete_row"); - DBUG_RETURN(HA_ERR_WRONG_COMMAND); -} - - -/* - Positions an index cursor to the index specified in the handle. Fetches the - row if available. If the key value is null, begin at the first key of the - index. -*/ -int ha_example::index_read(byte * buf, const byte * key, - uint key_len __attribute__((unused)), - enum ha_rkey_function find_flag - __attribute__((unused))) -{ - DBUG_ENTER("ha_example::index_read"); - DBUG_RETURN(HA_ERR_WRONG_COMMAND); -} - - -/* - Positions an index cursor to the index specified in key. Fetches the - row if any. This is only used to read whole keys. -*/ -int ha_example::index_read_idx(byte * buf, uint index, const byte * key, - uint key_len __attribute__((unused)), - enum ha_rkey_function find_flag - __attribute__((unused))) -{ - DBUG_ENTER("ha_example::index_read_idx"); - DBUG_RETURN(HA_ERR_WRONG_COMMAND); -} - - -/* - Used to read forward through the index. -*/ -int ha_example::index_next(byte * buf) -{ - DBUG_ENTER("ha_example::index_next"); - DBUG_RETURN(HA_ERR_WRONG_COMMAND); -} - - -/* - Used to read backwards through the index. -*/ -int ha_example::index_prev(byte * buf) -{ - DBUG_ENTER("ha_example::index_prev"); - DBUG_RETURN(HA_ERR_WRONG_COMMAND); -} - - -/* - index_first() asks for the first key in the index. - - Called from opt_range.cc, opt_sum.cc, sql_handler.cc, - and sql_select.cc. -*/ -int ha_example::index_first(byte * buf) -{ - DBUG_ENTER("ha_example::index_first"); - DBUG_RETURN(HA_ERR_WRONG_COMMAND); -} - - -/* - index_last() asks for the last key in the index. - - Called from opt_range.cc, opt_sum.cc, sql_handler.cc, - and sql_select.cc. -*/ -int ha_example::index_last(byte * buf) -{ - DBUG_ENTER("ha_example::index_last"); - DBUG_RETURN(HA_ERR_WRONG_COMMAND); -} - - -/* - rnd_init() is called when the system wants the storage engine to do a table - scan. - See the example in the introduction at the top of this file to see when - rnd_init() is called. - - Called from filesort.cc, records.cc, sql_handler.cc, sql_select.cc, sql_table.cc, - and sql_update.cc. -*/ -int ha_example::rnd_init(bool scan) -{ - DBUG_ENTER("ha_example::rnd_init"); - DBUG_RETURN(HA_ERR_WRONG_COMMAND); -} - -int ha_example::rnd_end() -{ - DBUG_ENTER("ha_example::rnd_end"); - DBUG_RETURN(0); -} - -/* - This is called for each row of the table scan. When you run out of records - you should return HA_ERR_END_OF_FILE. Fill buff up with the row information. - The Field structure for the table is the key to getting data into buf - in a manner that will allow the server to understand it. 
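The scan contract being described here: rnd_init() resets a cursor, each rnd_next() fills buf with one row, and HA_ERR_END_OF_FILE ends the scan. A standalone mock-up of that contract (the type and error values below are stand-ins, not the real server definitions):

/* Mock of the table-scan contract. HA_OK_SIM/HA_ERR_END_OF_FILE_SIM are
   placeholder values, not the codes from my_base.h. */
struct MockScan {
  enum { HA_OK_SIM= 0, HA_ERR_END_OF_FILE_SIM= -1 };

  int pos= 0;
  int rows= 9;                        /* the 9 rnd_next() calls in the trace */

  int rnd_init() { pos= 0; return HA_OK_SIM; }

  int rnd_next(unsigned char *buf)
  {
    if (pos >= rows)
      return HA_ERR_END_OF_FILE_SIM;  /* no more rows: end the scan */
    buf[0]= (unsigned char) pos++;    /* a real engine unpacks fields here */
    return HA_OK_SIM;
  }
};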
- - Called from filesort.cc, records.cc, sql_handler.cc, sql_select.cc, sql_table.cc, - and sql_update.cc. -*/ -int ha_example::rnd_next(byte *buf) -{ - DBUG_ENTER("ha_example::rnd_next"); - DBUG_RETURN(HA_ERR_END_OF_FILE); -} - - -/* - position() is called after each call to rnd_next() if the data needs - to be ordered. You can do something like the following to store - the position: - my_store_ptr(ref, ref_length, current_position); - - The server uses ref to store data. ref_length in the above case is - the size needed to store current_position. ref is just a byte array - that the server will maintain. If you are using offsets to mark rows, then - current_position should be the offset. If it is a primary key like in - BDB, then it needs to be a primary key. - - Called from filesort.cc, sql_select.cc, sql_delete.cc and sql_update.cc. -*/ -void ha_example::position(const byte *record) -{ - DBUG_ENTER("ha_example::position"); - DBUG_VOID_RETURN; -} - - -/* - This is like rnd_next, but you are given a position to use - to determine the row. The position will be of the type that you stored in - ref. You can use ha_get_ptr(pos,ref_length) to retrieve whatever key - or position you saved when position() was called. - Called from filesort.cc records.cc sql_insert.cc sql_select.cc sql_update.cc. -*/ -int ha_example::rnd_pos(byte * buf, byte *pos) -{ - DBUG_ENTER("ha_example::rnd_pos"); - DBUG_RETURN(HA_ERR_WRONG_COMMAND); -} - - -/* - ::info() is used to return information to the optimizer. - see my_base.h for the complete description - - Currently this table handler doesn't implement most of the fields - really needed. SHOW also makes use of this data - Another note, you will probably want to have the following in your - code: - if (records < 2) - records = 2; - The reason is that the server will optimize for cases of only a single - record. If in a table scan you don't know the number of records - it will probably be better to set records to two so you can return - as many records as you need. - Along with records a few more variables you may wish to set are: - records - deleted - data_file_length - index_file_length - delete_length - check_time - Take a look at the public variables in handler.h for more information. - - Called in: - filesort.cc - ha_heap.cc - item_sum.cc - opt_sum.cc - sql_delete.cc - sql_delete.cc - sql_derived.cc - sql_select.cc - sql_select.cc - sql_select.cc - sql_select.cc - sql_select.cc - sql_show.cc - sql_show.cc - sql_show.cc - sql_show.cc - sql_table.cc - sql_union.cc - sql_update.cc - -*/ -void ha_example::info(uint flag) -{ - DBUG_ENTER("ha_example::info"); - DBUG_VOID_RETURN; -} - - -/* - extra() is called whenever the server wishes to send a hint to - the storage engine. The myisam engine implements the most hints. - ha_innodb.cc has the most exhaustive list of these hints. -*/ -int ha_example::extra(enum ha_extra_function operation) -{ - DBUG_ENTER("ha_example::extra"); - DBUG_RETURN(0); -} - - -/* - Deprecated and likely to be removed in the future. Storage engines normally - just make a call like: - ha_example::extra(HA_EXTRA_RESET); - to handle it. -*/ -int ha_example::reset(void) -{ - DBUG_ENTER("ha_example::reset"); - DBUG_RETURN(0); -} - - -/* - Used to delete all rows in a table. Both for cases of truncate and - for cases where the optimizer realizes that all rows will be - removed as a result of a SQL statement. - - Called from item_sum.cc by Item_func_group_concat::clear(), - Item_sum_count_distinct::clear(), and Item_func_group_concat::clear(). 
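position() and rnd_pos() above cooperate through the server-maintained ref buffer: position() saves the current row's location, rnd_pos() later restores it and re-reads the row. A standalone sketch of that pairing, with memcpy standing in for my_store_ptr()/my_get_ptr():

/* Sketch of the position()/rnd_pos() pairing for an offset-based engine. */
#include <cstring>
#include <sys/types.h>

struct PositionDemo {
  off_t current_position= 0;
  unsigned char ref[sizeof(off_t)];   /* ref_length == sizeof(off_t) */

  void position() { memcpy(ref, &current_position, sizeof(off_t)); }

  void rnd_pos(const unsigned char *saved_ref)
  {
    memcpy(&current_position, saved_ref, sizeof(off_t));
    /* ...then fetch the row at current_position, as find_current_row()
       does in ha_tina further down. */
  }
};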
- Called from sql_delete.cc by mysql_delete(). - Called from sql_select.cc by JOIN::reinit(). - Called from sql_union.cc by st_select_lex_unit::exec(). -*/ -int ha_example::delete_all_rows() -{ - DBUG_ENTER("ha_example::delete_all_rows"); - DBUG_RETURN(HA_ERR_WRONG_COMMAND); -} - - -/* - First you should go read the section "locking functions for mysql" in - lock.cc to understand this. - This create a lock on the table. If you are implementing a storage engine - that can handle transacations look at ha_berkely.cc to see how you will - want to goo about doing this. Otherwise you should consider calling flock() - here. - - Called from lock.cc by lock_external() and unlock_external(). Also called - from sql_table.cc by copy_data_between_tables(). -*/ -int ha_example::external_lock(THD *thd, int lock_type) -{ - DBUG_ENTER("ha_example::external_lock"); - DBUG_RETURN(0); -} - - -/* - The idea with handler::store_lock() is the following: - - The statement decided which locks we should need for the table - for updates/deletes/inserts we get WRITE locks, for SELECT... we get - read locks. - - Before adding the lock into the table lock handler (see thr_lock.c) - mysqld calls store lock with the requested locks. Store lock can now - modify a write lock to a read lock (or some other lock), ignore the - lock (if we don't want to use MySQL table locks at all) or add locks - for many tables (like we do when we are using a MERGE handler). - - Berkeley DB for example changes all WRITE locks to TL_WRITE_ALLOW_WRITE - (which signals that we are doing WRITES, but we are still allowing other - reader's and writer's. - - When releasing locks, store_lock() are also called. In this case one - usually doesn't have to do anything. - - In some exceptional cases MySQL may send a request for a TL_IGNORE; - This means that we are requesting the same lock as last time and this - should also be ignored. (This may happen when someone does a flush - table when we have opened a part of the tables, in which case mysqld - closes and reopens the tables and tries to get the same locks at last - time). In the future we will probably try to remove this. - - Called from lock.cc by get_lock_data(). -*/ -THR_LOCK_DATA **ha_example::store_lock(THD *thd, - THR_LOCK_DATA **to, - enum thr_lock_type lock_type) -{ - if (lock_type != TL_IGNORE && lock.type == TL_UNLOCK) - lock.type=lock_type; - *to++= &lock; - return to; -} - -/* - Used to delete a table. By the time delete_table() has been called all - opened references to this table will have been closed (and your globally - shared references released. The variable name will just be the name of - the table. You will need to remove any files you have created at this point. - - If you do not implement this, the default delete_table() is called from - handler.cc and it will delete all files with the file extentions returned - by bas_ext(). - - Called from handler.cc by delete_table and ha_create_table(). Only used - during create if the table_flag HA_DROP_BEFORE_CREATE was specified for - the storage engine. -*/ -int ha_example::delete_table(const char *name) -{ - DBUG_ENTER("ha_example::delete_table"); - /* This is not implemented but we want someone to be able that it works. */ - DBUG_RETURN(0); -} - -/* - Renames a table from one name to another from alter table call. - - If you do not implement this, the default rename_table() is called from - handler.cc and it will delete all files with the file extentions returned - by bas_ext(). - - Called from sql_table.cc by mysql_rename_table(). 
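The store_lock() comment above describes how a transactional engine can downgrade write locks so concurrent readers and writers are allowed. A hedged sketch of that downgrade, loosely following the ha_berkeley/ha_innodb pattern (assumes the server's thr_lock types from mysql_priv.h; this is an illustration, not a drop-in implementation):

/* Sketch: map exclusive write requests to TL_WRITE_ALLOW_WRITE.
   A real engine typically also checks thd state (e.g. LOCK TABLES)
   before downgrading. */
THR_LOCK_DATA **store_lock_downgrading(THD *thd, THR_LOCK_DATA **to,
                                       THR_LOCK_DATA &lock,
                                       enum thr_lock_type lock_type)
{
  if (lock_type != TL_IGNORE && lock.type == TL_UNLOCK)
  {
    if (lock_type >= TL_WRITE_ALLOW_WRITE && lock_type <= TL_WRITE)
      lock_type= TL_WRITE_ALLOW_WRITE;   /* still signals a write intent */
    lock.type= lock_type;
  }
  *to++= &lock;
  return to;
}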
-*/ -int ha_example::rename_table(const char * from, const char * to) -{ - DBUG_ENTER("ha_example::rename_table "); - DBUG_RETURN(HA_ERR_WRONG_COMMAND); -} - -/* - Given a starting key, and an ending key estimate the number of rows that - will exist between the two. end_key may be empty which in case determine - if start_key matches any rows. - - Called from opt_range.cc by check_quick_keys(). -*/ -ha_rows ha_example::records_in_range(uint inx, key_range *min_key, - key_range *max_key) -{ - DBUG_ENTER("ha_example::records_in_range"); - DBUG_RETURN(10); // low number to force index usage -} - - -/* - create() is called to create a database. The variable name will have the name - of the table. When create() is called you do not need to worry about opening - the table. Also, the FRM file will have already been created so adjusting - create_info will not do you any good. You can overwrite the frm file at this - point if you wish to change the table definition, but there are no methods - currently provided for doing that. - - Called from handle.cc by ha_create_table(). -*/ -int ha_example::create(const char *name, TABLE *table_arg, - HA_CREATE_INFO *create_info) -{ - DBUG_ENTER("ha_example::create"); - /* This is not implemented but we want someone to be able that it works. */ - DBUG_RETURN(0); -} -#endif /* HAVE_EXAMPLE_DB */ diff --git a/sql/examples/ha_example.h b/sql/examples/ha_example.h deleted file mode 100644 index 37f38fe5210..00000000000 --- a/sql/examples/ha_example.h +++ /dev/null @@ -1,154 +0,0 @@ -/* Copyright (C) 2003 MySQL AB - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ - -/* - Please read ha_exmple.cc before reading this file. - Please keep in mind that the example storage engine implements all methods - that are required to be implemented. handler.h has a full list of methods - that you can implement. -*/ - -#ifdef USE_PRAGMA_INTERFACE -#pragma interface /* gcc class implementation */ -#endif - -/* - EXAMPLE_SHARE is a structure that will be shared amoung all open handlers - The example implements the minimum of what you will probably need. -*/ -typedef struct st_example_share { - char *table_name; - uint table_name_length,use_count; - pthread_mutex_t mutex; - THR_LOCK lock; -} EXAMPLE_SHARE; - -/* - Class definition for the storage engine -*/ -class ha_example: public handler -{ - THR_LOCK_DATA lock; /* MySQL lock */ - EXAMPLE_SHARE *share; /* Shared lock info */ - -public: - ha_example(TABLE *table_arg); - ~ha_example() - { - } - /* The name that will be used for display purposes */ - const char *table_type() const { return "EXAMPLE"; } - /* - The name of the index type that will be used for display - don't implement this method unless you really have indexes - */ - const char *index_type(uint inx) { return "HASH"; } - const char **bas_ext() const; - /* - This is a list of flags that says what the storage engine - implements. 
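For contrast with the zeros ha_example returns from table_flags() and index_flags(), here is what a non-trivial pair might look like for a simple engine with ordered indexes (a hypothetical class; the HA_* names are from 5.1-era handler.h, and which ones apply depends on what an engine actually supports):

/* Hypothetical engine sketch; would derive from handler in real code. */
class ha_demo_sketch /* : public handler */ {
public:
  ulong table_flags() const
  {
    /* no transactions, and rows are not stored in insert order */
    return HA_NO_TRANSACTIONS | HA_REC_NOT_IN_SEQ;
  }

  ulong index_flags(uint inx, uint part, bool all_parts) const
  {
    /* a B-tree-like index: forward/backward scans, rows in index order */
    return HA_READ_NEXT | HA_READ_PREV | HA_READ_ORDER | HA_READ_RANGE;
  }
};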
The current table flags are documented in - handler.h - */ - ulong table_flags() const - { - return 0; - } - /* - This is a bitmap of flags that says how the storage engine - implements indexes. The current index flags are documented in - handler.h. If you do not implement indexes, just return zero - here. - - part is the key part to check. First key part is 0 - If all_parts it's set, MySQL want to know the flags for the combined - index up to and including 'part'. - */ - ulong index_flags(uint inx, uint part, bool all_parts) const - { - return 0; - } - /* - unireg.cc will call the following to make sure that the storage engine can - handle the data it is about to send. - - Return *real* limits of your storage engine here. MySQL will do - min(your_limits, MySQL_limits) automatically - - There is no need to implement ..._key_... methods if you don't suport - indexes. - */ - uint max_supported_record_length() const { return HA_MAX_REC_LENGTH; } - uint max_supported_keys() const { return 0; } - uint max_supported_key_parts() const { return 0; } - uint max_supported_key_length() const { return 0; } - /* - Called in test_quick_select to determine if indexes should be used. - */ - virtual double scan_time() { return (double) (records+deleted) / 20.0+10; } - /* - The next method will never be called if you do not implement indexes. - */ - virtual double read_time(ha_rows rows) { return (double) rows / 20.0+1; } - - /* - Everything below are methods that we implment in ha_example.cc. - - Most of these methods are not obligatory, skip them and - MySQL will treat them as not implemented - */ - int open(const char *name, int mode, uint test_if_locked); // required - int close(void); // required - - int write_row(byte * buf); - int update_row(const byte * old_data, byte * new_data); - int delete_row(const byte * buf); - int index_read(byte * buf, const byte * key, - uint key_len, enum ha_rkey_function find_flag); - int index_read_idx(byte * buf, uint idx, const byte * key, - uint key_len, enum ha_rkey_function find_flag); - int index_next(byte * buf); - int index_prev(byte * buf); - int index_first(byte * buf); - int index_last(byte * buf); - /* - unlike index_init(), rnd_init() can be called two times - without rnd_end() in between (it only makes sense if scan=1). 
- then the second call should prepare for the new table scan - (e.g if rnd_init allocates the cursor, second call should - position it to the start of the table, no need to deallocate - and allocate it again - */ - int rnd_init(bool scan); //required - int rnd_end(); - int rnd_next(byte *buf); //required - int rnd_pos(byte * buf, byte *pos); //required - void position(const byte *record); //required - void info(uint); //required - - int extra(enum ha_extra_function operation); - int reset(void); - int external_lock(THD *thd, int lock_type); //required - int delete_all_rows(void); - ha_rows records_in_range(uint inx, key_range *min_key, - key_range *max_key); - int delete_table(const char *from); - int rename_table(const char * from, const char * to); - int create(const char *name, TABLE *form, - HA_CREATE_INFO *create_info); //required - - THR_LOCK_DATA **store_lock(THD *thd, THR_LOCK_DATA **to, - enum thr_lock_type lock_type); //required -}; diff --git a/sql/examples/ha_tina.cc b/sql/examples/ha_tina.cc deleted file mode 100644 index 8ae82f97d0b..00000000000 --- a/sql/examples/ha_tina.cc +++ /dev/null @@ -1,893 +0,0 @@ -/* Copyright (C) 2003 MySQL AB - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ - -/* - Make sure to look at ha_tina.h for more details. - - First off, this is a play thing for me, there are a number of things wrong with it: - *) It was designed for csv and therefor its performance is highly questionable. - *) Indexes have not been implemented. This is because the files can be traded in - and out of the table directory without having to worry about rebuilding anything. - *) NULLs and "" are treated equally (like a spreadsheet). - *) There was in the beginning no point to anyone seeing this other then me, so there - is a good chance that I haven't quite documented it well. - *) Less design, more "make it work" - - Now there are a few cool things with it: - *) Errors can result in corrupted data files. - *) Data files can be read by spreadsheets directly. - -TODO: - *) Move to a block system for larger files - *) Error recovery, its all there, just need to finish it - *) Document how the chains work. - - -Brian -*/ - -#ifdef USE_PRAGMA_IMPLEMENTATION -#pragma implementation // gcc: Class implementation -#endif - -#include "mysql_priv.h" - -#ifdef HAVE_CSV_DB - -#include "ha_tina.h" -#include <sys/mman.h> - -/* Stuff for shares */ -pthread_mutex_t tina_mutex; -static HASH tina_open_tables; -static int tina_init= 0; - -handlerton tina_hton= { - "CSV", - SHOW_OPTION_YES, - "CSV storage engine", - DB_TYPE_CSV_DB, - NULL, /* One needs to be written! */ - 0, /* slot */ - 0, /* savepoint size. 
*/ - NULL, /* close_connection */ - NULL, /* savepoint */ - NULL, /* rollback to savepoint */ - NULL, /* release savepoint */ - NULL, /* commit */ - NULL, /* rollback */ - NULL, /* prepare */ - NULL, /* recover */ - NULL, /* commit_by_xid */ - NULL, /* rollback_by_xid */ - NULL, /* create_cursor_read_view */ - NULL, /* set_cursor_read_view */ - NULL, /* close_cursor_read_view */ - HTON_CAN_RECREATE -}; - -/***************************************************************************** - ** TINA tables - *****************************************************************************/ - -/* - Used for sorting chains with qsort(). -*/ -int sort_set (tina_set *a, tina_set *b) -{ - /* - We assume that intervals do not intersect. So, it is enought to compare - any two points. Here we take start of intervals for comparison. - */ - return ( a->begin > b->begin ? -1 : ( a->begin < b->begin ? 1 : 0 ) ); -} - -static byte* tina_get_key(TINA_SHARE *share,uint *length, - my_bool not_used __attribute__((unused))) -{ - *length=share->table_name_length; - return (byte*) share->table_name; -} - -/* - Reloads the mmap file. -*/ -int get_mmap(TINA_SHARE *share, int write) -{ - DBUG_ENTER("ha_tina::get_mmap"); - if (share->mapped_file && munmap(share->mapped_file, share->file_stat.st_size)) - DBUG_RETURN(1); - - if (my_fstat(share->data_file, &share->file_stat, MYF(MY_WME)) == -1) - DBUG_RETURN(1); - - if (share->file_stat.st_size) - { - if (write) - share->mapped_file= (byte *)mmap(NULL, share->file_stat.st_size, - PROT_READ|PROT_WRITE, MAP_SHARED, - share->data_file, 0); - else - share->mapped_file= (byte *)mmap(NULL, share->file_stat.st_size, - PROT_READ, MAP_PRIVATE, - share->data_file, 0); - if ((share->mapped_file ==(caddr_t)-1)) - { - /* - Bad idea you think? See the problem is that nothing actually checks - the return value of ::rnd_init(), so tossing an error is about - it for us. - Never going to happen right? :) - */ - my_message(errno, "Woops, blew up opening a mapped file", 0); - DBUG_ASSERT(0); - DBUG_RETURN(1); - } - } - else - share->mapped_file= NULL; - - DBUG_RETURN(0); -} - -/* - Simple lock controls. 
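get_mmap() above remaps the data file whenever its size may have changed. The same unmap/fstat/mmap dance in isolation (simplified: read-only mapping only, errors reduced to return codes; the real code also supports a writable MAP_SHARED mapping for compaction):

/* Standalone sketch of the remap sequence used by get_mmap(). */
#include <sys/mman.h>
#include <sys/stat.h>

static int remap(int fd, void *&mapped, size_t &mapped_len)
{
  if (mapped && munmap(mapped, mapped_len))
    return 1;
  mapped= NULL;

  struct stat st;
  if (fstat(fd, &st))
    return 1;
  mapped_len= (size_t) st.st_size;
  if (mapped_len == 0)
    return 0;                         /* empty file: nothing to map */

  mapped= mmap(NULL, mapped_len, PROT_READ, MAP_PRIVATE, fd, 0);
  if (mapped == MAP_FAILED)
  {
    mapped= NULL;
    return 1;
  }
  return 0;
}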
-*/ -static TINA_SHARE *get_share(const char *table_name, TABLE *table) -{ - TINA_SHARE *share; - char *tmp_name; - uint length; - - if (!tina_init) - { - /* Hijack a mutex for init'ing the storage engine */ - pthread_mutex_lock(&LOCK_mysql_create_db); - if (!tina_init) - { - tina_init++; - VOID(pthread_mutex_init(&tina_mutex,MY_MUTEX_INIT_FAST)); - (void) hash_init(&tina_open_tables,system_charset_info,32,0,0, - (hash_get_key) tina_get_key,0,0); - } - pthread_mutex_unlock(&LOCK_mysql_create_db); - } - pthread_mutex_lock(&tina_mutex); - length=(uint) strlen(table_name); - if (!(share=(TINA_SHARE*) hash_search(&tina_open_tables, - (byte*) table_name, - length))) - { - char data_file_name[FN_REFLEN]; - if (!my_multi_malloc(MYF(MY_WME | MY_ZEROFILL), - &share, sizeof(*share), - &tmp_name, length+1, - NullS)) - { - pthread_mutex_unlock(&tina_mutex); - return NULL; - } - - share->use_count=0; - share->table_name_length=length; - share->table_name=tmp_name; - strmov(share->table_name,table_name); - fn_format(data_file_name, table_name, "", ".CSV",MY_REPLACE_EXT|MY_UNPACK_FILENAME); - if (my_hash_insert(&tina_open_tables, (byte*) share)) - goto error; - thr_lock_init(&share->lock); - pthread_mutex_init(&share->mutex,MY_MUTEX_INIT_FAST); - - if ((share->data_file= my_open(data_file_name, O_RDWR|O_APPEND, - MYF(0))) == -1) - goto error2; - - /* We only use share->data_file for writing, so we scan to the end to append */ - if (my_seek(share->data_file, 0, SEEK_END, MYF(0)) == MY_FILEPOS_ERROR) - goto error2; - - share->mapped_file= NULL; // We don't know the state since we just allocated it - if (get_mmap(share, 0) > 0) - goto error3; - } - share->use_count++; - pthread_mutex_unlock(&tina_mutex); - - return share; - -error3: - my_close(share->data_file,MYF(0)); -error2: - thr_lock_delete(&share->lock); - pthread_mutex_destroy(&share->mutex); -error: - pthread_mutex_unlock(&tina_mutex); - my_free((gptr) share, MYF(0)); - - return NULL; -} - - -/* - Free lock controls. -*/ -static int free_share(TINA_SHARE *share) -{ - DBUG_ENTER("ha_tina::free_share"); - pthread_mutex_lock(&tina_mutex); - int result_code= 0; - if (!--share->use_count){ - /* Drop the mapped file */ - if (share->mapped_file) - munmap(share->mapped_file, share->file_stat.st_size); - result_code= my_close(share->data_file,MYF(0)); - hash_delete(&tina_open_tables, (byte*) share); - thr_lock_delete(&share->lock); - pthread_mutex_destroy(&share->mutex); - my_free((gptr) share, MYF(0)); - } - pthread_mutex_unlock(&tina_mutex); - - DBUG_RETURN(result_code); -} - -bool tina_end() -{ - if (tina_init) - { - hash_free(&tina_open_tables); - VOID(pthread_mutex_destroy(&tina_mutex)); - } - tina_init= 0; - return FALSE; -} - -/* - Finds the end of a line. - Currently only supports files written on a UNIX OS. -*/ -byte * find_eoln(byte *data, off_t begin, off_t end) -{ - for (off_t x= begin; x < end; x++) - if (data[x] == '\n') - return data + x; - - return 0; -} - - -ha_tina::ha_tina(TABLE *table_arg) - :handler(&tina_hton, table_arg), - /* - These definitions are found in hanler.h - These are not probably completely right. - */ - current_position(0), next_position(0), chain_alloced(0), - chain_size(DEFAULT_CHAIN_LENGTH), records_is_known(0) -{ - /* Set our original buffers from pre-allocated memory */ - buffer.set(byte_buffer, IO_SIZE, system_charset_info); - chain= chain_buffer; -} - -/* - Encode a buffer into the quoted format. 
-*/ -int ha_tina::encode_quote(byte *buf) -{ - char attribute_buffer[1024]; - String attribute(attribute_buffer, sizeof(attribute_buffer), &my_charset_bin); - - buffer.length(0); - for (Field **field=table->field ; *field ; field++) - { - const char *ptr; - const char *end_ptr; - - (*field)->val_str(&attribute,&attribute); - ptr= attribute.ptr(); - end_ptr= attribute.length() + ptr; - - buffer.append('"'); - - while (ptr < end_ptr) - { - if (*ptr == '"') - { - buffer.append('\\'); - buffer.append('"'); - *ptr++; - } - else if (*ptr == '\r') - { - buffer.append('\\'); - buffer.append('r'); - *ptr++; - } - else if (*ptr == '\\') - { - buffer.append('\\'); - buffer.append('\\'); - *ptr++; - } - else if (*ptr == '\n') - { - buffer.append('\\'); - buffer.append('n'); - *ptr++; - } - else - buffer.append(*ptr++); - } - buffer.append('"'); - buffer.append(','); - } - // Remove the comma, add a line feed - buffer.length(buffer.length() - 1); - buffer.append('\n'); - //buffer.replace(buffer.length(), 0, "\n", 1); - - return (buffer.length()); -} - -/* - chain_append() adds delete positions to the chain that we use to keep track of space. -*/ -int ha_tina::chain_append() -{ - if ( chain_ptr != chain && (chain_ptr -1)->end == current_position) - (chain_ptr -1)->end= next_position; - else - { - /* We set up for the next position */ - if ((off_t)(chain_ptr - chain) == (chain_size -1)) - { - off_t location= chain_ptr - chain; - chain_size += DEFAULT_CHAIN_LENGTH; - if (chain_alloced) - { - /* Must cast since my_malloc unlike malloc doesn't have a void ptr */ - if ((chain= (tina_set *)my_realloc((gptr)chain,chain_size,MYF(MY_WME))) == NULL) - return -1; - } - else - { - tina_set *ptr= (tina_set *)my_malloc(chain_size * sizeof(tina_set),MYF(MY_WME)); - memcpy(ptr, chain, DEFAULT_CHAIN_LENGTH * sizeof(tina_set)); - chain= ptr; - chain_alloced++; - } - chain_ptr= chain + location; - } - chain_ptr->begin= current_position; - chain_ptr->end= next_position; - chain_ptr++; - } - - return 0; -} - - -/* - Scans for a row. -*/ -int ha_tina::find_current_row(byte *buf) -{ - byte *mapped_ptr= (byte *)share->mapped_file + current_position; - byte *end_ptr; - DBUG_ENTER("ha_tina::find_current_row"); - - /* EOF should be counted as new line */ - if ((end_ptr= find_eoln(share->mapped_file, current_position, share->file_stat.st_size)) == 0) - DBUG_RETURN(HA_ERR_END_OF_FILE); - - for (Field **field=table->field ; *field ; field++) - { - buffer.length(0); - mapped_ptr++; // Increment past the first quote - for(;mapped_ptr != end_ptr; mapped_ptr++) - { - //Need to convert line feeds! - if (*mapped_ptr == '"' && - (((mapped_ptr[1] == ',') && (mapped_ptr[2] == '"')) || (mapped_ptr == end_ptr -1 ))) - { - mapped_ptr += 2; // Move past the , and the " - break; - } - if (*mapped_ptr == '\\' && mapped_ptr != (end_ptr - 1)) - { - mapped_ptr++; - if (*mapped_ptr == 'r') - buffer.append('\r'); - else if (*mapped_ptr == 'n' ) - buffer.append('\n'); - else if ((*mapped_ptr == '\\') || (*mapped_ptr == '"')) - buffer.append(*mapped_ptr); - else /* This could only happed with an externally created file */ - { - buffer.append('\\'); - buffer.append(*mapped_ptr); - } - } - else - buffer.append(*mapped_ptr); - } - (*field)->store(buffer.ptr(), buffer.length(), system_charset_info); - } - next_position= (end_ptr - share->mapped_file)+1; - /* Maybe use \N for null? */ - memset(buf, 0, table->s->null_bytes); /* We do not implement nulls! 
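chain_append() above records deleted rows as [begin, end) byte intervals and merges an interval that starts where the previous one ended instead of appending a new one. The same logic as a standalone sketch, with std::vector standing in for the hand-grown chain array and its realloc logic:

/* Sketch of the hole-tracking idea behind chain_append(). */
#include <vector>
#include <sys/types.h>

struct Hole { off_t begin, end; };

static void chain_append(std::vector<Hole> &chain, off_t begin, off_t end)
{
  if (!chain.empty() && chain.back().end == begin)
    chain.back().end= end;            /* extend the previous hole */
  else
    chain.push_back(Hole{begin, end});
}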
*/ - - DBUG_RETURN(0); -} - -/* - If frm_error() is called in table.cc this is called to find out what file - extensions exist for this handler. -*/ -static const char *ha_tina_exts[] = { - ".CSV", - NullS -}; - -const char **ha_tina::bas_ext() const -{ - return ha_tina_exts; -} - - -/* - Open a database file. Keep in mind that tables are caches, so - this will not be called for every request. Any sort of positions - that need to be reset should be kept in the ::extra() call. -*/ -int ha_tina::open(const char *name, int mode, uint test_if_locked) -{ - DBUG_ENTER("ha_tina::open"); - - if (!(share= get_share(name, table))) - DBUG_RETURN(1); - thr_lock_data_init(&share->lock,&lock,NULL); - ref_length=sizeof(off_t); - - DBUG_RETURN(0); -} - - -/* - Close a database file. We remove ourselves from the shared strucutre. - If it is empty we destroy it and free the mapped file. -*/ -int ha_tina::close(void) -{ - DBUG_ENTER("ha_tina::close"); - DBUG_RETURN(free_share(share)); -} - -/* - This is an INSERT. At the moment this handler just seeks to the end - of the file and appends the data. In an error case it really should - just truncate to the original position (this is not done yet). -*/ -int ha_tina::write_row(byte * buf) -{ - int size; - DBUG_ENTER("ha_tina::write_row"); - - statistic_increment(table->in_use->status_var.ha_write_count, &LOCK_status); - - if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_INSERT) - table->timestamp_field->set_time(); - - size= encode_quote(buf); - - if (my_write(share->data_file, buffer.ptr(), size, MYF(MY_WME | MY_NABP))) - DBUG_RETURN(-1); - - /* - Ok, this is means that we will be doing potentially bad things - during a bulk insert on some OS'es. What we need is a cleanup - call for ::write_row that would let us fix up everything after the bulk - insert. The archive handler does this with an extra mutx call, which - might be a solution for this. - */ - if (get_mmap(share, 0) > 0) - DBUG_RETURN(-1); - records++; - DBUG_RETURN(0); -} - - -/* - This is called for an update. - Make sure you put in code to increment the auto increment, also - update any timestamp data. Currently auto increment is not being - fixed since autoincrements have yet to be added to this table handler. - This will be called in a table scan right before the previous ::rnd_next() - call. -*/ -int ha_tina::update_row(const byte * old_data, byte * new_data) -{ - int size; - DBUG_ENTER("ha_tina::update_row"); - - statistic_increment(table->in_use->status_var.ha_read_rnd_next_count, - &LOCK_status); - - if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_UPDATE) - table->timestamp_field->set_time(); - - size= encode_quote(new_data); - - if (chain_append()) - DBUG_RETURN(-1); - - if (my_write(share->data_file, buffer.ptr(), size, MYF(MY_WME | MY_NABP))) - DBUG_RETURN(-1); - DBUG_RETURN(0); -} - - -/* - Deletes a row. First the database will find the row, and then call this method. - In the case of a table scan, the previous call to this will be the ::rnd_next() - that found this row. - The exception to this is an ORDER BY. This will cause the table handler to walk - the table noting the positions of all rows that match a query. The table will - then be deleted/positioned based on the ORDER (so RANDOM, DESC, ASC). 
-*/ -int ha_tina::delete_row(const byte * buf) -{ - DBUG_ENTER("ha_tina::delete_row"); - statistic_increment(table->in_use->status_var.ha_delete_count,&LOCK_status); - - if (chain_append()) - DBUG_RETURN(-1); - - --records; - - DBUG_RETURN(0); -} - -/* - Fill buf with value from key. Simply this is used for a single index read - with a key. -*/ -int ha_tina::index_read(byte * buf, const byte * key, - uint key_len __attribute__((unused)), - enum ha_rkey_function find_flag - __attribute__((unused))) -{ - DBUG_ENTER("ha_tina::index_read"); - DBUG_ASSERT(0); - DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED); -} - -/* - Fill buf with value from key. Simply this is used for a single index read - with a key. - Whatever the current key is we will use it. This is what will be in "index". -*/ -int ha_tina::index_read_idx(byte * buf, uint index, const byte * key, - uint key_len __attribute__((unused)), - enum ha_rkey_function find_flag - __attribute__((unused))) -{ - DBUG_ENTER("ha_tina::index_read_idx"); - DBUG_ASSERT(0); - DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED); -} - - -/* - Read the next position in the index. -*/ -int ha_tina::index_next(byte * buf) -{ - DBUG_ENTER("ha_tina::index_next"); - DBUG_ASSERT(0); - DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED); -} - -/* - Read the previous position in the index. -*/ -int ha_tina::index_prev(byte * buf) -{ - DBUG_ENTER("ha_tina::index_prev"); - DBUG_ASSERT(0); - DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED); -} - -/* - Read the first position in the index -*/ -int ha_tina::index_first(byte * buf) -{ - DBUG_ENTER("ha_tina::index_first"); - DBUG_ASSERT(0); - DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED); -} - -/* - Read the last position in the index - With this we don't need to do a filesort() with index. - We just read the last row and call previous. -*/ -int ha_tina::index_last(byte * buf) -{ - DBUG_ENTER("ha_tina::index_last"); - DBUG_ASSERT(0); - DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED); -} - -/* - All table scans call this first. - The order of a table scan is: - - ha_tina::store_lock - ha_tina::external_lock - ha_tina::info - ha_tina::rnd_init - ha_tina::extra - ENUM HA_EXTRA_CACHE Cash record in HA_rrnd() - ha_tina::rnd_next - ha_tina::rnd_next - ha_tina::rnd_next - ha_tina::rnd_next - ha_tina::rnd_next - ha_tina::rnd_next - ha_tina::rnd_next - ha_tina::rnd_next - ha_tina::rnd_next - ha_tina::extra - ENUM HA_EXTRA_NO_CACHE End cacheing of records (def) - ha_tina::external_lock - ha_tina::extra - ENUM HA_EXTRA_RESET Reset database to after open - - Each call to ::rnd_next() represents a row returned in the can. When no more - rows can be returned, rnd_next() returns a value of HA_ERR_END_OF_FILE. - The ::info() call is just for the optimizer. - -*/ - -int ha_tina::rnd_init(bool scan) -{ - DBUG_ENTER("ha_tina::rnd_init"); - - current_position= next_position= 0; - records= 0; - records_is_known= 0; - chain_ptr= chain; -#ifdef HAVE_MADVISE - if (scan) - (void)madvise(share->mapped_file,share->file_stat.st_size,MADV_SEQUENTIAL); -#endif - - DBUG_RETURN(0); -} - -/* - ::rnd_next() does all the heavy lifting for a table scan. You will need to populate *buf - with the correct field data. You can walk the field to determine at what position you - should store the data (take a look at how ::find_current_row() works). The structure - is something like: - 0Foo Dog Friend - The first offset is for the first attribute. All space before that is reserved for null count. - Basically this works as a mask for which rows are nulled (compared to just empty). 
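rnd_init() above hints the kernel with madvise(MADV_SEQUENTIAL) before a full scan, so it can read ahead aggressively over the mapped file. That hint in isolation, guarded the same way the diff guards it (madvise() is not available everywhere):

/* The sequential-scan hint used by ha_tina::rnd_init(), standalone. */
#include <sys/mman.h>

static void advise_sequential(void *mapped, size_t len)
{
#ifdef HAVE_MADVISE
  if (mapped && len)
    (void) madvise((char *) mapped, len, MADV_SEQUENTIAL);
#endif
}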
- This table handler doesn't do nulls and does not know the difference between NULL and "". This - is ok since this table handler is for spreadsheets and they don't know about them either :) -*/ -int ha_tina::rnd_next(byte *buf) -{ - DBUG_ENTER("ha_tina::rnd_next"); - - statistic_increment(table->in_use->status_var.ha_read_rnd_next_count, - &LOCK_status); - - current_position= next_position; - if (!share->mapped_file) - DBUG_RETURN(HA_ERR_END_OF_FILE); - if (HA_ERR_END_OF_FILE == find_current_row(buf) ) - DBUG_RETURN(HA_ERR_END_OF_FILE); - - records++; - DBUG_RETURN(0); -} - -/* - In the case of an ORDER BY, rows will need to be sorted. - ::position() is called after each call to ::rnd_next(), - and the data it stores goes to a byte array. You can store this - data via my_store_ptr(). ref_length is a variable defined in the - class that is the sizeof() of the position being stored. In our case - it's just a position. Look at the bdb code if you want to see a case - where something other than a number is stored. -*/ -void ha_tina::position(const byte *record) -{ - DBUG_ENTER("ha_tina::position"); - my_store_ptr(ref, ref_length, current_position); - DBUG_VOID_RETURN; -} - - -/* - Used to fetch a row from a position stored with ::position(). - my_get_ptr() retrieves the data for you. -*/ - -int ha_tina::rnd_pos(byte * buf, byte *pos) -{ - DBUG_ENTER("ha_tina::rnd_pos"); - statistic_increment(table->in_use->status_var.ha_read_rnd_next_count, - &LOCK_status); - current_position= my_get_ptr(pos,ref_length); - DBUG_RETURN(find_current_row(buf)); -} - -/* - ::info() is used to return information to the optimizer. - Currently this table handler doesn't implement most of the fields - really needed. SHOW also makes use of this data. -*/ -void ha_tina::info(uint flag) -{ - DBUG_ENTER("ha_tina::info"); - /* This is a lie, but you don't want the optimizer to see zero or 1 */ - if (!records_is_known && records < 2) - records= 2; - DBUG_VOID_RETURN; -} - -/* - Grab bag of flags that are sent to the table handler every so often. - HA_EXTRA_RESET and HA_EXTRA_RESET_STATE are the most frequently called. - You are not required to implement any of these. -*/ -int ha_tina::extra(enum ha_extra_function operation) -{ - DBUG_ENTER("ha_tina::extra"); - DBUG_RETURN(0); -} - -/* - This is no longer used. -*/ -int ha_tina::reset(void) -{ - DBUG_ENTER("ha_tina::reset"); - ha_tina::extra(HA_EXTRA_RESET); - DBUG_RETURN(0); -} - - -/* - Called after deletes, inserts, and updates. This is where we clean up all of - the dead space we have collected while writing the file. -*/ -int ha_tina::rnd_end() -{ - DBUG_ENTER("ha_tina::rnd_end"); - - records_is_known= 1; - - /* First position will be truncate position, second will be increment */ - if ((chain_ptr - chain) > 0) - { - tina_set *ptr; - off_t length; - - /* - Set up a writable map; after the get_mmap call it will contain all - of the data that we have added to the file. - */ - if (get_mmap(share, 1) > 0) - DBUG_RETURN(-1); - length= share->file_stat.st_size; - - /* - The sort handles updates/deletes with random orders. - It also sorts so that we move the final blocks to the - beginning so that we move the smallest amount of data possible.
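  For example (illustrative numbers only): if a single deleted row occupies bytes [40, 55) of a 100-byte map, the memmove() below shifts bytes 55..99 down to offset 40, length drops from 100 to 85, and my_chsize() then truncates the file to those 85 live bytes.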
- */ - qsort(chain, (size_t)(chain_ptr - chain), sizeof(tina_set), (qsort_cmp)sort_set); - for (ptr= chain; ptr < chain_ptr; ptr++) - { - memmove(share->mapped_file + ptr->begin, share->mapped_file + ptr->end, - length - (size_t)ptr->end); - length= length - (size_t)(ptr->end - ptr->begin); - } - - /* Truncate the file to the new size */ - if (my_chsize(share->data_file, length, 0, MYF(MY_WME))) - DBUG_RETURN(-1); - - if (munmap(share->mapped_file, length)) - DBUG_RETURN(-1); - - /* We set it to null so that get_mmap() won't try to unmap it */ - share->mapped_file= NULL; - if (get_mmap(share, 0) > 0) - DBUG_RETURN(-1); - } - - DBUG_RETURN(0); -} - -/* - A DELETE without a WHERE clause calls this -*/ -int ha_tina::delete_all_rows() -{ - DBUG_ENTER("ha_tina::delete_all_rows"); - - if (!records_is_known) - return (my_errno=HA_ERR_WRONG_COMMAND); - - int rc= my_chsize(share->data_file, 0, 0, MYF(MY_WME)); - - if (get_mmap(share, 0) > 0) - DBUG_RETURN(-1); - - records=0; - DBUG_RETURN(rc); -} - -/* - Always called at the start of a transaction (or by "lock tables"); -*/ -int ha_tina::external_lock(THD *thd, int lock_type) -{ - DBUG_ENTER("ha_tina::external_lock"); - DBUG_RETURN(0); // No external locking -} - -/* - Called by the database to lock the table. Keep in mind that this - is an internal lock. -*/ -THR_LOCK_DATA **ha_tina::store_lock(THD *thd, - THR_LOCK_DATA **to, - enum thr_lock_type lock_type) -{ - if (lock_type != TL_IGNORE && lock.type == TL_UNLOCK) - lock.type=lock_type; - *to++= &lock; - return to; -} - -/* - Create a table. You do not want to leave the table open after a call to - this (the database will call ::open() if it needs to). -*/ - -int ha_tina::create(const char *name, TABLE *table_arg, HA_CREATE_INFO *create_info) -{ - char name_buff[FN_REFLEN]; - File create_file; - DBUG_ENTER("ha_tina::create"); - - if ((create_file= my_create(fn_format(name_buff,name,"",".CSV",MY_REPLACE_EXT|MY_UNPACK_FILENAME),0, - O_RDWR | O_TRUNC,MYF(MY_WME))) < 0) - DBUG_RETURN(-1); - - my_close(create_file,MYF(0)); - - DBUG_RETURN(0); -} - -#endif /* enable CSV */ diff --git a/sql/examples/ha_tina.h b/sql/examples/ha_tina.h deleted file mode 100644 index 97659f99dd9..00000000000 --- a/sql/examples/ha_tina.h +++ /dev/null @@ -1,125 +0,0 @@ -/* Copyright (C) 2003 MySQL AB - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details.
- - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ - -#include <sys/types.h> -#include <sys/stat.h> -#include <my_dir.h> - -#define DEFAULT_CHAIN_LENGTH 512 - -typedef struct st_tina_share { - char *table_name; - byte *mapped_file; /* mapped region of file */ - uint table_name_length,use_count; - MY_STAT file_stat; /* Stat information for the data file */ - File data_file; /* Current open data file */ - pthread_mutex_t mutex; - THR_LOCK lock; -} TINA_SHARE; - -typedef struct tina_set { - off_t begin; - off_t end; -}; - -class ha_tina: public handler -{ - THR_LOCK_DATA lock; /* MySQL lock */ - TINA_SHARE *share; /* Shared lock info */ - off_t current_position; /* Current position in the file during a file scan */ - off_t next_position; /* Next position in the file scan */ - byte byte_buffer[IO_SIZE]; - String buffer; - tina_set chain_buffer[DEFAULT_CHAIN_LENGTH]; - tina_set *chain; - tina_set *chain_ptr; - byte chain_alloced; - uint32 chain_size; - bool records_is_known; - -public: - ha_tina(TABLE *table_arg); - ~ha_tina() - { - if (chain_alloced) - my_free((gptr)chain,0); - } - const char *table_type() const { return "CSV"; } - const char *index_type(uint inx) { return "NONE"; } - const char **bas_ext() const; - ulong table_flags() const - { - return (HA_REC_NOT_IN_SEQ | HA_NOT_EXACT_COUNT | - HA_NO_AUTO_INCREMENT ); - } - ulong index_flags(uint idx, uint part, bool all_parts) const - { - /* We will never have indexes so this will never be called (AKA we return zero) */ - return 0; - } - uint max_record_length() const { return HA_MAX_REC_LENGTH; } - uint max_keys() const { return 0; } - uint max_key_parts() const { return 0; } - uint max_key_length() const { return 0; } - /* - Called in test_quick_select to determine if indexes should be used. - */ - virtual double scan_time() { return (double) (records+deleted) / 20.0+10; } - /* The next method will never be called */ - virtual bool fast_key_read() { return 1;} - /* - TODO: return actual upper bound of number of records in the table. - (e.g.
save number of records seen on full table scan and/or use file size - as upper bound) - */ - ha_rows estimate_rows_upper_bound() { return HA_POS_ERROR; } - - int open(const char *name, int mode, uint test_if_locked); - int close(void); - int write_row(byte * buf); - int update_row(const byte * old_data, byte * new_data); - int delete_row(const byte * buf); - int index_read(byte * buf, const byte * key, - uint key_len, enum ha_rkey_function find_flag); - int index_read_idx(byte * buf, uint idx, const byte * key, - uint key_len, enum ha_rkey_function find_flag); - int index_next(byte * buf); - int index_prev(byte * buf); - int index_first(byte * buf); - int index_last(byte * buf); - int rnd_init(bool scan=1); - int rnd_next(byte *buf); - int rnd_pos(byte * buf, byte *pos); - int rnd_end(); - void position(const byte *record); - void info(uint); - int extra(enum ha_extra_function operation); - int reset(void); - int external_lock(THD *thd, int lock_type); - int delete_all_rows(void); - int create(const char *name, TABLE *form, HA_CREATE_INFO *create_info); - - THR_LOCK_DATA **store_lock(THD *thd, THR_LOCK_DATA **to, - enum thr_lock_type lock_type); - - /* The following methods were added just for TINA */ - int encode_quote(byte *buf); - int find_current_row(byte *buf); - int chain_append(); -}; - -bool tina_end(); - diff --git a/sql/field.cc b/sql/field.cc index 1f3649e1b88..d162b9e202b 100644 --- a/sql/field.cc +++ b/sql/field.cc @@ -67,6 +67,7 @@ inline int field_type2index (enum_field_types field_type) ((int)FIELDTYPE_TEAR_FROM) + (field_type - FIELDTYPE_TEAR_TO) - 1); } + static enum_field_types field_types_merge_rules [FIELDTYPE_NUM][FIELDTYPE_NUM]= { /* MYSQL_TYPE_DECIMAL -> */ @@ -1023,10 +1024,9 @@ bool Field::type_can_have_key_part(enum enum_field_types type) Field_num::Field_num(char *ptr_arg,uint32 len_arg, uchar *null_ptr_arg, uchar null_bit_arg, utype unireg_check_arg, const char *field_name_arg, - struct st_table *table_arg, uint8 dec_arg, bool zero_arg, bool unsigned_arg) :Field(ptr_arg, len_arg, null_ptr_arg, null_bit_arg, - unireg_check_arg, field_name_arg, table_arg), + unireg_check_arg, field_name_arg), dec(dec_arg),zerofill(zero_arg),unsigned_flag(unsigned_arg) { if (zerofill) @@ -1215,16 +1215,11 @@ String *Field::val_int_as_str(String *val_buffer, my_bool unsigned_val) } -/* This is used as a table name when the table structure is not set up */ -const char *unknown_table_name= 0; - Field::Field(char *ptr_arg,uint32 length_arg,uchar *null_ptr_arg, uchar null_bit_arg, - utype unireg_check_arg, const char *field_name_arg, - struct st_table *table_arg) + utype unireg_check_arg, const char *field_name_arg) :ptr(ptr_arg),null_ptr(null_ptr_arg), - table(table_arg),orig_table(table_arg), - table_name(table_arg ? &table_arg->alias : &unknown_table_name), + table(0), orig_table(0), table_name(0), field_name(field_name_arg), query_id(0), key_start(0), part_of_key(0), part_of_sortkey(0), unireg_check(unireg_check_arg), @@ -1233,6 +1228,7 @@ Field::Field(char *ptr_arg,uint32 length_arg,uchar *null_ptr_arg, flags=null_ptr ? 
0: NOT_NULL_FLAG; comment.str= (char*) ""; comment.length=0; + fieldnr= 0; } uint Field::offset() @@ -1280,10 +1276,10 @@ void Field_num::add_zerofill_and_unsigned(String &res) const void Field::make_field(Send_field *field) { - if (orig_table->s->table_cache_key && *(orig_table->s->table_cache_key)) + if (orig_table->s->db.str && *orig_table->s->db.str) { - field->org_table_name= orig_table->s->table_name; - field->db_name= orig_table->s->table_cache_key; + field->db_name= orig_table->s->db.str; + field->org_table_name= orig_table->s->table_name.str; } else field->org_table_name= field->db_name= ""; @@ -1391,10 +1387,9 @@ my_decimal* Field_num::val_decimal(my_decimal *decimal_value) Field_str::Field_str(char *ptr_arg,uint32 len_arg, uchar *null_ptr_arg, uchar null_bit_arg, utype unireg_check_arg, - const char *field_name_arg, - struct st_table *table_arg,CHARSET_INFO *charset) + const char *field_name_arg, CHARSET_INFO *charset) :Field(ptr_arg, len_arg, null_ptr_arg, null_bit_arg, - unireg_check_arg, field_name_arg, table_arg) + unireg_check_arg, field_name_arg) { field_charset=charset; if (charset->state & MY_CS_BINSORT) @@ -1527,7 +1522,7 @@ Field *Field::new_field(MEM_ROOT *root, struct st_table *new_table) tmp->key_start.init(0); tmp->part_of_key.init(0); tmp->part_of_sortkey.init(0); - tmp->unireg_check=Field::NONE; + tmp->unireg_check= Field::NONE; tmp->flags&= (NOT_NULL_FLAG | BLOB_FLAG | UNSIGNED_FLAG | ZEROFILL_FLAG | BINARY_FLAG | ENUM_FLAG | SET_FLAG); tmp->reset_fields(); @@ -1649,6 +1644,21 @@ bool Field::needs_quotes(void) } +/* This is used to generate a field in TABLE from TABLE_SHARE */ + +Field *Field::clone(MEM_ROOT *root, struct st_table *new_table) +{ + Field *tmp; + if ((tmp= (Field*) memdup_root(root,(char*) this,size_of()))) + { + tmp->init(new_table); + tmp->move_field_offset((my_ptrdiff_t) (new_table->record[0] - + new_table->s->default_values)); + } + return tmp; +} + + /**************************************************************************** Field_null, a field that always return NULL ****************************************************************************/ @@ -2278,13 +2288,10 @@ Field_new_decimal::Field_new_decimal(char *ptr_arg, uchar null_bit_arg, enum utype unireg_check_arg, const char *field_name_arg, - struct st_table *table_arg, uint8 dec_arg,bool zero_arg, bool unsigned_arg) - :Field_num(ptr_arg, len_arg, - null_ptr_arg, null_bit_arg, - unireg_check_arg, field_name_arg, table_arg, - dec_arg, zero_arg, unsigned_arg) + :Field_num(ptr_arg, len_arg, null_ptr_arg, null_bit_arg, + unireg_check_arg, field_name_arg, dec_arg, zero_arg, unsigned_arg) { precision= my_decimal_length_to_precision(len_arg, dec_arg, unsigned_arg); DBUG_ASSERT((precision <= DECIMAL_MAX_PRECISION) && @@ -2296,14 +2303,11 @@ Field_new_decimal::Field_new_decimal(char *ptr_arg, Field_new_decimal::Field_new_decimal(uint32 len_arg, bool maybe_null, const char *name, - struct st_table *t_arg, uint8 dec_arg, bool unsigned_arg) :Field_num((char*) 0, len_arg, maybe_null ? 
(uchar*) "": 0, 0, - NONE, name, t_arg, - dec_arg, - 0, unsigned_arg) + NONE, name, dec_arg, 0, unsigned_arg) { precision= my_decimal_length_to_precision(len_arg, dec_arg, unsigned_arg); DBUG_ASSERT((precision <= DECIMAL_MAX_PRECISION) && @@ -2382,7 +2386,7 @@ bool Field_new_decimal::store_value(const my_decimal *decimal_value) #ifndef DBUG_OFF { char dbug_buff[DECIMAL_MAX_STR_LENGTH+1]; - DBUG_PRINT("info", ("saving with precision %d, scale: %d, value %s", + DBUG_PRINT("info", ("saving with precision %d scale: %d value %s", (int)precision, (int)dec, dbug_decimal_as_string(dbug_buff, decimal_value))); } @@ -2397,7 +2401,8 @@ bool Field_new_decimal::store_value(const my_decimal *decimal_value) my_decimal2binary(E_DEC_FATAL_ERROR, &buff, ptr, precision, dec); error= 1; } - DBUG_EXECUTE("info", print_decimal_buff(decimal_value, (byte *) ptr, bin_size);); + DBUG_EXECUTE("info", print_decimal_buff(decimal_value, (byte *) ptr, + bin_size);); DBUG_RETURN(error); } @@ -4444,19 +4449,18 @@ Field_timestamp::Field_timestamp(char *ptr_arg, uint32 len_arg, uchar *null_ptr_arg, uchar null_bit_arg, enum utype unireg_check_arg, const char *field_name_arg, - struct st_table *table_arg, + TABLE_SHARE *share, CHARSET_INFO *cs) :Field_str(ptr_arg, 19, null_ptr_arg, null_bit_arg, - unireg_check_arg, field_name_arg, table_arg, cs) + unireg_check_arg, field_name_arg, cs) { /* For 4.0 MYD and 4.0 InnoDB compatibility */ flags|= ZEROFILL_FLAG | UNSIGNED_FLAG; - if (table && !table->timestamp_field && - unireg_check != NONE) + if (!share->timestamp_field && unireg_check != NONE) { /* This timestamp has auto-update */ - table->timestamp_field= this; - flags|=TIMESTAMP_FLAG; + share->timestamp_field= this; + flags|= TIMESTAMP_FLAG; } } @@ -5959,6 +5963,26 @@ int Field_str::store(double nr) } +uint Field::is_equal(create_field *new_field) +{ + return (new_field->sql_type == type()); +} + + +uint Field_str::is_equal(create_field *new_field) +{ + if (((new_field->flags & (BINCMP_FLAG | BINARY_FLAG)) && + !(flags & (BINCMP_FLAG | BINARY_FLAG))) || + (!(new_field->flags & (BINCMP_FLAG | BINARY_FLAG)) && + (flags & (BINCMP_FLAG | BINARY_FLAG)))) + return 0; /* One of the fields is binary and the other one isn't */ + + return ((new_field->sql_type == type()) && + new_field->charset == field_charset && + new_field->length == max_length()); +} + + int Field_string::store(longlong nr, bool unsigned_val) { char buff[64]; @@ -6202,8 +6226,7 @@ uint Field_string::max_packed_col_length(uint max_length) Field *Field_string::new_field(MEM_ROOT *root, struct st_table *new_table) { - Field *new_field; - + Field *field; if (type() != MYSQL_TYPE_VAR_STRING || table == new_table) return Field::new_field(root, new_table); @@ -6212,19 +6235,23 @@ Field *Field_string::new_field(MEM_ROOT *root, struct st_table *new_table) This is done to ensure that ALTER TABLE will convert old VARCHAR fields to now VARCHAR fields. */ - if ((new_field= new Field_varstring(field_length, maybe_null(), - field_name, new_table, charset()))) + if ((field= new Field_varstring(field_length, maybe_null(), field_name, + new_table->s, charset()))) { + field->init(new_table); /* delayed_insert::get_local_table() needs a ptr copied from old table. This is what other new_field() methods do too. The above method of Field_varstring sets ptr to NULL. 
*/ - new_field->ptr= ptr; + field->ptr= ptr; + field->null_ptr= null_ptr; + field->null_bit= null_bit; } - return new_field; + return field; } + /**************************************************************************** VARCHAR type Data in field->ptr is stored as: @@ -6353,7 +6380,8 @@ my_decimal *Field_varstring::val_decimal(my_decimal *decimal_value) } -int Field_varstring::cmp(const char *a_ptr, const char *b_ptr) +int Field_varstring::cmp_max(const char *a_ptr, const char *b_ptr, + uint max_len) { uint a_length, b_length; int diff; @@ -6368,6 +6396,8 @@ int Field_varstring::cmp(const char *a_ptr, const char *b_ptr) a_length= uint2korr(a_ptr); b_length= uint2korr(b_ptr); } + set_if_smaller(a_length, max_len); + set_if_smaller(b_length, max_len); diff= field_charset->coll->strnncollsp(field_charset, (const uchar*) a_ptr+ length_bytes, @@ -6742,6 +6772,22 @@ Field *Field_varstring::new_key_field(MEM_ROOT *root, } +uint Field_varstring::is_equal(create_field *new_field) +{ + if (new_field->sql_type == type() && + new_field->charset == field_charset) + { + if (new_field->length == max_length()) + return IS_EQUAL_YES; + if (new_field->length > max_length() && + ((new_field->length <= 255 && max_length() <= 255) || + (new_field->length > 255 && max_length() > 255))) + return IS_EQUAL_PACK_LENGTH; // VARCHAR, longer variable length + } + return IS_EQUAL_NO; +} + + /**************************************************************************** ** blob type ** A blob is saved as a length and a pointer. The length is stored in the @@ -6750,19 +6796,16 @@ Field *Field_varstring::new_key_field(MEM_ROOT *root, Field_blob::Field_blob(char *ptr_arg, uchar *null_ptr_arg, uchar null_bit_arg, enum utype unireg_check_arg, const char *field_name_arg, - struct st_table *table_arg,uint blob_pack_length, + TABLE_SHARE *share, uint blob_pack_length, CHARSET_INFO *cs) :Field_longstr(ptr_arg, BLOB_PACK_LENGTH_TO_MAX_LENGH(blob_pack_length), - null_ptr_arg, null_bit_arg, unireg_check_arg, field_name_arg, - table_arg, cs), + null_ptr_arg, null_bit_arg, unireg_check_arg, field_name_arg, + cs), packlength(blob_pack_length) { flags|= BLOB_FLAG; - if (table) - { - table->s->blob_fields++; - /* TODO: why do we not fill the table->s->blob_field array here? */ - } + share->blob_fields++; + /* TODO: why do we not fill the table->s->blob_field array here? */ } @@ -7016,13 +7059,16 @@ int Field_blob::cmp(const char *a,uint32 a_length, const char *b, } -int Field_blob::cmp(const char *a_ptr, const char *b_ptr) +int Field_blob::cmp_max(const char *a_ptr, const char *b_ptr, + uint max_length) { char *blob1,*blob2; memcpy_fixed(&blob1,a_ptr+packlength,sizeof(char*)); memcpy_fixed(&blob2,b_ptr+packlength,sizeof(char*)); - return Field_blob::cmp(blob1,get_length(a_ptr), - blob2,get_length(b_ptr)); + uint a_len= get_length(a_ptr), b_len= get_length(b_ptr); + set_if_smaller(a_len, max_length); + set_if_smaller(b_len, max_length); + return Field_blob::cmp(blob1,a_len,blob2,b_len); } @@ -7885,6 +7931,17 @@ bool Field_num::eq_def(Field *field) } +uint Field_num::is_equal(create_field *new_field) +{ + return ((new_field->sql_type == type()) && + ((new_field->flags & UNSIGNED_FLAG) == (uint) (flags & + UNSIGNED_FLAG)) && + ((new_field->flags & AUTO_INCREMENT_FLAG) == + (uint) (flags & AUTO_INCREMENT_FLAG)) && + (new_field->length >= max_length())); +} + + /* Bit field.
@@ -7916,10 +7973,9 @@ bool Field_num::eq_def(Field *field) Field_bit::Field_bit(char *ptr_arg, uint32 len_arg, uchar *null_ptr_arg, uchar null_bit_arg, uchar *bit_ptr_arg, uchar bit_ofs_arg, - enum utype unireg_check_arg, const char *field_name_arg, - struct st_table *table_arg) + enum utype unireg_check_arg, const char *field_name_arg) : Field(ptr_arg, len_arg >> 3, null_ptr_arg, null_bit_arg, - unireg_check_arg, field_name_arg, table_arg), + unireg_check_arg, field_name_arg), bit_ptr(bit_ptr_arg), bit_ofs(bit_ofs_arg), bit_len(len_arg & 7) { /* @@ -8072,6 +8128,35 @@ my_decimal *Field_bit::val_decimal(my_decimal *deciaml_value) } + +/* + Compare two bit fields using pointers within the record. + SYNOPSIS + cmp_max() + a Pointer to field->ptr in first record + b Pointer to field->ptr in second record + max_len Maximum length used in index + DESCRIPTION + This method is used from key_rec_cmp, which is used by the merge + sorts for partitioned index reads, and later from other similar places. + The a and b pointers must point to the field within a record + (not necessarily within table->record[0]) +*/ +int Field_bit::cmp_max(const char *a, const char *b, uint max_len) +{ + my_ptrdiff_t a_diff= a - ptr; + my_ptrdiff_t b_diff= b - ptr; + if (bit_len) + { + int flag; + uchar bits_a= get_rec_bits(bit_ptr+a_diff, bit_ofs, bit_len); + uchar bits_b= get_rec_bits(bit_ptr+b_diff, bit_ofs, bit_len); + if ((flag= (int) (bits_a - bits_b))) + return flag; + } + return memcmp(a, b, field_length); +} + + int Field_bit::key_cmp(const byte *str, uint length) { if (bit_len) @@ -8157,11 +8242,10 @@ Field_bit_as_char::Field_bit_as_char(char *ptr_arg, uint32 len_arg, uchar *null_ptr_arg, uchar null_bit_arg, uchar *bit_ptr_arg, uchar bit_ofs_arg, enum utype unireg_check_arg, - const char *field_name_arg, - struct st_table *table_arg) - : Field_bit(ptr_arg, len_arg, null_ptr_arg, null_bit_arg, bit_ptr_arg, - bit_ofs_arg, unireg_check_arg, field_name_arg, table_arg), - create_length(len_arg) + const char *field_name_arg) + :Field_bit(ptr_arg, len_arg, null_ptr_arg, null_bit_arg, bit_ptr_arg, + bit_ofs_arg, unireg_check_arg, field_name_arg), + create_length(len_arg) { bit_ptr= 0; bit_ofs= 0; @@ -8699,7 +8783,7 @@ uint pack_length_to_packflag(uint type) } -Field *make_field(char *ptr, uint32 field_length, +Field *make_field(TABLE_SHARE *share, char *ptr, uint32 field_length, uchar *null_pos, uchar null_bit, uint pack_flag, enum_field_types field_type, @@ -8707,8 +8791,7 @@ Field *make_field(char *ptr, uint32 field_length, Field::geometry_type geom_type, Field::utype unireg_check, TYPELIB *interval, - const char *field_name, - struct st_table *table) + const char *field_name) { uchar *bit_ptr; uchar bit_offset; @@ -8754,13 +8837,14 @@ Field *make_field(char *ptr, uint32 field_length, field_type == FIELD_TYPE_DECIMAL || // 3.23 or 4.0 string field_type == MYSQL_TYPE_VAR_STRING) return new Field_string(ptr,field_length,null_pos,null_bit, - unireg_check, field_name, table, + unireg_check, field_name, field_charset); if (field_type == MYSQL_TYPE_VARCHAR) return new Field_varstring(ptr,field_length, HA_VARCHAR_PACKLENGTH(field_length), null_pos,null_bit, - unireg_check, field_name, table, + unireg_check, field_name, + share, field_charset); return 0; // Error } @@ -8772,22 +8856,22 @@ Field *make_field(char *ptr, uint32 field_length, #ifdef HAVE_SPATIAL if (f_is_geom(pack_flag)) return new Field_geom(ptr,null_pos,null_bit, - unireg_check, field_name, table, + unireg_check, field_name, share, pack_length, geom_type); #endif if
(f_is_blob(pack_flag)) return new Field_blob(ptr,null_pos,null_bit, - unireg_check, field_name, table, + unireg_check, field_name, share, pack_length, field_charset); if (interval) { if (f_is_enum(pack_flag)) return new Field_enum(ptr,field_length,null_pos,null_bit, - unireg_check, field_name, table, + unireg_check, field_name, pack_length, interval, field_charset); else return new Field_set(ptr,field_length,null_pos,null_bit, - unireg_check, field_name, table, + unireg_check, field_name, pack_length, interval, field_charset); } } @@ -8795,80 +8879,82 @@ Field *make_field(char *ptr, uint32 field_length, switch (field_type) { case FIELD_TYPE_DECIMAL: return new Field_decimal(ptr,field_length,null_pos,null_bit, - unireg_check, field_name, table, + unireg_check, field_name, f_decimals(pack_flag), f_is_zerofill(pack_flag) != 0, f_is_dec(pack_flag) == 0); case FIELD_TYPE_NEWDECIMAL: return new Field_new_decimal(ptr,field_length,null_pos,null_bit, - unireg_check, field_name, table, + unireg_check, field_name, f_decimals(pack_flag), f_is_zerofill(pack_flag) != 0, f_is_dec(pack_flag) == 0); case FIELD_TYPE_FLOAT: return new Field_float(ptr,field_length,null_pos,null_bit, - unireg_check, field_name, table, + unireg_check, field_name, f_decimals(pack_flag), f_is_zerofill(pack_flag) != 0, f_is_dec(pack_flag)== 0); case FIELD_TYPE_DOUBLE: return new Field_double(ptr,field_length,null_pos,null_bit, - unireg_check, field_name, table, + unireg_check, field_name, f_decimals(pack_flag), f_is_zerofill(pack_flag) != 0, f_is_dec(pack_flag)== 0); case FIELD_TYPE_TINY: return new Field_tiny(ptr,field_length,null_pos,null_bit, - unireg_check, field_name, table, + unireg_check, field_name, f_is_zerofill(pack_flag) != 0, f_is_dec(pack_flag) == 0); case FIELD_TYPE_SHORT: return new Field_short(ptr,field_length,null_pos,null_bit, - unireg_check, field_name, table, + unireg_check, field_name, f_is_zerofill(pack_flag) != 0, f_is_dec(pack_flag) == 0); case FIELD_TYPE_INT24: return new Field_medium(ptr,field_length,null_pos,null_bit, - unireg_check, field_name, table, + unireg_check, field_name, f_is_zerofill(pack_flag) != 0, f_is_dec(pack_flag) == 0); case FIELD_TYPE_LONG: return new Field_long(ptr,field_length,null_pos,null_bit, - unireg_check, field_name, table, + unireg_check, field_name, f_is_zerofill(pack_flag) != 0, f_is_dec(pack_flag) == 0); case FIELD_TYPE_LONGLONG: return new Field_longlong(ptr,field_length,null_pos,null_bit, - unireg_check, field_name, table, + unireg_check, field_name, f_is_zerofill(pack_flag) != 0, f_is_dec(pack_flag) == 0); case FIELD_TYPE_TIMESTAMP: return new Field_timestamp(ptr,field_length, null_pos, null_bit, - unireg_check, field_name, table, + unireg_check, field_name, share, field_charset); case FIELD_TYPE_YEAR: return new Field_year(ptr,field_length,null_pos,null_bit, - unireg_check, field_name, table); + unireg_check, field_name); case FIELD_TYPE_DATE: return new Field_date(ptr,null_pos,null_bit, - unireg_check, field_name, table, field_charset); + unireg_check, field_name, field_charset); case FIELD_TYPE_NEWDATE: return new Field_newdate(ptr,null_pos,null_bit, - unireg_check, field_name, table, field_charset); + unireg_check, field_name, field_charset); case FIELD_TYPE_TIME: return new Field_time(ptr,null_pos,null_bit, - unireg_check, field_name, table, field_charset); + unireg_check, field_name, field_charset); case FIELD_TYPE_DATETIME: return new Field_datetime(ptr,null_pos,null_bit, - unireg_check, field_name, table, field_charset); + unireg_check, field_name, field_charset); 
case FIELD_TYPE_NULL: - return new Field_null(ptr,field_length,unireg_check,field_name,table, field_charset); + return new Field_null(ptr, field_length, unireg_check, field_name, + field_charset); case FIELD_TYPE_BIT: return f_bit_as_char(pack_flag) ? - new Field_bit_as_char(ptr, field_length, null_pos, null_bit, bit_ptr, - bit_offset, unireg_check, field_name, table) : - new Field_bit(ptr, field_length, null_pos, null_bit, bit_ptr, - bit_offset, unireg_check, field_name, table); + new Field_bit_as_char(ptr, field_length, null_pos, null_bit, + bit_ptr, bit_offset, unireg_check, field_name) : + new Field_bit(ptr, field_length, null_pos, null_bit, bit_ptr, + bit_offset, unireg_check, field_name); + default: // Impossible (Wrong version) break; } @@ -8950,14 +9036,15 @@ create_field::create_field(Field *old_field,Field *orig_field) char buff[MAX_FIELD_WIDTH],*pos; String tmp(buff,sizeof(buff), charset), *res; my_ptrdiff_t diff; + bool is_null; /* Get the value from default_values */ diff= (my_ptrdiff_t) (orig_field->table->s->default_values- orig_field->table->record[0]); - orig_field->move_field(diff); // Points now at default_values - bool is_null=orig_field->is_real_null(); + orig_field->move_field_offset(diff); // Points now at default_values + is_null= orig_field->is_real_null(); res= orig_field->val_str(&tmp); - orig_field->move_field(-diff); // Back to record[0] + orig_field->move_field_offset(-diff); // Back to record[0] if (!is_null) { pos= (char*) sql_strmake(res->ptr(), res->length()); diff --git a/sql/field.h b/sql/field.h index 67705523088..76e18eeaeb4 100644 --- a/sql/field.h +++ b/sql/field.h @@ -29,6 +29,7 @@ class Send_field; class Protocol; +class create_field; struct st_cache_field; void field_conv(Field *to,Field *from); @@ -87,12 +88,15 @@ public: utype unireg_check; uint32 field_length; // Length of field uint field_index; // field number in fields array - uint16 flags; + uint32 flags; + /* fieldnr is the id of the field (first field = 1) and is also + used in key_part.
+ */ + uint16 fieldnr; uchar null_bit; // Bit used to test null bit Field(char *ptr_arg,uint32 length_arg,uchar *null_ptr_arg,uchar null_bit_arg, - utype unireg_check_arg, const char *field_name_arg, - struct st_table *table_arg); + utype unireg_check_arg, const char *field_name_arg); virtual ~Field() {} /* Store functions returns 1 on overflow and -1 on fatal error */ virtual int store(const char *to,uint length,CHARSET_INFO *cs)=0; @@ -163,6 +167,8 @@ public: virtual enum_field_types type() const =0; virtual enum_field_types real_type() const { return type(); } inline int cmp(const char *str) { return cmp(ptr,str); } + virtual int cmp_max(const char *a, const char *b, uint max_len) + { return cmp(a, b); } virtual int cmp(const char *,const char *)=0; virtual int cmp_binary(const char *a,const char *b, uint32 max_length=~0L) { return memcmp(a,b,pack_length()); } @@ -192,6 +198,12 @@ public: return test(record[(uint) (null_ptr - (uchar*) table->record[0])] & null_bit); } + inline bool is_null_in_record_with_offset(my_ptrdiff_t offset) + { + if (!null_ptr) + return 0; + return test(null_ptr[offset] & null_bit); + } inline void set_null(int row_offset=0) { if (null_ptr) null_ptr[row_offset]|= null_bit; } inline void set_notnull(int row_offset=0) @@ -214,12 +226,13 @@ public: virtual Field *new_key_field(MEM_ROOT *root, struct st_table *new_table, char *new_ptr, uchar *new_null_ptr, uint new_null_bit); + Field *clone(MEM_ROOT *mem_root, struct st_table *new_table); inline void move_field(char *ptr_arg,uchar *null_ptr_arg,uchar null_bit_arg) { ptr=ptr_arg; null_ptr=null_ptr_arg; null_bit=null_bit_arg; } inline void move_field(char *ptr_arg) { ptr=ptr_arg; } - inline void move_field(my_ptrdiff_t ptr_diff) + virtual void move_field_offset(my_ptrdiff_t ptr_diff) { ptr=ADD_TO_PTR(ptr,ptr_diff,char*); if (null_ptr) @@ -314,8 +327,16 @@ public: return (op_result == E_DEC_OVERFLOW); } int warn_if_overflow(int op_result); + void init(TABLE *table_arg) + { + orig_table= table= table_arg; + table_name= &table_arg->alias; + } + /* maximum possible display length */ virtual uint32 max_length()= 0; + + virtual uint is_equal(create_field *new_field); /* convert decimal to longlong with overflow check */ longlong convert_decimal2longlong(const my_decimal *val, bool unsigned_flag, int *err); @@ -344,7 +365,6 @@ public: Field_num(char *ptr_arg,uint32 len_arg, uchar *null_ptr_arg, uchar null_bit_arg, utype unireg_check_arg, const char *field_name_arg, - struct st_table *table_arg, uint8 dec_arg, bool zero_arg, bool unsigned_arg); Item_result result_type () const { return REAL_RESULT; } void prepend_zeros(String *value); @@ -356,6 +376,7 @@ public: bool eq_def(Field *field); int store_decimal(const my_decimal *); my_decimal *val_decimal(my_decimal *); + uint is_equal(create_field *new_field); }; @@ -365,8 +386,7 @@ protected: public: Field_str(char *ptr_arg,uint32 len_arg, uchar *null_ptr_arg, uchar null_bit_arg, utype unireg_check_arg, - const char *field_name_arg, - struct st_table *table_arg, CHARSET_INFO *charset); + const char *field_name_arg, CHARSET_INFO *charset); Item_result result_type () const { return STRING_RESULT; } uint decimals() const { return NOT_FIXED_DEC; } int store(double nr); @@ -380,6 +400,7 @@ public: uint32 max_length() { return field_length; } friend class create_field; my_decimal *val_decimal(my_decimal *); + uint is_equal(create_field *new_field); }; @@ -390,10 +411,9 @@ class Field_longstr :public Field_str public: Field_longstr(char *ptr_arg, uint32 len_arg, uchar *null_ptr_arg, 
uchar null_bit_arg, utype unireg_check_arg, - const char *field_name_arg, - struct st_table *table_arg,CHARSET_INFO *charset) + const char *field_name_arg, CHARSET_INFO *charset) :Field_str(ptr_arg, len_arg, null_ptr_arg, null_bit_arg, unireg_check_arg, - field_name_arg, table_arg, charset) + field_name_arg, charset) {} int store_decimal(const my_decimal *d); @@ -402,17 +422,13 @@ public: /* base class for float and double and decimal (old one) */ class Field_real :public Field_num { public: - Field_real(char *ptr_arg, uint32 len_arg, uchar *null_ptr_arg, uchar null_bit_arg, utype unireg_check_arg, const char *field_name_arg, - struct st_table *table_arg, uint8 dec_arg, bool zero_arg, bool unsigned_arg) :Field_num(ptr_arg, len_arg, null_ptr_arg, null_bit_arg, unireg_check_arg, - field_name_arg, table_arg, dec_arg, zero_arg, unsigned_arg) + field_name_arg, dec_arg, zero_arg, unsigned_arg) {} - - int store_decimal(const my_decimal *); my_decimal *val_decimal(my_decimal *); }; @@ -423,10 +439,9 @@ public: Field_decimal(char *ptr_arg, uint32 len_arg, uchar *null_ptr_arg, uchar null_bit_arg, enum utype unireg_check_arg, const char *field_name_arg, - struct st_table *table_arg, uint8 dec_arg,bool zero_arg,bool unsigned_arg) :Field_real(ptr_arg, len_arg, null_ptr_arg, null_bit_arg, - unireg_check_arg, field_name_arg, table_arg, + unireg_check_arg, field_name_arg, dec_arg, zero_arg, unsigned_arg) {} enum_field_types type() const { return FIELD_TYPE_DECIMAL;} @@ -463,11 +478,9 @@ public: Field_new_decimal(char *ptr_arg, uint32 len_arg, uchar *null_ptr_arg, uchar null_bit_arg, enum utype unireg_check_arg, const char *field_name_arg, - struct st_table *table_arg, uint8 dec_arg, bool zero_arg, bool unsigned_arg); Field_new_decimal(uint32 len_arg, bool maybe_null_arg, - const char *field_name_arg, - struct st_table *table_arg, uint8 dec_arg, + const char *field_name_arg, uint8 dec_arg, bool unsigned_arg); enum_field_types type() const { return FIELD_TYPE_NEWDECIMAL;} enum ha_base_keytype key_type() const { return HA_KEYTYPE_BINARY; } @@ -498,10 +511,9 @@ public: Field_tiny(char *ptr_arg, uint32 len_arg, uchar *null_ptr_arg, uchar null_bit_arg, enum utype unireg_check_arg, const char *field_name_arg, - struct st_table *table_arg, bool zero_arg, bool unsigned_arg) :Field_num(ptr_arg, len_arg, null_ptr_arg, null_bit_arg, - unireg_check_arg, field_name_arg, table_arg, + unireg_check_arg, field_name_arg, 0, zero_arg,unsigned_arg) {} enum Item_result result_type () const { return INT_RESULT; } @@ -529,16 +541,15 @@ public: Field_short(char *ptr_arg, uint32 len_arg, uchar *null_ptr_arg, uchar null_bit_arg, enum utype unireg_check_arg, const char *field_name_arg, - struct st_table *table_arg, bool zero_arg, bool unsigned_arg) :Field_num(ptr_arg, len_arg, null_ptr_arg, null_bit_arg, - unireg_check_arg, field_name_arg, table_arg, + unireg_check_arg, field_name_arg, 0, zero_arg,unsigned_arg) {} Field_short(uint32 len_arg,bool maybe_null_arg, const char *field_name_arg, - struct st_table *table_arg,bool unsigned_arg) + bool unsigned_arg) :Field_num((char*) 0, len_arg, maybe_null_arg ? 
(uchar*) "": 0,0, - NONE, field_name_arg, table_arg,0,0,unsigned_arg) + NONE, field_name_arg, 0, 0, unsigned_arg) {} enum Item_result result_type () const { return INT_RESULT; } enum_field_types type() const { return FIELD_TYPE_SHORT;} @@ -565,10 +576,9 @@ public: Field_medium(char *ptr_arg, uint32 len_arg, uchar *null_ptr_arg, uchar null_bit_arg, enum utype unireg_check_arg, const char *field_name_arg, - struct st_table *table_arg, bool zero_arg, bool unsigned_arg) :Field_num(ptr_arg, len_arg, null_ptr_arg, null_bit_arg, - unireg_check_arg, field_name_arg, table_arg, + unireg_check_arg, field_name_arg, 0, zero_arg,unsigned_arg) {} enum Item_result result_type () const { return INT_RESULT; } @@ -596,16 +606,15 @@ public: Field_long(char *ptr_arg, uint32 len_arg, uchar *null_ptr_arg, uchar null_bit_arg, enum utype unireg_check_arg, const char *field_name_arg, - struct st_table *table_arg, bool zero_arg, bool unsigned_arg) :Field_num(ptr_arg, len_arg, null_ptr_arg, null_bit_arg, - unireg_check_arg, field_name_arg, table_arg, + unireg_check_arg, field_name_arg, 0, zero_arg,unsigned_arg) {} Field_long(uint32 len_arg,bool maybe_null_arg, const char *field_name_arg, - struct st_table *table_arg,bool unsigned_arg) + bool unsigned_arg) :Field_num((char*) 0, len_arg, maybe_null_arg ? (uchar*) "": 0,0, - NONE, field_name_arg, table_arg,0,0,unsigned_arg) + NONE, field_name_arg,0,0,unsigned_arg) {} enum Item_result result_type () const { return INT_RESULT; } enum_field_types type() const { return FIELD_TYPE_LONG;} @@ -633,17 +642,16 @@ public: Field_longlong(char *ptr_arg, uint32 len_arg, uchar *null_ptr_arg, uchar null_bit_arg, enum utype unireg_check_arg, const char *field_name_arg, - struct st_table *table_arg, bool zero_arg, bool unsigned_arg) :Field_num(ptr_arg, len_arg, null_ptr_arg, null_bit_arg, - unireg_check_arg, field_name_arg, table_arg, + unireg_check_arg, field_name_arg, 0, zero_arg,unsigned_arg) {} Field_longlong(uint32 len_arg,bool maybe_null_arg, const char *field_name_arg, - struct st_table *table_arg, bool unsigned_arg) + bool unsigned_arg) :Field_num((char*) 0, len_arg, maybe_null_arg ? (uchar*) "": 0,0, - NONE, field_name_arg, table_arg,0,0,unsigned_arg) + NONE, field_name_arg,0,0,unsigned_arg) {} enum Item_result result_type () const { return INT_RESULT; } enum_field_types type() const { return FIELD_TYPE_LONGLONG;} @@ -672,16 +680,15 @@ public: Field_float(char *ptr_arg, uint32 len_arg, uchar *null_ptr_arg, uchar null_bit_arg, enum utype unireg_check_arg, const char *field_name_arg, - struct st_table *table_arg, uint8 dec_arg,bool zero_arg,bool unsigned_arg) :Field_real(ptr_arg, len_arg, null_ptr_arg, null_bit_arg, - unireg_check_arg, field_name_arg, table_arg, + unireg_check_arg, field_name_arg, dec_arg, zero_arg, unsigned_arg) {} Field_float(uint32 len_arg, bool maybe_null_arg, const char *field_name_arg, - struct st_table *table_arg, uint8 dec_arg) + uint8 dec_arg) :Field_real((char*) 0, len_arg, maybe_null_arg ? 
(uchar*) "": 0, (uint) 0, - NONE, field_name_arg, table_arg, dec_arg, 0, 0) + NONE, field_name_arg, dec_arg, 0, 0) {} enum_field_types type() const { return FIELD_TYPE_FLOAT;} enum ha_base_keytype key_type() const { return HA_KEYTYPE_FLOAT; } @@ -706,16 +713,15 @@ public: Field_double(char *ptr_arg, uint32 len_arg, uchar *null_ptr_arg, uchar null_bit_arg, enum utype unireg_check_arg, const char *field_name_arg, - struct st_table *table_arg, uint8 dec_arg,bool zero_arg,bool unsigned_arg) :Field_real(ptr_arg, len_arg, null_ptr_arg, null_bit_arg, - unireg_check_arg, field_name_arg, table_arg, + unireg_check_arg, field_name_arg, dec_arg, zero_arg, unsigned_arg) {} Field_double(uint32 len_arg, bool maybe_null_arg, const char *field_name_arg, - struct st_table *table_arg, uint8 dec_arg) + uint8 dec_arg) :Field_real((char*) 0, len_arg, maybe_null_arg ? (uchar*) "": 0, (uint) 0, - NONE, field_name_arg, table_arg, dec_arg, 0, 0) + NONE, field_name_arg, dec_arg, 0, 0) {} enum_field_types type() const { return FIELD_TYPE_DOUBLE;} enum ha_base_keytype key_type() const { return HA_KEYTYPE_DOUBLE; } @@ -742,9 +748,9 @@ class Field_null :public Field_str { public: Field_null(char *ptr_arg, uint32 len_arg, enum utype unireg_check_arg, const char *field_name_arg, - struct st_table *table_arg, CHARSET_INFO *cs) + CHARSET_INFO *cs) :Field_str(ptr_arg, len_arg, null, 1, - unireg_check_arg, field_name_arg, table_arg, cs) + unireg_check_arg, field_name_arg, cs) {} enum_field_types type() const { return FIELD_TYPE_NULL;} int store(const char *to, uint length, CHARSET_INFO *cs) @@ -772,8 +778,7 @@ public: Field_timestamp(char *ptr_arg, uint32 len_arg, uchar *null_ptr_arg, uchar null_bit_arg, enum utype unireg_check_arg, const char *field_name_arg, - struct st_table *table_arg, - CHARSET_INFO *cs); + TABLE_SHARE *share, CHARSET_INFO *cs); enum_field_types type() const { return FIELD_TYPE_TIMESTAMP;} enum ha_base_keytype key_type() const { return HA_KEYTYPE_ULONG_INT; } enum Item_result cmp_type () const { return INT_RESULT; } @@ -823,10 +828,9 @@ class Field_year :public Field_tiny { public: Field_year(char *ptr_arg, uint32 len_arg, uchar *null_ptr_arg, uchar null_bit_arg, - enum utype unireg_check_arg, const char *field_name_arg, - struct st_table *table_arg) + enum utype unireg_check_arg, const char *field_name_arg) :Field_tiny(ptr_arg, len_arg, null_ptr_arg, null_bit_arg, - unireg_check_arg, field_name_arg, table_arg, 1, 1) + unireg_check_arg, field_name_arg, 1, 1) {} enum_field_types type() const { return FIELD_TYPE_YEAR;} int store(const char *to,uint length,CHARSET_INFO *charset); @@ -845,14 +849,14 @@ class Field_date :public Field_str { public: Field_date(char *ptr_arg, uchar *null_ptr_arg, uchar null_bit_arg, enum utype unireg_check_arg, const char *field_name_arg, - struct st_table *table_arg, CHARSET_INFO *cs) + CHARSET_INFO *cs) :Field_str(ptr_arg, 10, null_ptr_arg, null_bit_arg, - unireg_check_arg, field_name_arg, table_arg, cs) + unireg_check_arg, field_name_arg, cs) {} Field_date(bool maybe_null_arg, const char *field_name_arg, - struct st_table *table_arg, CHARSET_INFO *cs) + CHARSET_INFO *cs) :Field_str((char*) 0,10, maybe_null_arg ? 
(uchar*) "": 0,0, - NONE, field_name_arg, table_arg, cs) {} + NONE, field_name_arg, cs) {} enum_field_types type() const { return FIELD_TYPE_DATE;} enum ha_base_keytype key_type() const { return HA_KEYTYPE_ULONG_INT; } enum Item_result cmp_type () const { return INT_RESULT; } @@ -872,13 +876,14 @@ public: bool zero_pack() const { return 1; } }; + class Field_newdate :public Field_str { public: Field_newdate(char *ptr_arg, uchar *null_ptr_arg, uchar null_bit_arg, enum utype unireg_check_arg, const char *field_name_arg, - struct st_table *table_arg, CHARSET_INFO *cs) + CHARSET_INFO *cs) :Field_str(ptr_arg, 10, null_ptr_arg, null_bit_arg, - unireg_check_arg, field_name_arg, table_arg, cs) + unireg_check_arg, field_name_arg, cs) {} enum_field_types type() const { return FIELD_TYPE_DATE;} enum_field_types real_type() const { return FIELD_TYPE_NEWDATE; } @@ -908,14 +913,14 @@ class Field_time :public Field_str { public: Field_time(char *ptr_arg, uchar *null_ptr_arg, uchar null_bit_arg, enum utype unireg_check_arg, const char *field_name_arg, - struct st_table *table_arg, CHARSET_INFO *cs) + CHARSET_INFO *cs) :Field_str(ptr_arg, 8, null_ptr_arg, null_bit_arg, - unireg_check_arg, field_name_arg, table_arg, cs) + unireg_check_arg, field_name_arg, cs) {} Field_time(bool maybe_null_arg, const char *field_name_arg, - struct st_table *table_arg, CHARSET_INFO *cs) + CHARSET_INFO *cs) :Field_str((char*) 0,8, maybe_null_arg ? (uchar*) "": 0,0, - NONE, field_name_arg, table_arg, cs) {} + NONE, field_name_arg, cs) {} enum_field_types type() const { return FIELD_TYPE_TIME;} enum ha_base_keytype key_type() const { return HA_KEYTYPE_INT24; } enum Item_result cmp_type () const { return INT_RESULT; } @@ -943,14 +948,14 @@ class Field_datetime :public Field_str { public: Field_datetime(char *ptr_arg, uchar *null_ptr_arg, uchar null_bit_arg, enum utype unireg_check_arg, const char *field_name_arg, - struct st_table *table_arg, CHARSET_INFO *cs) + CHARSET_INFO *cs) :Field_str(ptr_arg, 19, null_ptr_arg, null_bit_arg, - unireg_check_arg, field_name_arg, table_arg, cs) + unireg_check_arg, field_name_arg, cs) {} Field_datetime(bool maybe_null_arg, const char *field_name_arg, - struct st_table *table_arg, CHARSET_INFO *cs) + CHARSET_INFO *cs) :Field_str((char*) 0,19, maybe_null_arg ? (uchar*) "": 0,0, - NONE, field_name_arg, table_arg, cs) {} + NONE, field_name_arg, cs) {} enum_field_types type() const { return FIELD_TYPE_DATETIME;} #ifdef HAVE_LONG_LONG enum ha_base_keytype key_type() const { return HA_KEYTYPE_ULONGLONG; } @@ -982,13 +987,13 @@ public: Field_string(char *ptr_arg, uint32 len_arg,uchar *null_ptr_arg, uchar null_bit_arg, enum utype unireg_check_arg, const char *field_name_arg, - struct st_table *table_arg, CHARSET_INFO *cs) + CHARSET_INFO *cs) :Field_longstr(ptr_arg, len_arg, null_ptr_arg, null_bit_arg, - unireg_check_arg, field_name_arg, table_arg, cs) {}; + unireg_check_arg, field_name_arg, cs) {}; Field_string(uint32 len_arg,bool maybe_null_arg, const char *field_name_arg, - struct st_table *table_arg, CHARSET_INFO *cs) + CHARSET_INFO *cs) :Field_longstr((char*) 0, len_arg, maybe_null_arg ? 
(uchar*) "": 0, 0, - NONE, field_name_arg, table_arg, cs) {}; + NONE, field_name_arg, cs) {}; enum_field_types type() const { @@ -1033,26 +1038,23 @@ public: uint32 length_bytes; Field_varstring(char *ptr_arg, uint32 len_arg, uint length_bytes_arg, - uchar *null_ptr_arg, - uchar null_bit_arg, + uchar *null_ptr_arg, uchar null_bit_arg, enum utype unireg_check_arg, const char *field_name_arg, - struct st_table *table_arg, CHARSET_INFO *cs) + TABLE_SHARE *share, CHARSET_INFO *cs) :Field_longstr(ptr_arg, len_arg, null_ptr_arg, null_bit_arg, - unireg_check_arg, field_name_arg, table_arg, cs), + unireg_check_arg, field_name_arg, cs), length_bytes(length_bytes_arg) { - if (table) - table->s->varchar_fields++; + share->varchar_fields++; } Field_varstring(uint32 len_arg,bool maybe_null_arg, const char *field_name_arg, - struct st_table *table_arg, CHARSET_INFO *cs) + TABLE_SHARE *share, CHARSET_INFO *cs) :Field_longstr((char*) 0,len_arg, maybe_null_arg ? (uchar*) "": 0, 0, - NONE, field_name_arg, table_arg, cs), + NONE, field_name_arg, cs), length_bytes(len_arg < 256 ? 1 :2) { - if (table) - table->s->varchar_fields++; + share->varchar_fields++; } enum_field_types type() const { return MYSQL_TYPE_VARCHAR; } @@ -1073,7 +1075,11 @@ public: longlong val_int(void); String *val_str(String*,String *); my_decimal *val_decimal(my_decimal *); - int cmp(const char *,const char*); + int cmp_max(const char *, const char *, uint max_length); + int cmp(const char *a,const char*b) + { + return cmp_max(a, b, ~0L); + } void sort_string(char *buff,uint length); void get_key_image(char *buff,uint length, imagetype type); void set_key_image(char *buff,uint length); @@ -1099,6 +1105,7 @@ public: Field *new_key_field(MEM_ROOT *root, struct st_table *new_table, char *new_ptr, uchar *new_null_ptr, uint new_null_bit); + uint is_equal(create_field *new_field); }; @@ -1109,12 +1116,11 @@ protected: public: Field_blob(char *ptr_arg, uchar *null_ptr_arg, uchar null_bit_arg, enum utype unireg_check_arg, const char *field_name_arg, - struct st_table *table_arg,uint blob_pack_length, - CHARSET_INFO *cs); + TABLE_SHARE *share, uint blob_pack_length, CHARSET_INFO *cs); Field_blob(uint32 len_arg,bool maybe_null_arg, const char *field_name_arg, - struct st_table *table_arg, CHARSET_INFO *cs) - :Field_longstr((char*) 0,len_arg, maybe_null_arg ? (uchar*) "": 0, 0, - NONE, field_name_arg, table_arg, cs), + CHARSET_INFO *cs) + :Field_longstr((char*) 0, len_arg, maybe_null_arg ? 
(uchar*) "": 0, 0, + NONE, field_name_arg, cs), packlength(4) { flags|= BLOB_FLAG; @@ -1129,7 +1135,9 @@ public: longlong val_int(void); String *val_str(String*,String *); my_decimal *val_decimal(my_decimal *); - int cmp(const char *,const char*); + int cmp_max(const char *, const char *, uint max_length); + int cmp(const char *a,const char*b) + { return cmp_max(a, b, ~0L); } int cmp(const char *a, uint32 a_length, const char *b, uint32 b_length); int cmp_binary(const char *a,const char *b, uint32 max_length=~0L); int key_cmp(const byte *,const byte*); @@ -1154,6 +1162,10 @@ public: { memcpy_fixed(str,ptr+packlength,sizeof(char*)); } + inline void get_ptr(char **str, uint row_offset) + { + memcpy_fixed(str,ptr+packlength+row_offset,sizeof(char*)); + } inline void set_ptr(char *length,char *data) { memcpy(ptr,length,packlength); @@ -1205,15 +1217,14 @@ public: Field_geom(char *ptr_arg, uchar *null_ptr_arg, uint null_bit_arg, enum utype unireg_check_arg, const char *field_name_arg, - struct st_table *table_arg,uint blob_pack_length, + TABLE_SHARE *share, uint blob_pack_length, enum geometry_type geom_type_arg) :Field_blob(ptr_arg, null_ptr_arg, null_bit_arg, unireg_check_arg, - field_name_arg, table_arg, blob_pack_length,&my_charset_bin) + field_name_arg, share, blob_pack_length, &my_charset_bin) { geom_type= geom_type_arg; } Field_geom(uint32 len_arg,bool maybe_null_arg, const char *field_name_arg, - struct st_table *table_arg, enum geometry_type geom_type_arg) - :Field_blob(len_arg, maybe_null_arg, field_name_arg, - table_arg, &my_charset_bin) + TABLE_SHARE *share, enum geometry_type geom_type_arg) + :Field_blob(len_arg, maybe_null_arg, field_name_arg, &my_charset_bin) { geom_type= geom_type_arg; } enum ha_base_keytype key_type() const { return HA_KEYTYPE_VARBINARY2; } enum_field_types type() const { return FIELD_TYPE_GEOMETRY; } @@ -1234,13 +1245,13 @@ protected: public: TYPELIB *typelib; Field_enum(char *ptr_arg, uint32 len_arg, uchar *null_ptr_arg, - uchar null_bit_arg, - enum utype unireg_check_arg, const char *field_name_arg, - struct st_table *table_arg,uint packlength_arg, - TYPELIB *typelib_arg, - CHARSET_INFO *charset_arg) + uchar null_bit_arg, + enum utype unireg_check_arg, const char *field_name_arg, + uint packlength_arg, + TYPELIB *typelib_arg, + CHARSET_INFO *charset_arg) :Field_str(ptr_arg, len_arg, null_ptr_arg, null_bit_arg, - unireg_check_arg, field_name_arg, table_arg, charset_arg), + unireg_check_arg, field_name_arg, charset_arg), packlength(packlength_arg),typelib(typelib_arg) { flags|=ENUM_FLAG; @@ -1277,12 +1288,12 @@ public: Field_set(char *ptr_arg, uint32 len_arg, uchar *null_ptr_arg, uchar null_bit_arg, enum utype unireg_check_arg, const char *field_name_arg, - struct st_table *table_arg,uint32 packlength_arg, + uint32 packlength_arg, TYPELIB *typelib_arg, CHARSET_INFO *charset_arg) :Field_enum(ptr_arg, len_arg, null_ptr_arg, null_bit_arg, unireg_check_arg, field_name_arg, - table_arg, packlength_arg, - typelib_arg,charset_arg) + packlength_arg, + typelib_arg,charset_arg) { flags=(flags & ~ENUM_FLAG) | SET_FLAG; } @@ -1304,8 +1315,7 @@ public: uint bit_len; // number of 'uneven' high bits Field_bit(char *ptr_arg, uint32 len_arg, uchar *null_ptr_arg, uchar null_bit_arg, uchar *bit_ptr_arg, uchar bit_ofs_arg, - enum utype unireg_check_arg, const char *field_name_arg, - struct st_table *table_arg); + enum utype unireg_check_arg, const char *field_name_arg); enum_field_types type() const { return FIELD_TYPE_BIT; } enum ha_base_keytype key_type() const { return 
HA_KEYTYPE_BIT; } uint32 key_length() const { return (uint32) field_length + (bit_len > 0); } @@ -1323,6 +1333,7 @@ public: my_decimal *val_decimal(my_decimal *); int cmp(const char *a, const char *b) { return cmp_binary(a, b); } + int cmp_max(const char *a, const char *b, uint max_length); int key_cmp(const byte *a, const byte *b) { return cmp_binary((char *) a, (char *) b); } int key_cmp(const byte *str, uint length); @@ -1346,6 +1357,11 @@ public: bit_ptr= bit_ptr_arg; bit_ofs= bit_ofs_arg; } + void move_field_offset(my_ptrdiff_t ptr_diff) + { + Field::move_field_offset(ptr_diff); + bit_ptr= ADD_TO_PTR(bit_ptr, ptr_diff, uchar*); + } }; @@ -1354,8 +1370,7 @@ public: uchar create_length; Field_bit_as_char(char *ptr_arg, uint32 len_arg, uchar *null_ptr_arg, uchar null_bit_arg, uchar *bit_ptr_arg, uchar bit_ofs_arg, - enum utype unireg_check_arg, const char *field_name_arg, - struct st_table *table_arg); + enum utype unireg_check_arg, const char *field_name_arg); enum ha_base_keytype key_type() const { return HA_KEYTYPE_BINARY; } uint32 max_length() { return (uint32) create_length; } uint size_of() const { return sizeof(*this); } @@ -1456,14 +1471,13 @@ public: }; -Field *make_field(char *ptr, uint32 field_length, +Field *make_field(TABLE_SHARE *share, char *ptr, uint32 field_length, uchar *null_pos, uchar null_bit, uint pack_flag, enum_field_types field_type, CHARSET_INFO *cs, Field::geometry_type geom_type, Field::utype unireg_check, - TYPELIB *interval, const char *field_name, - struct st_table *table); + TYPELIB *interval, const char *field_name); uint pack_length_to_packflag(uint type); enum_field_types get_blob_type_from_length(ulong length); uint32 calc_pack_length(enum_field_types type,uint32 length); diff --git a/sql/field_conv.cc b/sql/field_conv.cc index bbe2dbe5e9f..d46a3aa8be3 100644 --- a/sql/field_conv.cc +++ b/sql/field_conv.cc @@ -178,8 +178,7 @@ set_field_to_null_with_conversions(Field *field, bool no_conversions) } if (current_thd->count_cuted_fields == CHECK_FIELD_WARN) { - field->set_warning(MYSQL_ERROR::WARN_LEVEL_WARN, - ER_WARN_NULL_TO_NOTNULL, 1); + field->set_warning(MYSQL_ERROR::WARN_LEVEL_WARN, ER_BAD_NULL_ERROR, 1); return 0; } if (!current_thd->no_errors) diff --git a/sql/ha_archive.cc b/sql/ha_archive.cc index c60d40c2685..dfc01d01a80 100644 --- a/sql/ha_archive.cc +++ b/sql/ha_archive.cc @@ -20,7 +20,6 @@ #include "mysql_priv.h" -#if defined(HAVE_ARCHIVE_DB) #include "ha_archive.h" #include <my_dir.h> @@ -31,13 +30,13 @@ a storage engine without indexes that could compress data very well. So, welcome to a completely compressed storage engine. This storage engine only does inserts. No replace, deletes, or updates. All reads are - complete table scans. Compression is done through gzip (bzip compresses + complete table scans. Compression is done through azip (bzip compresses better, but only marginally; if someone asks I could add support for - it too, but be aware that it costs a lot more in CPU time than gzip). + it too, but be aware that it costs a lot more in CPU time than azip). We keep a file pointer open for each instance of ha_archive for each read but for writes we keep one open file handle just for that. We flush it - only if we have a read occur. gzip handles compressing lots of records + only if we have a read occur. azip handles compressing lots of records at once much better than doing lots of little records between writes.
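  A sketch of the shared-writer handshake this describes, using names from this file (buf/len are illustrative; the real code is in write_row() and rnd_init() below):

    /* writer side: append through the shared handle and mark it dirty */
    azwrite(&share->archive_write, buf, len);
    share->dirty= TRUE;
    /* reader side: flush the writer once, double-checked under the mutex */
    if (share->dirty)
    {
      azflush(&share->archive_write, Z_SYNC_FLUSH);
      share->dirty= FALSE;
    }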
It is possible to not lock on writes but this would then mean we couldn't handle bulk inserts as well (that is if someone was trying to read at @@ -85,7 +84,7 @@ Add truncate table command. Implement versioning, should be easy. Allow for errors, find a way to mark bad rows. - Talk to the gzip guys, come up with a writable format so that updates are doable + Talk to the azip guys, come up with a writable format so that updates are doable without switching to a block method. Add optional feature so that rows can be flushed at interval (which will cause less compression but may speed up ordered searches). @@ -135,8 +134,13 @@ static HASH archive_open_tables; #define DATA_BUFFER_SIZE 2 // Size of the data used in the data file #define ARCHIVE_CHECK_HEADER 254 // The number we use to determine corruption +/* Static declarations for handlerton */ +static handler *archive_create_handler(TABLE_SHARE *table); + + /* dummy handlerton - only to have something to return from archive_db_init */ handlerton archive_hton = { + MYSQL_HANDLERTON_INTERFACE_VERSION, "ARCHIVE", SHOW_OPTION_YES, "Archive storage engine", @@ -157,9 +161,19 @@ handlerton archive_hton = { NULL, /* create_cursor_read_view */ NULL, /* set_cursor_read_view */ NULL, /* close_cursor_read_view */ + archive_create_handler, /* Create a new handler */ + NULL, /* Drop a database */ + archive_db_end, /* Panic call */ + NULL, /* Start Consistent Snapshot */ + NULL, /* Flush logs */ + NULL, /* Show status */ HTON_NO_FLAGS }; +static handler *archive_create_handler(TABLE_SHARE *table) +{ + return new ha_archive(table); +} /* Used for hash table that tracks open tables. @@ -215,7 +229,7 @@ error: FALSE OK */ -bool archive_db_end() +int archive_db_end(ha_panic_function type) { if (archive_inited) { @@ -223,32 +237,31 @@ bool archive_db_end() VOID(pthread_mutex_destroy(&archive_mutex)); } archive_inited= 0; - return FALSE; + return 0; } -ha_archive::ha_archive(TABLE *table_arg) +ha_archive::ha_archive(TABLE_SHARE *table_arg) :handler(&archive_hton, table_arg), delayed_insert(0), bulk_insert(0) { /* Set our original buffer from pre-allocated memory */ buffer.set((char *)byte_buffer, IO_SIZE, system_charset_info); /* The size of the offset value we will use for position() */ - ref_length = 2 << ((zlibCompileFlags() >> 6) & 3); - DBUG_ASSERT(ref_length <= sizeof(z_off_t)); + ref_length = sizeof(my_off_t); } /* This method reads the header of a datafile and returns whether or not it was successful. */ -int ha_archive::read_data_header(gzFile file_to_read) +int ha_archive::read_data_header(azio_stream *file_to_read) { uchar data_buffer[DATA_BUFFER_SIZE]; DBUG_ENTER("ha_archive::read_data_header"); - if (gzrewind(file_to_read) == -1) + if (azrewind(file_to_read) == -1) DBUG_RETURN(HA_ERR_CRASHED_ON_USAGE); - if (gzread(file_to_read, data_buffer, DATA_BUFFER_SIZE) != DATA_BUFFER_SIZE) + if (azread(file_to_read, data_buffer, DATA_BUFFER_SIZE) != DATA_BUFFER_SIZE) DBUG_RETURN(errno ? errno : -1); DBUG_PRINT("ha_archive::read_data_header", ("Check %u", data_buffer[0])); @@ -264,7 +277,7 @@ int ha_archive::read_data_header(gzFile file_to_read) /* This method writes out the header of a datafile and returns whether or not it was successful.
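  Concretely, the header is just DATA_BUFFER_SIZE (2) bytes: data_buffer[0] holds ARCHIVE_CHECK_HEADER (254) as a corruption check and data_buffer[1] holds ARCHIVE_VERSION. A reader can validate it with a check of the form (a sketch mirroring read_data_header() above):

    if (data_buffer[0] != ARCHIVE_CHECK_HEADER)  /* corrupt or foreign file */
      DBUG_RETURN(HA_ERR_CRASHED_ON_USAGE);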
*/ -int ha_archive::write_data_header(gzFile file_to_write) +int ha_archive::write_data_header(azio_stream *file_to_write) { uchar data_buffer[DATA_BUFFER_SIZE]; DBUG_ENTER("ha_archive::write_data_header"); @@ -272,7 +285,7 @@ int ha_archive::write_data_header(gzFile file_to_write) data_buffer[0]= (uchar)ARCHIVE_CHECK_HEADER; data_buffer[1]= (uchar)ARCHIVE_VERSION; - if (gzwrite(file_to_write, &data_buffer, DATA_BUFFER_SIZE) != + if (azwrite(file_to_write, &data_buffer, DATA_BUFFER_SIZE) != DATA_BUFFER_SIZE) goto error; DBUG_PRINT("ha_archive::write_data_header", ("Check %u", (uint)data_buffer[0])); @@ -411,8 +424,11 @@ ARCHIVE_SHARE *ha_archive::get_share(const char *table_name, TABLE *table) a gzip file that can be both read and written we keep a writer open that is shared amoung all open tables. */ - if ((share->archive_write= gzopen(share->data_file_name, "ab")) == NULL) + if (!(azopen(&(share->archive_write), share->data_file_name, O_WRONLY|O_APPEND|O_BINARY))) + { + DBUG_PRINT("info", ("Could not open archive write file")); share->crashed= TRUE; + } VOID(my_hash_insert(&archive_open_tables, (byte*) share)); thr_lock_init(&share->lock); } @@ -437,7 +453,7 @@ int ha_archive::free_share(ARCHIVE_SHARE *share) thr_lock_delete(&share->lock); VOID(pthread_mutex_destroy(&share->mutex)); (void)write_meta_file(share->meta_file, share->rows_recorded, FALSE); - if (gzclose(share->archive_write) == Z_ERRNO) + if (azclose(&(share->archive_write))) rc= 1; if (my_close(share->meta_file, MYF(0))) rc= 1; @@ -478,7 +494,7 @@ int ha_archive::open(const char *name, int mode, uint test_if_locked) DBUG_RETURN(HA_ERR_OUT_OF_MEM); // Not handled well by calling code! thr_lock_data_init(&share->lock,&lock,NULL); - if ((archive= gzopen(share->data_file_name, "rb")) == NULL) + if (!(azopen(&archive, share->data_file_name, O_RDONLY|O_BINARY))) { if (errno == EROFS || errno == EACCES) DBUG_RETURN(my_errno= errno); @@ -512,7 +528,7 @@ int ha_archive::close(void) DBUG_ENTER("ha_archive::close"); /* First close stream */ - if (gzclose(archive) == Z_ERRNO) + if (azclose(&archive)) rc= 1; /* then also close share */ rc|= free_share(share); @@ -558,30 +574,28 @@ int ha_archive::create(const char *name, TABLE *table_arg, error= my_errno; goto error; } - if ((archive= gzdopen(create_file, "wb")) == NULL) + if (!azdopen(&archive, create_file, O_WRONLY|O_BINARY)) { error= errno; goto error2; } - if (write_data_header(archive)) + if (write_data_header(&archive)) { error= errno; goto error3; } - if (gzclose(archive)) + if (azclose(&archive)) { error= errno; goto error2; } - my_close(create_file, MYF(0)); - DBUG_RETURN(0); error3: - /* We already have an error, so ignore results of gzclose. */ - (void)gzclose(archive); + /* We already have an error, so ignore results of azclose. */ + (void)azclose(&archive); error2: my_close(create_file, MYF(0)); delete_table(name); @@ -593,18 +607,18 @@ error: /* This is where the actual row is written out. 
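Each row goes out as the fixed-size record image followed by the raw blob payloads, in field order:

    [table->s->reclength bytes][blob #1 data][blob #2 data]...

The blob lengths travel inside the fixed image itself, so no separate per-blob length prefix is needed.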
*/ -int ha_archive::real_write_row(byte *buf, gzFile writer) +int ha_archive::real_write_row(byte *buf, azio_stream *writer) { - z_off_t written; + my_off_t written; uint *ptr, *end; DBUG_ENTER("ha_archive::real_write_row"); - written= gzwrite(writer, buf, table->s->reclength); + written= azwrite(writer, buf, table->s->reclength); DBUG_PRINT("ha_archive::real_write_row", ("Wrote %d bytes expected %d", written, table->s->reclength)); if (!delayed_insert || !bulk_insert) share->dirty= TRUE; - if (written != (z_off_t)table->s->reclength) + if (written != (my_off_t)table->s->reclength) DBUG_RETURN(errno ? errno : -1); /* We should probably mark the table as damagaged if the record is written @@ -620,8 +634,8 @@ int ha_archive::real_write_row(byte *buf, gzFile writer) if (size) { ((Field_blob*) table->field[*ptr])->get_ptr(&data_ptr); - written= gzwrite(writer, data_ptr, (unsigned)size); - if (written != (z_off_t)size) + written= azwrite(writer, data_ptr, (unsigned)size); + if (written != (my_off_t)size) DBUG_RETURN(errno ? errno : -1); } } @@ -651,7 +665,7 @@ int ha_archive::write_row(byte *buf) table->timestamp_field->set_time(); pthread_mutex_lock(&share->mutex); share->rows_recorded++; - rc= real_write_row(buf, share->archive_write); + rc= real_write_row(buf, &(share->archive_write)); pthread_mutex_unlock(&share->mutex); DBUG_RETURN(rc); @@ -678,20 +692,20 @@ int ha_archive::rnd_init(bool scan) /* If dirty, we lock, and then reset/flush the data. - I found that just calling gzflush() doesn't always work. + I found that just calling azflush() doesn't always work. */ if (share->dirty == TRUE) { pthread_mutex_lock(&share->mutex); if (share->dirty == TRUE) { - gzflush(share->archive_write, Z_SYNC_FLUSH); + azflush(&(share->archive_write), Z_SYNC_FLUSH); share->dirty= FALSE; } pthread_mutex_unlock(&share->mutex); } - if (read_data_header(archive)) + if (read_data_header(&archive)) DBUG_RETURN(HA_ERR_CRASHED_ON_USAGE); } @@ -703,15 +717,15 @@ int ha_archive::rnd_init(bool scan) This is the method that is used to read a row. It assumes that the row is positioned where you want it. 
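Row positions are plain byte offsets into the compressed stream: rnd_next() saves aztell(&archive) before each read, that my_off_t becomes the row ref handed out by position() (which is why the constructor above now sets ref_length to sizeof(my_off_t) rather than deriving it from zlibCompileFlags()), and rnd_pos() simply azseek()s back to it.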
*/ -int ha_archive::get_row(gzFile file_to_read, byte *buf) +int ha_archive::get_row(azio_stream *file_to_read, byte *buf) { - int read; // Bytes read, gzread() returns int + int read; // Bytes read, azread() returns int uint *ptr, *end; char *last; size_t total_blob_length= 0; DBUG_ENTER("ha_archive::get_row"); - read= gzread(file_to_read, buf, table->s->reclength); + read= azread(file_to_read, buf, table->s->reclength); DBUG_PRINT("ha_archive::get_row", ("Read %d bytes expected %d", read, table->s->reclength)); if (read == Z_STREAM_ERROR) @@ -746,7 +760,7 @@ int ha_archive::get_row(gzFile file_to_read, byte *buf) size_t size= ((Field_blob*) table->field[*ptr])->get_length(); if (size) { - read= gzread(file_to_read, last, size); + read= azread(file_to_read, last, size); if ((size_t) read != size) DBUG_RETURN(HA_ERR_END_OF_FILE); ((Field_blob*) table->field[*ptr])->set_ptr(size, last); @@ -776,8 +790,8 @@ int ha_archive::rnd_next(byte *buf) statistic_increment(table->in_use->status_var.ha_read_rnd_next_count, &LOCK_status); - current_position= gztell(archive); - rc= get_row(archive, buf); + current_position= aztell(&archive); + rc= get_row(&archive, buf); if (rc != HA_ERR_END_OF_FILE) @@ -813,10 +827,10 @@ int ha_archive::rnd_pos(byte * buf, byte *pos) DBUG_ENTER("ha_archive::rnd_pos"); statistic_increment(table->in_use->status_var.ha_read_rnd_next_count, &LOCK_status); - current_position= (z_off_t)my_get_ptr(pos, ref_length); - (void)gzseek(archive, current_position, SEEK_SET); + current_position= (my_off_t)my_get_ptr(pos, ref_length); + (void)azseek(&archive, current_position, SEEK_SET); - DBUG_RETURN(get_row(archive, buf)); + DBUG_RETURN(get_row(&archive, buf)); } /* @@ -845,17 +859,17 @@ int ha_archive::optimize(THD* thd, HA_CHECK_OPT* check_opt) { DBUG_ENTER("ha_archive::optimize"); int rc; - gzFile writer; + azio_stream writer; char writer_filename[FN_REFLEN]; /* Flush any waiting data */ - gzflush(share->archive_write, Z_SYNC_FLUSH); + azflush(&(share->archive_write), Z_SYNC_FLUSH); /* Lets create a file to contain the new data */ fn_format(writer_filename, share->table_name, "", ARN, MY_REPLACE_EXT|MY_UNPACK_FILENAME); - if ((writer= gzopen(writer_filename, "wb")) == NULL) + if (!(azopen(&writer, writer_filename, O_CREAT|O_WRONLY|O_TRUNC|O_BINARY))) DBUG_RETURN(HA_ERR_CRASHED_ON_USAGE); /* @@ -865,6 +879,7 @@ int ha_archive::optimize(THD* thd, HA_CHECK_OPT* check_opt) if (check_opt->flags == T_EXTEND) { + DBUG_PRINT("info", ("archive extended rebuild")); byte *buf; /* @@ -881,14 +896,14 @@ int ha_archive::optimize(THD* thd, HA_CHECK_OPT* check_opt) Now we will rewind the archive file so that we are positioned at the start of the file. */ - rc= read_data_header(archive); + rc= read_data_header(&archive); /* Assuming now error from rewinding the archive file, we now write out the new header for out data file. */ if (!rc) - rc= write_data_header(writer); + rc= write_data_header(&writer); /* On success of writing out the new header, we now fetch each row and @@ -897,9 +912,9 @@ int ha_archive::optimize(THD* thd, HA_CHECK_OPT* check_opt) if (!rc) { share->rows_recorded= 0; - while (!(rc= get_row(archive, buf))) + while (!(rc= get_row(&archive, buf))) { - real_write_row(buf, writer); + real_write_row(buf, &writer); share->rows_recorded++; } } @@ -910,31 +925,31 @@ int ha_archive::optimize(THD* thd, HA_CHECK_OPT* check_opt) } else { + DBUG_PRINT("info", ("archive quick rebuild")); /* The quick method is to just read the data raw, and then compress it directly. 
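Unlike the T_EXTEND branch above, no row boundaries are honored here: the loop just pumps IO_SIZE-sized chunks from azread() straight into azwrite(), which is cheaper but cannot recount share->rows_recorded.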
*/ - int read; // Bytes read, gzread() returns int + int read; // Bytes read, azread() returns int char block[IO_SIZE]; - if (gzrewind(archive) == -1) + if (azrewind(&archive) == -1) { rc= HA_ERR_CRASHED_ON_USAGE; + DBUG_PRINT("info", ("archive HA_ERR_CRASHED_ON_USAGE")); goto error; } - while ((read= gzread(archive, block, IO_SIZE))) - gzwrite(writer, block, read); + while ((read= azread(&archive, block, IO_SIZE))) + azwrite(&writer, block, read); } - gzflush(writer, Z_SYNC_FLUSH); - gzclose(share->archive_write); - share->archive_write= writer; + azclose(&writer); my_rename(writer_filename,share->data_file_name,MYF(0)); DBUG_RETURN(0); error: - gzclose(writer); + azclose(&writer); DBUG_RETURN(rc); } @@ -1076,7 +1091,7 @@ int ha_archive::check(THD* thd, HA_CHECK_OPT* check_opt) thd->proc_info= "Checking table"; /* Flush any waiting data */ - gzflush(share->archive_write, Z_SYNC_FLUSH); + azflush(&(share->archive_write), Z_SYNC_FLUSH); /* First we create a buffer that we can use for reading rows, and can pass @@ -1090,10 +1105,10 @@ int ha_archive::check(THD* thd, HA_CHECK_OPT* check_opt) start of the file. */ if (!rc) - read_data_header(archive); + read_data_header(&archive); if (!rc) - while (!(rc= get_row(archive, buf))) + while (!(rc= get_row(&archive, buf))) count--; my_free((char*)buf, MYF(0)); @@ -1130,4 +1145,3 @@ bool ha_archive::check_and_repair(THD *thd) DBUG_RETURN(HA_ADMIN_OK); } } -#endif /* HAVE_ARCHIVE_DB */ diff --git a/sql/ha_archive.h b/sql/ha_archive.h index 56a4b9d1e27..7b957060f34 100644 --- a/sql/ha_archive.h +++ b/sql/ha_archive.h @@ -19,6 +19,7 @@ #endif #include <zlib.h> +#include "../storage/archive/azlib.h" /* Please read ha_archive.cc first. If you are looking for more general @@ -33,7 +34,7 @@ typedef struct st_archive_share { pthread_mutex_t mutex; THR_LOCK lock; File meta_file; /* Meta file we use */ - gzFile archive_write; /* Archive file we are working with */ + azio_stream archive_write; /* Archive file we are working with */ bool dirty; /* Flag for if a flush should occur */ bool crashed; /* Meta file is crashed */ ha_rows rows_recorded; /* Number of rows in tables */ @@ -49,8 +50,8 @@ class ha_archive: public handler { THR_LOCK_DATA lock; /* MySQL lock */ ARCHIVE_SHARE *share; /* Shared lock info */ - gzFile archive; /* Archive file we are working with */ - z_off_t current_position; /* The position of the row we just read */ + azio_stream archive; /* Archive file we are working with */ + my_off_t current_position; /* The position of the row we just read */ byte byte_buffer[IO_SIZE]; /* Initial buffer for our string */ String buffer; /* Buffer used for blob storage */ ha_rows scan_rows; /* Number of rows left in scan */ @@ -58,7 +59,7 @@ class ha_archive: public handler bool bulk_insert; /* If we are performing a bulk insert */ public: - ha_archive(TABLE *table_arg); + ha_archive(TABLE_SHARE *table_arg); ~ha_archive() { } @@ -77,19 +78,19 @@ public: int open(const char *name, int mode, uint test_if_locked); int close(void); int write_row(byte * buf); - int real_write_row(byte *buf, gzFile writer); + int real_write_row(byte *buf, azio_stream *writer); int delete_all_rows(); int rnd_init(bool scan=1); int rnd_next(byte *buf); int rnd_pos(byte * buf, byte *pos); - int get_row(gzFile file_to_read, byte *buf); + int get_row(azio_stream *file_to_read, byte *buf); int read_meta_file(File meta_file, ha_rows *rows); int write_meta_file(File meta_file, ha_rows rows, bool dirty); ARCHIVE_SHARE *get_share(const char *table_name, TABLE *table); int 
free_share(ARCHIVE_SHARE *share); bool auto_repair() const { return 1; } // For the moment we just do this - int read_data_header(gzFile file_to_read); - int write_data_header(gzFile file_to_write); + int read_data_header(azio_stream *file_to_read); + int write_data_header(azio_stream *file_to_write); void position(const byte *record); void info(uint); int create(const char *name, TABLE *form, HA_CREATE_INFO *create_info); @@ -109,5 +110,5 @@ public: }; bool archive_db_init(void); -bool archive_db_end(void); +int archive_db_end(ha_panic_function type); diff --git a/sql/ha_berkeley.cc b/sql/ha_berkeley.cc index 72af402a0dc..01d6ceed3f2 100644 --- a/sql/ha_berkeley.cc +++ b/sql/ha_berkeley.cc @@ -53,7 +53,6 @@ #include "mysql_priv.h" -#ifdef HAVE_BERKELEY_DB #include <m_ctype.h> #include <myisampack.h> #include <hash.h> @@ -72,13 +71,21 @@ #define STATUS_ROW_COUNT_INIT 2 #define STATUS_BDB_ANALYZE 4 +const u_int32_t bdb_DB_TXN_NOSYNC= DB_TXN_NOSYNC; +const u_int32_t bdb_DB_RECOVER= DB_RECOVER; +const u_int32_t bdb_DB_PRIVATE= DB_PRIVATE; +const u_int32_t bdb_DB_DIRECT_DB= DB_DIRECT_DB; +const u_int32_t bdb_DB_DIRECT_LOG= DB_DIRECT_LOG; const char *ha_berkeley_ext=".db"; bool berkeley_shared_data=0; -u_int32_t berkeley_init_flags= DB_PRIVATE | DB_RECOVER, berkeley_env_flags=0, - berkeley_lock_type=DB_LOCK_DEFAULT; -ulong berkeley_cache_size, berkeley_log_buffer_size, berkeley_log_file_size=0; +u_int32_t berkeley_init_flags= DB_PRIVATE | DB_RECOVER, + berkeley_env_flags= DB_LOG_AUTOREMOVE, + berkeley_lock_type= DB_LOCK_DEFAULT; +ulong berkeley_log_buffer_size=0 , berkeley_log_file_size=0; +ulonglong berkeley_cache_size= 0; char *berkeley_home, *berkeley_tmpdir, *berkeley_logdir; long berkeley_lock_scan_time=0; +ulong berkeley_region_size=0, berkeley_cache_parts=1; ulong berkeley_trans_retry=1; ulong berkeley_max_lock; pthread_mutex_t bdb_mutex; @@ -87,13 +94,17 @@ static DB_ENV *db_env; static HASH bdb_open_tables; const char *berkeley_lock_names[] = -{ "DEFAULT", "OLDEST","RANDOM","YOUNGEST",0 }; +{ "DEFAULT", "OLDEST", "RANDOM", "YOUNGEST", "EXPIRE", "MAXLOCKS", + "MAXWRITE", "MINLOCKS", "MINWRITE", 0 }; u_int32_t berkeley_lock_types[]= -{ DB_LOCK_DEFAULT, DB_LOCK_OLDEST, DB_LOCK_RANDOM }; +{ DB_LOCK_DEFAULT, DB_LOCK_OLDEST, DB_LOCK_RANDOM, DB_LOCK_YOUNGEST, + DB_LOCK_EXPIRE, DB_LOCK_MAXLOCKS, DB_LOCK_MAXWRITE, DB_LOCK_MINLOCKS, + DB_LOCK_MINWRITE }; TYPELIB berkeley_lock_typelib= {array_elements(berkeley_lock_names)-1,"", berkeley_lock_names, NULL}; -static void berkeley_print_error(const char *db_errpfx, char *buffer); +static void berkeley_print_error(const DB_ENV *db_env, const char *db_errpfx, + const char *buffer); static byte* bdb_get_key(BDB_SHARE *share,uint *length, my_bool not_used __attribute__((unused))); static BDB_SHARE *get_share(const char *table_name, TABLE *table); @@ -101,24 +112,28 @@ static int free_share(BDB_SHARE *share, TABLE *table, uint hidden_primary_key, bool mutex_is_locked); static int write_status(DB *status_block, char *buff, uint length); static void update_status(BDB_SHARE *share, TABLE *table); -static void berkeley_noticecall(DB_ENV *db_env, db_notices notice); static int berkeley_close_connection(THD *thd); static int berkeley_commit(THD *thd, bool all); static int berkeley_rollback(THD *thd, bool all); +static int berkeley_rollback_to_savepoint(THD* thd, void *savepoint); +static int berkeley_savepoint(THD* thd, void *savepoint); +static int berkeley_release_savepoint(THD* thd, void *savepoint); +static handler *berkeley_create_handler(TABLE_SHARE 
*table); handlerton berkeley_hton = { + MYSQL_HANDLERTON_INTERFACE_VERSION, "BerkeleyDB", SHOW_OPTION_YES, "Supports transactions and page-level locking", DB_TYPE_BERKELEY_DB, berkeley_init, 0, /* slot */ - 0, /* savepoint size */ + sizeof(DB_TXN *), /* savepoint size */ berkeley_close_connection, - NULL, /* savepoint_set */ - NULL, /* savepoint_rollback */ - NULL, /* savepoint_release */ + berkeley_savepoint, /* savepoint_set */ + berkeley_rollback_to_savepoint, /* savepoint_rollback */ + berkeley_release_savepoint, /* savepoint_release */ berkeley_commit, berkeley_rollback, NULL, /* prepare */ @@ -128,12 +143,24 @@ handlerton berkeley_hton = { NULL, /* create_cursor_read_view */ NULL, /* set_cursor_read_view */ NULL, /* close_cursor_read_view */ - HTON_CLOSE_CURSORS_AT_COMMIT + berkeley_create_handler, /* Create a new handler */ + NULL, /* Drop a database */ + berkeley_end, /* Panic call */ + NULL, /* Start Consistent Snapshot */ + berkeley_flush_logs, /* Flush logs */ + berkeley_show_status, /* Show status */ + HTON_CLOSE_CURSORS_AT_COMMIT | HTON_FLUSH_AFTER_RENAME }; +handler *berkeley_create_handler(TABLE_SHARE *table) +{ + return new ha_berkeley(table); +} + typedef struct st_berkeley_trx_data { DB_TXN *all; DB_TXN *stmt; + DB_TXN *sp_level; uint bdb_lock_count; } berkeley_trx_data; @@ -174,7 +201,6 @@ bool berkeley_init(void) goto error; db_env->set_errcall(db_env,berkeley_print_error); db_env->set_errpfx(db_env,"bdb"); - db_env->set_noticecall(db_env, berkeley_noticecall); db_env->set_tmp_dir(db_env, berkeley_tmpdir); db_env->set_data_dir(db_env, mysql_data_home); db_env->set_flags(db_env, berkeley_env_flags, 1); @@ -183,13 +209,20 @@ bool berkeley_init(void) if (opt_endinfo) db_env->set_verbose(db_env, - DB_VERB_CHKPOINT | DB_VERB_DEADLOCK | DB_VERB_RECOVERY, + DB_VERB_DEADLOCK | DB_VERB_RECOVERY, 1); - db_env->set_cachesize(db_env, 0, berkeley_cache_size, 0); + if (berkeley_cache_size > (uint) ~0) + db_env->set_cachesize(db_env, berkeley_cache_size / (1024*1024L*1024L), + berkeley_cache_size % (1024L*1024L*1024L), + berkeley_cache_parts); + else + db_env->set_cachesize(db_env, 0, berkeley_cache_size, berkeley_cache_parts); + db_env->set_lg_max(db_env, berkeley_log_file_size); db_env->set_lg_bsize(db_env, berkeley_log_buffer_size); db_env->set_lk_detect(db_env, berkeley_lock_type); + db_env->set_lg_regionmax(db_env, berkeley_region_size); if (berkeley_max_lock) db_env->set_lk_max(db_env, berkeley_max_lock); @@ -214,18 +247,19 @@ error: } -bool berkeley_end(void) +int berkeley_end(ha_panic_function type) { - int error; + int error= 0; DBUG_ENTER("berkeley_end"); - if (!db_env) - return 1; /* purecov: tested */ - berkeley_cleanup_log_files(); - error=db_env->close(db_env,0); // Error is logged - db_env=0; - hash_free(&bdb_open_tables); - pthread_mutex_destroy(&bdb_mutex); - DBUG_RETURN(error != 0); + if (db_env) + { + berkeley_cleanup_log_files(); + error= db_env->close(db_env,0); // Error is logged + db_env= 0; + hash_free(&bdb_open_tables); + pthread_mutex_destroy(&bdb_mutex); + } + DBUG_RETURN(error); } static int berkeley_close_connection(THD *thd) @@ -258,7 +292,7 @@ static int berkeley_commit(THD *thd, bool all) DBUG_PRINT("trans",("ending transaction %s", all ? "all" : "stmt")); berkeley_trx_data *trx=(berkeley_trx_data *)thd->ha_data[berkeley_hton.slot]; DB_TXN **txn= all ? 
&trx->all : &trx->stmt; - int error=txn_commit(*txn,0); + int error= (*txn)->commit(*txn,0); *txn=0; #ifndef DBUG_OFF if (error) @@ -273,15 +307,58 @@ static int berkeley_rollback(THD *thd, bool all) DBUG_PRINT("trans",("aborting transaction %s", all ? "all" : "stmt")); berkeley_trx_data *trx=(berkeley_trx_data *)thd->ha_data[berkeley_hton.slot]; DB_TXN **txn= all ? &trx->all : &trx->stmt; - int error=txn_abort(*txn); + int error= (*txn)->abort(*txn); *txn=0; DBUG_RETURN(error); } +static int berkeley_savepoint(THD* thd, void *savepoint) +{ + int error; + DB_TXN **save_txn= (DB_TXN**) savepoint; + DBUG_ENTER("berkeley_savepoint"); + berkeley_trx_data *trx=(berkeley_trx_data *)thd->ha_data[berkeley_hton.slot]; + if (!(error= db_env->txn_begin(db_env, trx->sp_level, save_txn, 0))) + { + trx->sp_level= *save_txn; + } + DBUG_RETURN(error); +} + +static int berkeley_rollback_to_savepoint(THD* thd, void *savepoint) +{ + int error; + DB_TXN *parent, **save_txn= (DB_TXN**) savepoint; + DBUG_ENTER("berkeley_rollback_to_savepoint"); + berkeley_trx_data *trx=(berkeley_trx_data *)thd->ha_data[berkeley_hton.slot]; + parent= (*save_txn)->parent; + if (!(error= (*save_txn)->abort(*save_txn))) + { + trx->sp_level= parent; + error= berkeley_savepoint(thd, savepoint); + } + DBUG_RETURN(error); +} + +static int berkeley_release_savepoint(THD* thd, void *savepoint) +{ + int error; + DB_TXN *parent, **save_txn= (DB_TXN**) savepoint; + DBUG_ENTER("berkeley_release_savepoint"); + berkeley_trx_data *trx=(berkeley_trx_data *)thd->ha_data[berkeley_hton.slot]; + parent= (*save_txn)->parent; + if (!(error= (*save_txn)->commit(*save_txn,0))) + { + trx->sp_level= parent; + *save_txn= 0; + } + DBUG_RETURN(error); +} -int berkeley_show_logs(Protocol *protocol) +static bool berkeley_show_logs(THD *thd, stat_print_fn *stat_print) { char **all_logs, **free_logs, **a, **f; + uint hton_name_len= strlen(berkeley_hton.name); int error=1; MEM_ROOT **root_ptr= my_pthread_getspecific_ptr(MEM_ROOT**,THR_MALLOC); MEM_ROOT show_logs_root, *old_mem_root= *root_ptr; @@ -306,21 +383,20 @@ int berkeley_show_logs(Protocol *protocol) { for (a = all_logs, f = free_logs; *a; ++a) { - protocol->prepare_for_resend(); - protocol->store(*a, system_charset_info); - protocol->store(STRING_WITH_LEN("BDB"), system_charset_info); if (f && *f && strcmp(*a, *f) == 0) { - f++; - protocol->store(SHOW_LOG_STATUS_FREE, system_charset_info); + f++; + if ((error= stat_print(thd, berkeley_hton.name, hton_name_len, + *a, strlen(*a), + STRING_WITH_LEN(SHOW_LOG_STATUS_FREE)))) + break; } else - protocol->store(SHOW_LOG_STATUS_INUSE, system_charset_info); - - if (protocol->write()) { - error=1; - goto err; + if ((error= stat_print(thd, berkeley_hton.name, hton_name_len, + *a, strlen(*a), + STRING_WITH_LEN(SHOW_LOG_STATUS_INUSE)))) + break; } } } @@ -330,26 +406,24 @@ err: DBUG_RETURN(error); } - -static void berkeley_print_error(const char *db_errpfx, char *buffer) +bool berkeley_show_status(THD *thd, stat_print_fn *stat_print, + enum ha_stat_type stat_type) { - sql_print_error("%s: %s",db_errpfx,buffer); /* purecov: tested */ + switch (stat_type) { + case HA_ENGINE_LOGS: + return berkeley_show_logs(thd, stat_print); + default: + return FALSE; + } } - -static void berkeley_noticecall(DB_ENV *db_env, db_notices notice) +static void berkeley_print_error(const DB_ENV *db_env, const char *db_errpfx, + const char *buffer) { - switch (notice) - { - case DB_NOTICE_LOGFILE_CHANGED: /* purecov: tested */ - pthread_mutex_lock(&LOCK_manager); - manager_status |= 
MANAGER_BERKELEY_LOG_CLEANUP; - pthread_mutex_unlock(&LOCK_manager); - pthread_cond_signal(&COND_manager); - break; - } + sql_print_error("%s: %s",db_errpfx,buffer); /* purecov: tested */ } + void berkeley_cleanup_log_files(void) { DBUG_ENTER("berkeley_cleanup_log_files"); @@ -387,7 +461,7 @@ void berkeley_cleanup_log_files(void) ** Berkeley DB tables *****************************************************************************/ -ha_berkeley::ha_berkeley(TABLE *table_arg) +ha_berkeley::ha_berkeley(TABLE_SHARE *table_arg) :handler(&berkeley_hton, table_arg), alloc_ptr(0), rec_buff(0), file(0), int_table_flags(HA_REC_NOT_IN_SEQ | HA_FAST_KEY_READ | HA_NULL_IN_KEY | HA_CAN_INDEX_BLOBS | HA_NOT_EXACT_COUNT | @@ -414,13 +488,14 @@ ulong ha_berkeley::index_flags(uint idx, uint part, bool all_parts) const | HA_READ_RANGE); for (uint i= all_parts ? 0 : part ; i <= part ; i++) { - if (table->key_info[idx].key_part[i].field->type() == FIELD_TYPE_BLOB) + KEY_PART_INFO *key_part= table_share->key_info[idx].key_part+i; + if (key_part->field->type() == FIELD_TYPE_BLOB) { /* We can't use BLOBS to shortcut sorts */ flags&= ~(HA_READ_ORDER | HA_KEYREAD_ONLY | HA_READ_RANGE); break; } - switch (table->key_info[idx].key_part[i].field->key_type()) { + switch (key_part->field->key_type()) { case HA_KEYTYPE_TEXT: case HA_KEYTYPE_VARTEXT1: case HA_KEYTYPE_VARTEXT2: @@ -428,8 +503,7 @@ ulong ha_berkeley::index_flags(uint idx, uint part, bool all_parts) const As BDB stores only one copy of equal strings, we can't use key read on these. Binary collations do support key read though. */ - if (!(table->key_info[idx].key_part[i].field->charset()->state - & MY_CS_BINSORT)) + if (!(key_part->field->charset()->state & MY_CS_BINSORT)) flags&= ~HA_KEYREAD_ONLY; break; default: // Keep compiler happy @@ -558,7 +632,6 @@ int ha_berkeley::open(const char *name, int mode, uint test_if_locked) uint open_mode=(mode == O_RDONLY ? DB_RDONLY : 0) | DB_THREAD; uint max_key_length; int error; - TABLE_SHARE *table_share= table->s; DBUG_ENTER("ha_berkeley::open"); /* Open primary key */ @@ -580,7 +653,7 @@ int ha_berkeley::open(const char *name, int mode, uint test_if_locked) &key_buff2, max_key_length, &primary_key_buff, (hidden_primary_key ? 0 : - table->key_info[table_share->primary_key].key_length), + table_share->key_info[table_share->primary_key].key_length), NullS))) DBUG_RETURN(1); /* purecov: inspected */ if (!(rec_buff= (byte*) my_malloc((alloced_rec_buff_length= @@ -623,10 +696,10 @@ int ha_berkeley::open(const char *name, int mode, uint test_if_locked) berkeley_cmp_packed_key)); if (!hidden_primary_key) file->app_private= (void*) (table->key_info + table_share->primary_key); - if ((error= txn_begin(db_env, 0, (DB_TXN**) &transaction, 0)) || + if ((error= db_env->txn_begin(db_env, NULL, (DB_TXN**) &transaction, 0)) || (error= (file->open(file, transaction, fn_format(name_buff, name, "", ha_berkeley_ext, - 2 | 4), + MY_UNPACK_FILENAME|MY_APPEND_EXT), "main", DB_BTREE, open_mode, 0))) || (error= transaction->commit(transaction, 0))) { @@ -639,7 +712,7 @@ int ha_berkeley::open(const char *name, int mode, uint test_if_locked) /* Open other keys; These are part of the share structure */ key_file[primary_key]=file; - key_type[primary_key]=DB_NOOVERWRITE; + key_type[primary_key]= hidden_primary_key ? 
0 : DB_NOOVERWRITE; DB **ptr=key_file; for (uint i=0, used_keys=0; i < table_share->keys ; i++, ptr++) @@ -662,7 +735,8 @@ int ha_berkeley::open(const char *name, int mode, uint test_if_locked) DBUG_PRINT("bdb",("Setting DB_DUP for key %u", i)); (*ptr)->set_flags(*ptr, DB_DUP); } - if ((error= txn_begin(db_env, 0, (DB_TXN**) &transaction, 0)) || + if ((error= db_env->txn_begin(db_env, NULL, (DB_TXN**) &transaction, + 0)) || (error=((*ptr)->open(*ptr, transaction, name_buff, part, DB_BTREE, open_mode, 0))) || (error= transaction->commit(transaction, 0))) @@ -736,9 +810,9 @@ bool ha_berkeley::fix_rec_buff_for_blob(ulong length) ulong ha_berkeley::max_row_length(const byte *buf) { - ulong length= table->s->reclength + table->s->fields*2; + ulong length= table_share->reclength + table_share->fields*2; uint *ptr, *end; - for (ptr= table->s->blob_field, end=ptr + table->s->blob_fields ; + for (ptr= table_share->blob_field, end=ptr + table_share->blob_fields ; ptr != end ; ptr++) { @@ -765,25 +839,26 @@ int ha_berkeley::pack_row(DBT *row, const byte *record, bool new_row) if (share->fixed_length_row) { row->data=(void*) record; - row->size= table->s->reclength+hidden_primary_key; + row->size= table_share->reclength+hidden_primary_key; if (hidden_primary_key) { if (new_row) get_auto_primary_key(current_ident); - memcpy_fixed((char*) record+table->s->reclength, (char*) current_ident, + memcpy_fixed((char*) record+table_share->reclength, + (char*) current_ident, BDB_HIDDEN_PRIMARY_KEY_LENGTH); } return 0; } - if (table->s->blob_fields) + if (table_share->blob_fields) { if (fix_rec_buff_for_blob(max_row_length(record))) return HA_ERR_OUT_OF_MEM; /* purecov: inspected */ } /* Copy null bits */ - memcpy(rec_buff, record, table->s->null_bytes); - ptr= rec_buff + table->s->null_bytes; + memcpy(rec_buff, record, table_share->null_bytes); + ptr= rec_buff + table_share->null_bytes; for (Field **field=table->field ; *field ; field++) ptr=(byte*) (*field)->pack((char*) ptr, @@ -806,13 +881,13 @@ int ha_berkeley::pack_row(DBT *row, const byte *record, bool new_row) void ha_berkeley::unpack_row(char *record, DBT *row) { if (share->fixed_length_row) - memcpy(record,(char*) row->data,table->s->reclength+hidden_primary_key); + memcpy(record,(char*) row->data,table_share->reclength+hidden_primary_key); else { /* Copy null bits */ const char *ptr= (const char*) row->data; - memcpy(record, ptr, table->s->null_bytes); - ptr+= table->s->null_bytes; + memcpy(record, ptr, table_share->null_bytes); + ptr+= table_share->null_bytes; for (Field **field=table->field ; *field ; field++) ptr= (*field)->unpack(record + (*field)->offset(), ptr); } @@ -958,7 +1033,7 @@ int ha_berkeley::write_row(byte * record) DBUG_RETURN(error); /* purecov: inspected */ table->insert_or_update= 1; // For handling of VARCHAR - if (table->s->keys + test(hidden_primary_key) == 1) + if (table_share->keys + test(hidden_primary_key) == 1) { error=file->put(file, transaction, create_key(&prim_key, primary_key, key_buff, record), @@ -977,7 +1052,7 @@ int ha_berkeley::write_row(byte * record) &row, key_type[primary_key]))) { changed_keys.set_bit(primary_key); - for (uint keynr=0 ; keynr < table->s->keys ; keynr++) + for (uint keynr=0 ; keynr < table_share->keys ; keynr++) { if (keynr == primary_key) continue; @@ -1005,7 +1080,7 @@ int ha_berkeley::write_row(byte * record) { new_error = 0; for (uint keynr=0; - keynr < table->s->keys+test(hidden_primary_key); + keynr < table_share->keys+test(hidden_primary_key); keynr++) { if (changed_keys.is_set(keynr)) @@ 
-1148,7 +1223,7 @@ int ha_berkeley::restore_keys(DB_TXN *trans, key_map *changed_keys, that one just put back the old value. */ if (!changed_keys->is_clear_all()) { - for (keynr=0 ; keynr < table->s->keys+test(hidden_primary_key) ; keynr++) + for (keynr=0 ; keynr < table_share->keys+test(hidden_primary_key) ; keynr++) { if (changed_keys->is_set(keynr)) { @@ -1213,7 +1288,7 @@ int ha_berkeley::update_row(const byte * old_row, byte * new_row) using_ignore))) { // Update all other keys - for (uint keynr=0 ; keynr < table->s->keys ; keynr++) + for (uint keynr=0 ; keynr < table_share->keys ; keynr++) { if (keynr == primary_key) continue; @@ -1325,7 +1400,7 @@ int ha_berkeley::remove_keys(DB_TXN *trans, const byte *record, { int result = 0; for (uint keynr=0; - keynr < table->s->keys+test(hidden_primary_key); + keynr < table_share->keys+test(hidden_primary_key); keynr++) { if (keys->is_set(keynr)) @@ -1346,7 +1421,7 @@ int ha_berkeley::delete_row(const byte * record) { int error; DBT row, prim_key; - key_map keys= table->s->keys_in_use; + key_map keys= table_share->keys_in_use; DBUG_ENTER("delete_row"); statistic_increment(table->in_use->status_var.ha_delete_count,&LOCK_status); @@ -1378,11 +1453,12 @@ int ha_berkeley::delete_row(const byte * record) } -int ha_berkeley::index_init(uint keynr) +int ha_berkeley::index_init(uint keynr, bool sorted) { int error; DBUG_ENTER("ha_berkeley::index_init"); - DBUG_PRINT("enter",("table: '%s' key: %d", table->s->table_name, keynr)); + DBUG_PRINT("enter",("table: '%s' key: %d", table_share->table_name.str, + keynr)); /* Under some very rare conditions (like full joins) we may already have @@ -1409,7 +1485,7 @@ int ha_berkeley::index_end() DBUG_ENTER("ha_berkely::index_end"); if (cursor) { - DBUG_PRINT("enter",("table: '%s'", table->s->table_name)); + DBUG_PRINT("enter",("table: '%s'", table_share->table_name.str)); error=cursor->c_close(cursor); cursor=0; } @@ -1656,7 +1732,7 @@ int ha_berkeley::rnd_init(bool scan) { DBUG_ENTER("rnd_init"); current_row.flags=DB_DBT_REALLOC; - DBUG_RETURN(index_init(primary_key)); + DBUG_RETURN(index_init(primary_key, 0)); } int ha_berkeley::rnd_end() @@ -1764,14 +1840,14 @@ void ha_berkeley::info(uint flag) if ((flag & HA_STATUS_CONST) || version != share->version) { version=share->version; - for (uint i=0 ; i < table->s->keys ; i++) + for (uint i=0 ; i < table_share->keys ; i++) { table->key_info[i].rec_per_key[table->key_info[i].key_parts-1]= share->rec_per_key[i]; } } /* Don't return key if we got an error for the internal primary key */ - if (flag & HA_STATUS_ERRKEY && last_dup_key < table->s->keys) + if (flag & HA_STATUS_ERRKEY && last_dup_key < table_share->keys) errkey= last_dup_key; DBUG_VOID_RETURN; } @@ -1842,6 +1918,8 @@ int ha_berkeley::external_lock(THD *thd, int lock_type) if (!trx) DBUG_RETURN(1); } + if (trx->all == 0) + trx->sp_level= 0; if (lock_type != F_UNLCK) { if (!trx->bdb_lock_count++) @@ -1855,17 +1933,18 @@ int ha_berkeley::external_lock(THD *thd, int lock_type) /* We have to start a master transaction */ DBUG_PRINT("trans",("starting transaction all: options: 0x%lx", (ulong) thd->options)); - if ((error=txn_begin(db_env, 0, &trx->all, 0))) + if ((error= db_env->txn_begin(db_env, NULL, &trx->all, 0))) { trx->bdb_lock_count--; // We didn't get the lock DBUG_RETURN(error); } + trx->sp_level= trx->all; trans_register_ha(thd, TRUE, &berkeley_hton); if (thd->in_lock_tables) DBUG_RETURN(0); // Don't create stmt trans } DBUG_PRINT("trans",("starting transaction stmt")); - if ((error=txn_begin(db_env, 
trx->all, &trx->stmt, 0))) + if ((error= db_env->txn_begin(db_env, trx->sp_level, &trx->stmt, 0))) { /* We leave the possible master transaction open */ trx->bdb_lock_count--; // We didn't get the lock @@ -1890,7 +1969,7 @@ int ha_berkeley::external_lock(THD *thd, int lock_type) We must in this case commit the work to keep the row locks */ DBUG_PRINT("trans",("commiting non-updating transaction")); - error= txn_commit(trx->stmt,0); + error= trx->stmt->commit(trx->stmt,0); trx->stmt= transaction= 0; } } @@ -1919,7 +1998,7 @@ int ha_berkeley::start_stmt(THD *thd, thr_lock_type lock_type) if (!trx->stmt) { DBUG_PRINT("trans",("starting transaction stmt")); - error=txn_begin(db_env, trx->all, &trx->stmt, 0); + error= db_env->txn_begin(db_env, trx->sp_level, &trx->stmt, 0); trans_register_ha(thd, FALSE, &berkeley_hton); } transaction= trx->stmt; @@ -2014,13 +2093,14 @@ int ha_berkeley::create(const char *name, register TABLE *form, int error; DBUG_ENTER("ha_berkeley::create"); - fn_format(name_buff,name,"", ha_berkeley_ext,2 | 4); + fn_format(name_buff,name,"", ha_berkeley_ext, + MY_UNPACK_FILENAME|MY_APPEND_EXT); /* Create the main table that will hold the real rows */ if ((error= create_sub_table(name_buff,"main",DB_BTREE,0))) DBUG_RETURN(error); /* purecov: inspected */ - primary_key= table->s->primary_key; + primary_key= form->s->primary_key; /* Create the keys */ for (uint i=0; i < form->s->keys; i++) { @@ -2028,7 +2108,7 @@ int ha_berkeley::create(const char *name, register TABLE *form, { sprintf(part,"key%02d",index++); if ((error= create_sub_table(name_buff, part, DB_BTREE, - (table->key_info[i].flags & HA_NOSAME) ? 0 : + (form->key_info[i].flags & HA_NOSAME) ? 0 : DB_DUP))) DBUG_RETURN(error); /* purecov: inspected */ } @@ -2044,7 +2124,7 @@ int ha_berkeley::create(const char *name, register TABLE *form, "status", DB_BTREE, DB_CREATE, 0)))) { char rec_buff[4+MAX_KEY*4]; - uint length= 4+ table->s->keys*4; + uint length= 4+ form->s->keys*4; bzero(rec_buff, length); error= write_status(status_block, rec_buff, length); status_block->close(status_block,0); @@ -2063,8 +2143,9 @@ int ha_berkeley::delete_table(const char *name) if ((error=db_create(&file, db_env, 0))) my_errno=error; /* purecov: inspected */ else - error=file->remove(file,fn_format(name_buff,name,"",ha_berkeley_ext,2 | 4), - NULL,0); + error=file->remove(file,fn_format(name_buff,name,"",ha_berkeley_ext, + MY_UNPACK_FILENAME|MY_APPEND_EXT), + NULL,0); file=0; // Safety DBUG_RETURN(error); } @@ -2082,9 +2163,11 @@ int ha_berkeley::rename_table(const char * from, const char * to) { /* On should not do a file->close() after rename returns */ error= file->rename(file, - fn_format(from_buff, from, "", ha_berkeley_ext, 2 | 4), + fn_format(from_buff, from, "", + ha_berkeley_ext, + MY_UNPACK_FILENAME|MY_APPEND_EXT), NULL, fn_format(to_buff, to, "", ha_berkeley_ext, - 2 | 4), 0); + MY_UNPACK_FILENAME|MY_APPEND_EXT), 0); } return error; } @@ -2164,9 +2247,9 @@ ulonglong ha_berkeley::get_auto_increment() (void) ha_berkeley::extra(HA_EXTRA_KEYREAD); /* Set 'active_index' */ - ha_berkeley::index_init(table->s->next_number_index); + ha_berkeley::index_init(table_share->next_number_index, 0); - if (!table->s->next_number_key_offset) + if (!table_share->next_number_key_offset) { // Autoincrement at key-start error=ha_berkeley::index_last(table->record[1]); } @@ -2179,7 +2262,7 @@ ulonglong ha_berkeley::get_auto_increment() /* Reading next available number for a sub key */ ha_berkeley::create_key(&last_key, active_index, key_buff, 
table->record[0], - table->s->next_number_key_offset); + table_share->next_number_key_offset); /* Store for compare */ memcpy(old_key.data=key_buff2, key_buff, (old_key.size=last_key.size)); old_key.app_private=(void*) key_info; @@ -2209,7 +2292,7 @@ ulonglong ha_berkeley::get_auto_increment() } if (!error) nr= (ulonglong) - table->next_number_field->val_int_offset(table->s->rec_buff_length)+1; + table->next_number_field->val_int_offset(table_share->rec_buff_length)+1; ha_berkeley::index_end(); (void) ha_berkeley::extra(HA_EXTRA_NO_KEYREAD); return nr; @@ -2259,48 +2342,14 @@ int ha_berkeley::analyze(THD* thd, HA_CHECK_OPT* check_opt) berkeley_trx_data *trx=(berkeley_trx_data *)thd->ha_data[berkeley_hton.slot]; DBUG_ASSERT(trx); - /* - Original bdb documentation says: - "The DB->stat method cannot be transaction-protected. - For this reason, it should be called in a thread of - control that has no open cursors or active transactions." - So, let's check if there are any changes have been done since - the beginning of the transaction.. - */ - - if (!db_env->txn_stat(db_env, &txn_stat_ptr, 0) && - txn_stat_ptr && txn_stat_ptr->st_nactive>=2) - { - DB_TXN_ACTIVE *atxn_stmt= 0, *atxn_all= 0; - - u_int32_t all_id= trx->all->id(trx->all); - u_int32_t stmt_id= trx->stmt->id(trx->stmt); - - DB_TXN_ACTIVE *cur= txn_stat_ptr->st_txnarray; - DB_TXN_ACTIVE *end= cur + txn_stat_ptr->st_nactive; - for (; cur!=end && (!atxn_stmt || !atxn_all); cur++) - { - if (cur->txnid==all_id) atxn_all= cur; - if (cur->txnid==stmt_id) atxn_stmt= cur; - } - - if (atxn_stmt && atxn_all && - log_compare(&atxn_stmt->lsn,&atxn_all->lsn)) - { - free(txn_stat_ptr); - return HA_ADMIN_REJECT; - } - free(txn_stat_ptr); - } - - for (i=0 ; i < table->s->keys ; i++) + for (i=0 ; i < table_share->keys ; i++) { if (stat) { free(stat); stat=0; } - if ((key_file[i]->stat)(key_file[i], (void*) &stat, 0)) + if ((key_file[i]->stat)(key_file[i], trx->all, (void*) &stat, 0)) goto err; /* purecov: inspected */ share->rec_per_key[i]= (stat->bt_ndata / (stat->bt_nkeys ? stat->bt_nkeys : 1)); @@ -2313,7 +2362,7 @@ int ha_berkeley::analyze(THD* thd, HA_CHECK_OPT* check_opt) free(stat); stat=0; } - if ((file->stat)(file, (void*) &stat, 0)) + if ((file->stat)(file, trx->all, (void*) &stat, 0)) goto err; /* purecov: inspected */ } pthread_mutex_lock(&share->mutex); @@ -2368,7 +2417,8 @@ int ha_berkeley::check(THD* thd, HA_CHECK_OPT* check_opt) (hidden_primary_key ? berkeley_cmp_hidden_key : berkeley_cmp_packed_key)); tmp_file->app_private= (void*) (table->key_info+table->primary_key); - fn_format(name_buff,share->table_name,"", ha_berkeley_ext, 2 | 4); + fn_format(name_buff,share->table_name.str,"", ha_berkeley_ext, + MY_UNPACK_FILENAME|MY_APPEND_EXT); if ((error=tmp_file->verify(tmp_file, name_buff, NullS, (FILE*) 0, hidden_primary_key ? 
0 : DB_NOORDERCHK))) { @@ -2442,7 +2492,7 @@ static BDB_SHARE *get_share(const char *table_name, TABLE *table) share->rec_per_key = rec_per_key; share->table_name = tmp_name; share->table_name_length=length; - strmov(share->table_name,table_name); + strmov(share->table_name, table_name); share->key_file = key_file; share->key_type = key_type; if (my_hash_insert(&bdb_open_tables, (byte*) share)) @@ -2503,7 +2553,7 @@ void ha_berkeley::get_status() if (!(share->status & STATUS_PRIMARY_KEY_INIT)) { (void) extra(HA_EXTRA_KEYREAD); - index_init(primary_key); + index_init(primary_key, 0); if (!index_last(table->record[1])) share->auto_ident=uint5korr(current_ident); index_end(); @@ -2514,7 +2564,8 @@ void ha_berkeley::get_status() char name_buff[FN_REFLEN]; uint open_mode= (((table->db_stat & HA_READ_ONLY) ? DB_RDONLY : 0) | DB_THREAD); - fn_format(name_buff, share->table_name,"", ha_berkeley_ext, 2 | 4); + fn_format(name_buff, share->table_name, "", ha_berkeley_ext, + MY_UNPACK_FILENAME|MY_APPEND_EXT); if (!db_create(&share->status_block, db_env, 0)) { if (share->status_block->open(share->status_block, NULL, name_buff, @@ -2528,7 +2579,7 @@ void ha_berkeley::get_status() if (!(share->status & STATUS_ROW_COUNT_INIT) && share->status_block) { share->org_rows= share->rows= - table->s->max_rows ? table->s->max_rows : HA_BERKELEY_MAX_ROWS; + table_share->max_rows ? table_share->max_rows : HA_BERKELEY_MAX_ROWS; if (!share->status_block->cursor(share->status_block, 0, &cursor, 0)) { DBT row; @@ -2543,7 +2594,7 @@ void ha_berkeley::get_status() uint i; uchar *pos=(uchar*) row.data; share->org_rows=share->rows=uint4korr(pos); pos+=4; - for (i=0 ; i < table->s->keys ; i++) + for (i=0 ; i < table_share->keys ; i++) { share->rec_per_key[i]=uint4korr(pos); pos+=4; @@ -2595,8 +2646,9 @@ static void update_status(BDB_SHARE *share, TABLE *table) goto end; /* purecov: inspected */ share->status_block->set_flags(share->status_block,0); /* purecov: inspected */ if (share->status_block->open(share->status_block, NULL, - fn_format(name_buff,share->table_name,"", - ha_berkeley_ext,2 | 4), + fn_format(name_buff,share->table_name, + "", ha_berkeley_ext, + MY_UNPACK_FILENAME|MY_APPEND_EXT), "status", DB_BTREE, DB_THREAD | DB_CREATE, my_umask)) /* purecov: inspected */ goto end; /* purecov: inspected */ @@ -2608,7 +2660,7 @@ static void update_status(BDB_SHARE *share, TABLE *table) { int4store(pos,share->rec_per_key[i]); pos+=4; } - DBUG_PRINT("info",("updating status for %s",share->table_name)); + DBUG_PRINT("info",("updating status for %s", share->table_name)); (void) write_status(share->status_block, rec_buff, (uint) (pos-rec_buff)); share->status&= ~STATUS_BDB_ANALYZE; @@ -2638,7 +2690,7 @@ int ha_berkeley::cmp_ref(const byte *ref1, const byte *ref2) int result; Field *field; - KEY *key_info=table->key_info+table->s->primary_key; + KEY *key_info=table->key_info+table_share->primary_key; KEY_PART_INFO *key_part=key_info->key_part; KEY_PART_INFO *end=key_part+key_info->key_parts; @@ -2656,4 +2708,13 @@ int ha_berkeley::cmp_ref(const byte *ref1, const byte *ref2) return 0; } -#endif /* HAVE_BERKELEY_DB */ + +bool ha_berkeley::check_if_incompatible_data(HA_CREATE_INFO *info, + uint table_changes) +{ + if (table_changes < IS_EQUAL_YES) + return COMPATIBLE_DATA_NO; + return COMPATIBLE_DATA_YES; +} + + diff --git a/sql/ha_berkeley.h b/sql/ha_berkeley.h index 16e4db59c10..21b618b8d6d 100644 --- a/sql/ha_berkeley.h +++ b/sql/ha_berkeley.h @@ -84,7 +84,7 @@ class ha_berkeley: public handler DBT *get_pos(DBT *to, byte *pos); 
public: - ha_berkeley(TABLE *table_arg); + ha_berkeley(TABLE_SHARE *table_arg); ~ha_berkeley() {} const char *table_type() const { return "BerkeleyDB"; } ulong index_flags(uint idx, uint part, bool all_parts) const; @@ -92,7 +92,7 @@ class ha_berkeley: public handler const char **bas_ext() const; ulong table_flags(void) const { return int_table_flags; } uint max_supported_keys() const { return MAX_KEY-1; } - uint extra_rec_buf_length() { return BDB_HIDDEN_PRIMARY_KEY_LENGTH; } + uint extra_rec_buf_length() const { return BDB_HIDDEN_PRIMARY_KEY_LENGTH; } ha_rows estimate_rows_upper_bound(); uint max_supported_key_length() const { return UINT_MAX32; } uint max_supported_key_part_length() const { return UINT_MAX32; } @@ -106,7 +106,7 @@ class ha_berkeley: public handler int write_row(byte * buf); int update_row(const byte * old_data, byte * new_data); int delete_row(const byte * buf); - int index_init(uint index); + int index_init(uint index, bool sorted); int index_end(); int index_read(byte * buf, const byte * key, uint key_len, enum ha_rkey_function find_flag); @@ -154,17 +154,25 @@ class ha_berkeley: public handler uint8 table_cache_type() { return HA_CACHE_TBL_TRANSACT; } bool primary_key_is_clustered() { return true; } int cmp_ref(const byte *ref1, const byte *ref2); + bool check_if_incompatible_data(HA_CREATE_INFO *info, uint table_changes); }; +extern const u_int32_t bdb_DB_TXN_NOSYNC; +extern const u_int32_t bdb_DB_RECOVER; +extern const u_int32_t bdb_DB_PRIVATE; +extern const u_int32_t bdb_DB_DIRECT_DB; +extern const u_int32_t bdb_DB_DIRECT_LOG; extern bool berkeley_shared_data; extern u_int32_t berkeley_init_flags,berkeley_env_flags, berkeley_lock_type, berkeley_lock_types[]; -extern ulong berkeley_cache_size, berkeley_max_lock, berkeley_log_buffer_size; +extern ulong berkeley_max_lock, berkeley_log_buffer_size; +extern ulonglong berkeley_cache_size; +extern ulong berkeley_region_size, berkeley_cache_parts; extern char *berkeley_home, *berkeley_tmpdir, *berkeley_logdir; extern long berkeley_lock_scan_time; extern TYPELIB berkeley_lock_typelib; bool berkeley_init(void); -bool berkeley_end(void); +int berkeley_end(ha_panic_function type); bool berkeley_flush_logs(void); -int berkeley_show_logs(Protocol *protocol); +bool berkeley_show_status(THD *thd, stat_print_fn *print, enum ha_stat_type); diff --git a/sql/ha_blackhole.cc b/sql/ha_blackhole.cc index 2505919af39..615836b9867 100644 --- a/sql/ha_blackhole.cc +++ b/sql/ha_blackhole.cc @@ -20,13 +20,17 @@ #endif #include "mysql_priv.h" -#ifdef HAVE_BLACKHOLE_DB #include "ha_blackhole.h" +/* Static declarations for handlerton */ + +static handler *blackhole_create_handler(TABLE_SHARE *table); + /* Blackhole storage engine handlerton */ handlerton blackhole_hton= { + MYSQL_HANDLERTON_INTERFACE_VERSION, "BLACKHOLE", SHOW_OPTION_YES, "/dev/null storage engine (anything you write to it disappears)", @@ -47,14 +51,27 @@ handlerton blackhole_hton= { NULL, /* create_cursor_read_view */ NULL, /* set_cursor_read_view */ NULL, /* close_cursor_read_view */ + blackhole_create_handler, /* Create a new handler */ + NULL, /* Drop a database */ + NULL, /* Panic call */ + NULL, /* Start Consistent Snapshot */ + NULL, /* Flush logs */ + NULL, /* Show status */ HTON_CAN_RECREATE }; + +static handler *blackhole_create_handler(TABLE_SHARE *table) +{ + return new ha_blackhole(table); +} + + /***************************************************************************** ** BLACKHOLE tables 
*****************************************************************************/ -ha_blackhole::ha_blackhole(TABLE *table_arg) +ha_blackhole::ha_blackhole(TABLE_SHARE *table_arg) :handler(&blackhole_hton, table_arg) {} @@ -93,13 +110,12 @@ int ha_blackhole::create(const char *name, TABLE *table_arg, const char *ha_blackhole::index_type(uint key_number) { DBUG_ENTER("ha_blackhole::index_type"); - DBUG_RETURN((table->key_info[key_number].flags & HA_FULLTEXT) ? + DBUG_RETURN((table_share->key_info[key_number].flags & HA_FULLTEXT) ? "FULLTEXT" : - (table->key_info[key_number].flags & HA_SPATIAL) ? + (table_share->key_info[key_number].flags & HA_SPATIAL) ? "SPATIAL" : - (table->key_info[key_number].algorithm == HA_KEY_ALG_RTREE) ? - "RTREE" : - "BTREE"); + (table_share->key_info[key_number].algorithm == + HA_KEY_ALG_RTREE) ? "RTREE" : "BTREE"); } int ha_blackhole::write_row(byte * buf) @@ -227,4 +243,3 @@ int ha_blackhole::index_last(byte * buf) DBUG_RETURN(HA_ERR_END_OF_FILE); } -#endif /* HAVE_BLACKHOLE_DB */ diff --git a/sql/ha_blackhole.h b/sql/ha_blackhole.h index 7238147a06a..15e12659aa0 100644 --- a/sql/ha_blackhole.h +++ b/sql/ha_blackhole.h @@ -28,7 +28,7 @@ class ha_blackhole: public handler THR_LOCK thr_lock; public: - ha_blackhole(TABLE *table_arg); + ha_blackhole(TABLE_SHARE *table_arg); ~ha_blackhole() { } @@ -49,7 +49,7 @@ public: } ulong index_flags(uint inx, uint part, bool all_parts) const { - return ((table->key_info[inx].algorithm == HA_KEY_ALG_FULLTEXT) ? + return ((table_share->key_info[inx].algorithm == HA_KEY_ALG_FULLTEXT) ? 0 : HA_READ_NEXT | HA_READ_PREV | HA_READ_RANGE | HA_READ_ORDER | HA_KEYREAD_ONLY); } @@ -84,4 +84,5 @@ public: THR_LOCK_DATA **store_lock(THD *thd, THR_LOCK_DATA **to, enum thr_lock_type lock_type); + bool has_transactions() { return 1; } }; diff --git a/sql/ha_federated.cc b/sql/ha_federated.cc index 14b79a9a418..bc087ac25e7 100644 --- a/sql/ha_federated.cc +++ b/sql/ha_federated.cc @@ -351,21 +351,23 @@ #pragma implementation // gcc: Class implementation #endif -#ifdef HAVE_FEDERATED_DB #include "ha_federated.h" #include "m_string.h" /* Variables for federated share methods */ -static HASH federated_open_tables; // Hash used to track open - // tables -pthread_mutex_t federated_mutex; // This is the mutex we use to - // init the hash -static int federated_init= FALSE; // Variable for checking the - // init state of hash +static HASH federated_open_tables; // To track open tables +pthread_mutex_t federated_mutex; // To init the hash +static int federated_init= FALSE; // Checking the state of hash + +/* Static declarations for handlerton */ +static handler *federated_create_handler(TABLE_SHARE *table); +static int federated_commit(THD *thd, bool all); +static int federated_rollback(THD *thd, bool all); /* Federated storage engine handlerton */ handlerton federated_hton= { + MYSQL_HANDLERTON_INTERFACE_VERSION, "FEDERATED", SHOW_OPTION_YES, "Federated MySQL storage engine", @@ -377,8 +379,8 @@ handlerton federated_hton= { NULL, /* savepoint */ NULL, /* rollback to savepoint */ NULL, /* release savepoint */ - NULL, /* commit */ - NULL, /* rollback */ + federated_commit, /* commit */ + federated_rollback, /* rollback */ NULL, /* prepare */ NULL, /* recover */ NULL, /* commit_by_xid */ @@ -386,11 +388,23 @@ handlerton federated_hton= { NULL, /* create_cursor_read_view */ NULL, /* set_cursor_read_view */ NULL, /* close_cursor_read_view */ + federated_create_handler, /* Create a new handler */ + NULL, /* Drop a database */ + federated_db_end, /* Panic call */ +
NULL, /* Start Consistent Snapshot */ + NULL, /* Flush logs */ + NULL, /* Show status */ HTON_ALTER_NOT_SUPPORTED }; -/* Function we use in the creation of our hash to get key. */ +static handler *federated_create_handler(TABLE_SHARE *table) +{ + return new ha_federated(table); +} + + +/* Function we use in the creation of our hash to get key */ static byte *federated_get_key(FEDERATED_SHARE *share, uint *length, my_bool not_used __attribute__ ((unused))) @@ -416,16 +430,14 @@ bool federated_db_init() DBUG_ENTER("federated_db_init"); if (pthread_mutex_init(&federated_mutex, MY_MUTEX_INIT_FAST)) goto error; - if (hash_init(&federated_open_tables, system_charset_info, 32, 0, 0, + if (!hash_init(&federated_open_tables, system_charset_info, 32, 0, 0, (hash_get_key) federated_get_key, 0, 0)) { - VOID(pthread_mutex_destroy(&federated_mutex)); - } - else - { federated_init= TRUE; DBUG_RETURN(FALSE); } + + VOID(pthread_mutex_destroy(&federated_mutex)); error: have_federated_db= SHOW_OPTION_DISABLED; // If we couldn't use handler DBUG_RETURN(TRUE); @@ -437,13 +449,12 @@ error: SYNOPSIS federated_db_end() - void RETURN FALSE OK */ -bool federated_db_end() +int federated_db_end(ha_panic_function type) { if (federated_init) { @@ -451,9 +462,10 @@ bool federated_db_end() VOID(pthread_mutex_destroy(&federated_mutex)); } federated_init= 0; - return FALSE; + return 0; } + /* Check (in create) whether the tables exists, and that it can be connected to @@ -587,12 +599,12 @@ static int parse_url_error(FEDERATED_SHARE *share, TABLE *table, int error_num) SYNOPSIS parse_url() - share pointer to FEDERATED share - table pointer to current TABLE class - table_create_flag determines what error to throw + share pointer to FEDERATED share + table pointer to current TABLE class + table_create_flag determines what error to throw DESCRIPTION - populates the share with information about the connection + Populates the share with information about the connection to the foreign database that will serve as the data source. This string must be specified (currently) in the "comment" field, listed in the CREATE TABLE statement. @@ -611,7 +623,7 @@ static int parse_url_error(FEDERATED_SHARE *share, TABLE *table, int error_num) ***IMPORTANT*** Currently, only "mysql://" is supported. - 'password' and 'port' are both optional. + 'password' and 'port' are both optional. 
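For example, with purely illustrative host and credentials, a connection string of the form

    mysql://user:password@remote.example.com:3306/federated_db/federated_table

is split by parse_url() into the share's scheme, username, password, hostname, port, database and table_name members.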
RETURN VALUE 0 success @@ -629,8 +641,8 @@ static int parse_url(FEDERATED_SHARE *share, TABLE *table, share->port= 0; share->socket= 0; - DBUG_PRINT("info", ("Length %d \n", table->s->connect_string.length)); - DBUG_PRINT("info", ("String %.*s \n", table->s->connect_string.length, + DBUG_PRINT("info", ("Length: %d", table->s->connect_string.length)); + DBUG_PRINT("info", ("String: '%.*s'", table->s->connect_string.length, table->s->connect_string.str)); share->scheme= my_strdup_with_length((const byte*)table->s-> connect_string.str, @@ -721,8 +733,8 @@ static int parse_url(FEDERATED_SHARE *share, TABLE *table, } DBUG_PRINT("info", - ("scheme %s username %s password %s \ - hostname %s port %d database %s tablename %s\n", + ("scheme: %s username: %s password: %s \ + hostname: %s port: %d database: %s tablename: %s", share->scheme, share->username, share->password, share->hostname, share->port, share->database, share->table_name)); @@ -738,11 +750,13 @@ error: ** FEDERATED tables *****************************************************************************/ -ha_federated::ha_federated(TABLE *table_arg) +ha_federated::ha_federated(TABLE_SHARE *table_arg) :handler(&federated_hton, table_arg), mysql(0), stored_result(0), scan_flag(0), ref_length(sizeof(MYSQL_ROW_OFFSET)), current_position(0) -{} +{ + trx_next= 0; +} /* @@ -750,8 +764,8 @@ ha_federated::ha_federated(TABLE *table_arg) SYNOPSIS convert_row_to_internal_format() - record Byte pointer to record - row MySQL result set row from fetchrow() + record Byte pointer to record + row MySQL result set row from fetchrow() DESCRIPTION This method simply iterates through a row returned via fetchrow with @@ -762,7 +776,7 @@ ha_federated::ha_federated(TABLE *table_arg) RETURN VALUE 0 After fields have had field values stored from record - */ +*/ uint ha_federated::convert_row_to_internal_format(byte *record, MYSQL_ROW row) { @@ -773,24 +787,23 @@ uint ha_federated::convert_row_to_internal_format(byte *record, MYSQL_ROW row) lengths= mysql_fetch_lengths(stored_result); memset(record, 0, table->s->null_bytes); - for (field= table->field; *field; field++) + for (field= table->field; *field; field++, row++, lengths++) { /* index variable to move us through the row at the same iterative step as the field */ - int x= field - table->field; my_ptrdiff_t old_ptr; old_ptr= (my_ptrdiff_t) (record - table->record[0]); - (*field)->move_field(old_ptr); - if (!row[x]) + (*field)->move_field_offset(old_ptr); + if (!*row) (*field)->set_null(); else { (*field)->set_notnull(); - (*field)->store(row[x], lengths[x], &my_charset_bin); + (*field)->store(*row, *lengths, &my_charset_bin); } - (*field)->move_field(-old_ptr); + (*field)->move_field_offset(-old_ptr); } DBUG_RETURN(0); @@ -1195,8 +1208,8 @@ bool ha_federated::create_where_from_key(String *to, DBUG_RETURN(1); } else - /* LIKE */ { + /* LIKE */ if (emit_key_part_name(&tmp, key_part) || tmp.append(FEDERATED_LIKE) || emit_key_part_element(&tmp, key_part, needs_quotes, 1, ptr, @@ -1308,16 +1321,16 @@ static FEDERATED_SHARE *get_share(const char *table_name, TABLE *table) /* In order to use this string, we must first zero it's length, or it will contain garbage - */ + */ query.length(0); pthread_mutex_lock(&federated_mutex); - tmp_table_name= (char *)table->s->table_name; - tmp_table_name_length= (uint) strlen(tmp_table_name); + tmp_table_name= table->s->table_name.str; + tmp_table_name_length= table->s->table_name.length; if (!(share= (FEDERATED_SHARE *) hash_search(&federated_open_tables, (byte*) table_name, - 
strlen(table_name)))) + tmp_table_name_length))) { query.set_charset(system_charset_info); query.append(FEDERATED_SELECT); @@ -1328,7 +1341,7 @@ static FEDERATED_SHARE *get_share(const char *table_name, TABLE *table) query.append(FEDERATED_BTICK); query.append(FEDERATED_COMMA); } - query.length(query.length()- strlen(FEDERATED_COMMA)); + query.length(query.length()- (FEDERATED_COMMA_LEN - 1)); query.append(FEDERATED_FROM); query.append(FEDERATED_BTICK); @@ -1352,7 +1365,6 @@ static FEDERATED_SHARE *get_share(const char *table_name, TABLE *table) share->select_query= select_query; strmov(share->select_query, query.ptr()); share->use_count= 0; - share->table_name_length= strlen(share->table_name); DBUG_PRINT("info", ("share->select_query %s", share->select_query)); @@ -1466,10 +1478,11 @@ int ha_federated::open(const char *name, int mode, uint test_if_locked) } /* Since we do not support transactions at this version, we can let the client - API silently reconnect. For future versions, we will need more logic to deal - with transactions + API silently reconnect. For future versions, we will need more logic to + deal with transactions */ mysql->reconnect= 1; + DBUG_RETURN(0); } @@ -1542,6 +1555,7 @@ inline uint field_in_record_is_null(TABLE *table, DBUG_RETURN(0); } + /* write_row() inserts a row. No extra() hint is given currently if a bulk load is happeneding. buf() is a byte array of data. You can use the field @@ -1559,9 +1573,6 @@ inline uint field_in_record_is_null(TABLE *table, int ha_federated::write_row(byte *buf) { bool has_fields= FALSE; - uint all_fields_have_same_query_id= 1; - ulong current_query_id= 1; - ulong tmp_query_id= 1; char insert_buffer[FEDERATED_QUERY_BUFFER_SIZE]; char values_buffer[FEDERATED_QUERY_BUFFER_SIZE]; char insert_field_value_buffer[STRING_BUFFER_USUAL_SIZE]; @@ -1589,14 +1600,6 @@ int ha_federated::write_row(byte *buf) table->timestamp_field->set_time(); /* - get the current query id - the fields that we add to the insert - statement to send to the foreign will not be appended unless they match - this query id - */ - current_query_id= table->in_use->query_id; - DBUG_PRINT("info", ("current query id %d", current_query_id)); - - /* start both our field and field values strings */ insert_string.append(FEDERATED_INSERT); @@ -1609,21 +1612,8 @@ int ha_federated::write_row(byte *buf) values_string.append(FEDERATED_OPENPAREN); /* - Even if one field is different, all_fields_same_query_id can't remain - 0 if it remains 0, then that means no fields were specified in the query - such as in the case of INSERT INTO table VALUES (val1, val2, valN) - - */ - for (field= table->field; *field; field++) - { - if (field > table->field && tmp_query_id != (*field)->query_id) - all_fields_have_same_query_id= 0; - - tmp_query_id= (*field)->query_id; - } - /* loop through the field pointer array, add any fields to both the values - list and the fields list that match the current query id + list and the fields list that are part of the write set You might ask "Why an index variable (has_fields) ?" My answer is that we need to count how many fields we actually need for (field= table->field; *field; field++) { /* if there is a query id and if it's equal to the current query id */ - if (((*field)->query_id && (*field)->query_id == current_query_id) - || all_fields_have_same_query_id) + if (ha_get_bit_in_write_set((*field)->fieldnr)) { /* There are some fields.
@@ -1559,9 +1573,6 @@ inline uint field_in_record_is_null(TABLE *table,
 int ha_federated::write_row(byte *buf)
 {
   bool has_fields= FALSE;
-  uint all_fields_have_same_query_id= 1;
-  ulong current_query_id= 1;
-  ulong tmp_query_id= 1;
   char insert_buffer[FEDERATED_QUERY_BUFFER_SIZE];
   char values_buffer[FEDERATED_QUERY_BUFFER_SIZE];
   char insert_field_value_buffer[STRING_BUFFER_USUAL_SIZE];
@@ -1589,14 +1600,6 @@ int ha_federated::write_row(byte *buf)
     table->timestamp_field->set_time();
 
   /*
-    get the current query id - the fields that we add to the insert
-    statement to send to the foreign will not be appended unless they match
-    this query id
-  */
-  current_query_id= table->in_use->query_id;
-  DBUG_PRINT("info", ("current query id %d", current_query_id));
-
-  /*
     start both our field and field values strings
   */
   insert_string.append(FEDERATED_INSERT);
@@ -1609,21 +1612,8 @@ int ha_federated::write_row(byte *buf)
   values_string.append(FEDERATED_OPENPAREN);
 
   /*
-    Even if one field is different, all_fields_same_query_id can't remain
-    0 if it remains 0, then that means no fields were specified in the query
-    such as in the case of INSERT INTO table VALUES (val1, val2, valN)
-
-  */
-  for (field= table->field; *field; field++)
-  {
-    if (field > table->field && tmp_query_id != (*field)->query_id)
-      all_fields_have_same_query_id= 0;
-
-    tmp_query_id= (*field)->query_id;
-  }
-  /*
     loop through the field pointer array, add any fields to both the values
-    list and the fields list that match the current query id
+    list and the fields list that are part of the write set
 
     You might ask "Why an index variable (has_fields) ?" My answer is
     that we need to count how many fields we actually need
   */
   for (field= table->field; *field; field++)
   {
     /* if the field is set in the write set, add it to the query */
-    if (((*field)->query_id && (*field)->query_id == current_query_id)
-        || all_fields_have_same_query_id)
+    if (ha_get_bit_in_write_set((*field)->fieldnr))
     {
       /*
         There are some fields. This will be used later to determine
@@ -1823,15 +1812,15 @@ int ha_federated::update_row(const byte *old_data, byte *new_data)
   update_string.append(FEDERATED_BTICK);
   update_string.append(FEDERATED_SET);
 
-/*
-  In this loop, we want to match column names to values being inserted
-  (while building INSERT statement).
+  /*
+    In this loop, we want to match column names to values being updated
+    (while building the UPDATE statement).
 
-  Iterate through table->field (new data) and share->old_filed (old_data)
-  using the same index to created an SQL UPDATE statement, new data is
-  used to create SET field=value and old data is used to create WHERE
-  field=oldvalue
- */
+    Iterate through table->field (new data) and share->old_field (old_data)
+    using the same index to create an SQL UPDATE statement; new data is
+    used to create SET field=value and old data is used to create WHERE
+    field=oldvalue
+  */
 
   for (Field **field= table->field; *field; field++)
   {
@@ -2052,7 +2041,7 @@ int ha_federated::index_read_idx(byte *buf, uint index, const byte *key,
     This basically says that the record in table->record[0] is legal,
     and that it is ok to use this record, for whatever reason, such
     as with a join (without it, joins will not work)
-   */
+  */
   table->status= 0;
 
   retval= rnd_next(buf);
@@ -2070,11 +2059,11 @@ error:
 }
 
 /* Initialized at each key walk (called multiple times unlike rnd_init()) */
-int ha_federated::index_init(uint keynr)
+int ha_federated::index_init(uint keynr, bool sorted)
 {
   DBUG_ENTER("ha_federated::index_init");
   DBUG_PRINT("info",
-             ("table: '%s' key: %d", table->s->table_name, keynr));
+             ("table: '%s' key: %d", table->s->table_name.str, keynr));
   active_index= keynr;
   DBUG_RETURN(0);
 }
@@ -2255,6 +2244,7 @@ int ha_federated::rnd_end()
   DBUG_RETURN(retval);
 }
 
+
 int ha_federated::index_end(void)
 {
   DBUG_ENTER("ha_federated::index_end");
@@ -2262,6 +2252,7 @@ int ha_federated::index_end(void)
   DBUG_RETURN(0);
 }
 
+
 /*
   This is called for each row of the table scan. When you run out of records
   you should return HA_ERR_END_OF_FILE. Fill buff up with the row information.
@@ -2639,4 +2630,151 @@ bool ha_federated::get_error_message(int error, String* buf)
   DBUG_RETURN(FALSE);
 }
 
-#endif /* HAVE_FEDERATED_DB */
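
(Overview of the transaction support added in the remainder of this hunk,
illustrative only; "enlist" is a hypothetical name for the trx_next
chaining that external_lock() does inline:

    external_lock(), autocommit statement:
        connection_autocommit(TRUE);            /* SET AUTOCOMMIT=1 remotely */
        trans_register_ha(thd, FALSE, &federated_hton);
    external_lock(), first table inside a transaction:
        connection_autocommit(FALSE);           /* SET AUTOCOMMIT=0 remotely */
        thd->ha_data[federated_hton.slot]= this;
        trans_register_ha(thd, TRUE, &federated_hton);
    external_lock(), any further table:
        enlist(trx, this);                      /* append via trx_next */

At COMMIT or ROLLBACK the server calls federated_commit() or
federated_rollback(), which walk the trx_next chain and send COMMIT or
ROLLBACK to each remote connection.)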
+int ha_federated::external_lock(THD *thd, int lock_type)
+{
+  int error= 0;
+  ha_federated *trx= (ha_federated *)thd->ha_data[federated_hton.slot];
+  DBUG_ENTER("ha_federated::external_lock");
+
+  if (lock_type != F_UNLCK)
+  {
+    DBUG_PRINT("info",("federated not lock F_UNLCK"));
+    if (!(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)))
+    {
+      DBUG_PRINT("info",("federated autocommit"));
+      /*
+        This means we are doing an autocommit
+      */
+      error= connection_autocommit(TRUE);
+      if (error)
+      {
+        DBUG_PRINT("info", ("error setting autocommit TRUE: %d", error));
+        DBUG_RETURN(error);
+      }
+      trans_register_ha(thd, FALSE, &federated_hton);
+    }
+    else
+    {
+      DBUG_PRINT("info",("not autocommit"));
+      if (!trx)
+      {
+        /*
+          This is where a transaction gets its start
+        */
+        error= connection_autocommit(FALSE);
+        if (error)
+        {
+          DBUG_PRINT("info", ("error setting autocommit FALSE: %d", error));
+          DBUG_RETURN(error);
+        }
+        thd->ha_data[federated_hton.slot]= this;
+        trans_register_ha(thd, TRUE, &federated_hton);
+        /*
+          Send a lock table to the remote end.
+          We do not support this at the moment
+        */
+        if (thd->options & (OPTION_TABLE_LOCK))
+        {
+          DBUG_PRINT("info", ("We do not support lock table yet"));
+        }
+      }
+      else
+      {
+        ha_federated *ptr;
+        for (ptr= trx; ptr; ptr= ptr->trx_next)
+          if (ptr == this)
+            break;
+          else if (!ptr->trx_next)
+            ptr->trx_next= this;
+      }
+    }
+  }
+  DBUG_RETURN(0);
+}
+
+
+static int federated_commit(THD *thd, bool all)
+{
+  int return_val= 0;
+  ha_federated *trx= (ha_federated *)thd->ha_data[federated_hton.slot];
+  DBUG_ENTER("federated_commit");
+
+  if (all)
+  {
+    int error= 0;
+    ha_federated *ptr, *old= NULL;
+    for (ptr= trx; ptr; old= ptr, ptr= ptr->trx_next)
+    {
+      if (old)
+        old->trx_next= NULL;
+      error= ptr->connection_commit();
+      if (error && !return_val)
+        return_val= error;
+    }
+    thd->ha_data[federated_hton.slot]= NULL;
+  }
+
+  DBUG_PRINT("info", ("error val: %d", return_val));
+  DBUG_RETURN(return_val);
+}
+
+
+static int federated_rollback(THD *thd, bool all)
+{
+  int return_val= 0;
+  ha_federated *trx= (ha_federated *)thd->ha_data[federated_hton.slot];
+  DBUG_ENTER("federated_rollback");
+
+  if (all)
+  {
+    int error= 0;
+    ha_federated *ptr, *old= NULL;
+    for (ptr= trx; ptr; old= ptr, ptr= ptr->trx_next)
+    {
+      if (old)
+        old->trx_next= NULL;
+      error= ptr->connection_rollback();
+      if (error && !return_val)
+        return_val= error;
+    }
+    thd->ha_data[federated_hton.slot]= NULL;
+  }
+
+  DBUG_PRINT("info", ("error val: %d", return_val));
+  DBUG_RETURN(return_val);
+}
+
+int ha_federated::connection_commit()
+{
+  DBUG_ENTER("ha_federated::connection_commit");
+  DBUG_RETURN(execute_simple_query("COMMIT", 6));
+}
+
+
+int ha_federated::connection_rollback()
+{
+  DBUG_ENTER("ha_federated::connection_rollback");
+  DBUG_RETURN(execute_simple_query("ROLLBACK", 8));
+}
+
+
+int ha_federated::connection_autocommit(bool state)
+{
+  const char *text;
+  DBUG_ENTER("ha_federated::connection_autocommit");
+  text= (state == true) ?
"SET AUTOCOMMIT=1" : "SET AUTOCOMMIT=0"; + DBUG_RETURN(execute_simple_query(text, 16)); +} + + +int ha_federated::execute_simple_query(const char *query, int len) +{ + DBUG_ENTER("ha_federated::execute_simple_query"); + + if (mysql_real_query(mysql, query, len)) + { + DBUG_RETURN(stash_remote_error()); + } + DBUG_RETURN(0); +} + diff --git a/sql/ha_federated.h b/sql/ha_federated.h index b25071dda16..c596f066098 100644 --- a/sql/ha_federated.h +++ b/sql/ha_federated.h @@ -173,13 +173,15 @@ private: int stash_remote_error(); public: - ha_federated(TABLE *table_arg); - ~ha_federated() - { - } + ha_federated(TABLE_SHARE *table_arg); + ~ha_federated() {} /* The name that will be used for display purposes */ const char *table_type() const { return "FEDERATED"; } /* + Next pointer used in transaction + */ + ha_federated *trx_next; + /* The name of the index type that will be used for display don't implement this method unless you really have indexes */ @@ -230,8 +232,7 @@ public: */ double scan_time() { - DBUG_PRINT("info", - ("records %d", records)); + DBUG_PRINT("info", ("records %lu", (ulong) records)); return (double)(records*1000); } /* @@ -259,7 +260,7 @@ public: int write_row(byte *buf); int update_row(const byte *old_data, byte *new_data); int delete_row(const byte *buf); - int index_init(uint keynr); + int index_init(uint keynr, bool sorted); int index_read(byte *buf, const byte *key, uint key_len, enum ha_rkey_function find_flag); int index_read_idx(byte *buf, uint idx, const byte *key, @@ -298,7 +299,14 @@ public: THR_LOCK_DATA **store_lock(THD *thd, THR_LOCK_DATA **to, enum thr_lock_type lock_type); //required virtual bool get_error_message(int error, String *buf); + int external_lock(THD *thd, int lock_type); + int connection_commit(); + int connection_rollback(); + bool has_transactions() { return 1; } + int connection_autocommit(bool state); + int execute_simple_query(const char *query, int len); }; bool federated_db_init(void); -bool federated_db_end(void); +int federated_db_end(ha_panic_function type); + diff --git a/sql/ha_heap.cc b/sql/ha_heap.cc index 98cc96db707..a83a95ac863 100644 --- a/sql/ha_heap.cc +++ b/sql/ha_heap.cc @@ -23,7 +23,11 @@ #include <myisampack.h> #include "ha_heap.h" + +static handler *heap_create_handler(TABLE_SHARE *table); + handlerton heap_hton= { + MYSQL_HANDLERTON_INTERFACE_VERSION, "MEMORY", SHOW_OPTION_YES, "Hash based, stored in memory, useful for temporary tables", @@ -44,14 +48,26 @@ handlerton heap_hton= { NULL, /* create_cursor_read_view */ NULL, /* set_cursor_read_view */ NULL, /* close_cursor_read_view */ + heap_create_handler, /* Create a new handler */ + NULL, /* Drop a database */ + heap_panic, /* Panic call */ + NULL, /* Start Consistent Snapshot */ + NULL, /* Flush logs */ + NULL, /* Show status */ HTON_CAN_RECREATE }; +static handler *heap_create_handler(TABLE_SHARE *table) +{ + return new ha_heap(table); +} + + /***************************************************************************** ** HEAP tables *****************************************************************************/ -ha_heap::ha_heap(TABLE *table_arg) +ha_heap::ha_heap(TABLE_SHARE *table_arg) :handler(&heap_hton, table_arg), file(0), records_changed(0), key_stats_ok(0) {} @@ -472,11 +488,18 @@ THR_LOCK_DATA **ha_heap::store_lock(THD *thd, int ha_heap::delete_table(const char *name) { char buff[FN_REFLEN]; - int error= heap_delete_table(fn_format(buff,name,"","", - MY_REPLACE_EXT|MY_UNPACK_FILENAME)); + int error= heap_delete_table(name); return error == ENOENT ? 
0 : error; } + +void ha_heap::drop_table(const char *name) +{ + heap_drop_table(file); + close(); +} + + int ha_heap::rename_table(const char * from, const char * to) { return heap_rename(from,to); @@ -511,7 +534,6 @@ int ha_heap::create(const char *name, TABLE *table_arg, ha_rows max_rows; HP_KEYDEF *keydef; HA_KEYSEG *seg; - char buff[FN_REFLEN]; int error; TABLE_SHARE *share= table_arg->s; bool found_real_auto_increment= 0; @@ -592,7 +614,7 @@ int ha_heap::create(const char *name, TABLE *table_arg, } } mem_per_row+= MY_ALIGN(share->reclength + 1, sizeof(char*)); - max_rows = (ha_rows) (table->in_use->variables.max_heap_table_size / + max_rows = (ha_rows) (table_arg->in_use->variables.max_heap_table_size / mem_per_row); if (table_arg->found_next_number_field) { @@ -607,8 +629,7 @@ int ha_heap::create(const char *name, TABLE *table_arg, hp_create_info.max_table_size=current_thd->variables.max_heap_table_size; hp_create_info.with_auto_increment= found_real_auto_increment; max_rows = (ha_rows) (hp_create_info.max_table_size / mem_per_row); - error= heap_create(fn_format(buff,name,"","", - MY_REPLACE_EXT|MY_UNPACK_FILENAME), + error= heap_create(name, keys, keydef, share->reclength, (ulong) ((share->max_rows < max_rows && share->max_rows) ? @@ -633,3 +654,15 @@ ulonglong ha_heap::get_auto_increment() ha_heap::info(HA_STATUS_AUTO); return auto_increment_value; } + + +bool ha_heap::check_if_incompatible_data(HA_CREATE_INFO *info, + uint table_changes) +{ + /* Check that auto_increment value was not changed */ + if ((table_changes != IS_EQUAL_YES && + info->used_fields & HA_CREATE_USED_AUTO) && + info->auto_increment_value != 0) + return COMPATIBLE_DATA_NO; + return COMPATIBLE_DATA_YES; +} diff --git a/sql/ha_heap.h b/sql/ha_heap.h index 7c4227e952c..909b36f975b 100644 --- a/sql/ha_heap.h +++ b/sql/ha_heap.h @@ -31,7 +31,7 @@ class ha_heap: public handler uint records_changed; bool key_stats_ok; public: - ha_heap(TABLE *table); + ha_heap(TABLE_SHARE *table); ~ha_heap() {} const char *table_type() const { @@ -40,8 +40,8 @@ public: } const char *index_type(uint inx) { - return ((table->key_info[inx].algorithm == HA_KEY_ALG_BTREE) ? "BTREE" : - "HASH"); + return ((table_share->key_info[inx].algorithm == HA_KEY_ALG_BTREE) ? + "BTREE" : "HASH"); } /* Rows also use a fixed-size format */ enum row_type get_row_type() const { return ROW_TYPE_FIXED; } @@ -54,7 +54,7 @@ public: } ulong index_flags(uint inx, uint part, bool all_parts) const { - return ((table->key_info[inx].algorithm == HA_KEY_ALG_BTREE) ? + return ((table_share->key_info[inx].algorithm == HA_KEY_ALG_BTREE) ? HA_READ_NEXT | HA_READ_PREV | HA_READ_ORDER | HA_READ_RANGE : HA_ONLY_WHOLE_INDEX); } @@ -94,6 +94,7 @@ public: int indexes_are_disabled(void); ha_rows records_in_range(uint inx, key_range *min_key, key_range *max_key); int delete_table(const char *from); + void drop_table(const char *name); int rename_table(const char * from, const char * to); int create(const char *name, TABLE *form, HA_CREATE_INFO *create_info); void update_create_info(HA_CREATE_INFO *create_info); @@ -106,6 +107,7 @@ public: HEAP_PTR ptr2=*(HEAP_PTR*)ref2; return ptr1 < ptr2? -1 : (ptr1 > ptr2? 1 : 0); } + bool check_if_incompatible_data(HA_CREATE_INFO *info, uint table_changes); private: void update_key_stats(); }; diff --git a/sql/ha_innodb.cc b/sql/ha_innodb.cc index 744bd242fca..27262e6f197 100644 --- a/sql/ha_innodb.cc +++ b/sql/ha_innodb.cc @@ -34,7 +34,6 @@ have disables the InnoDB inlining in this file. 
*/ #include "mysql_priv.h" #include "slave.h" -#ifdef HAVE_INNOBASE_DB #include <m_ctype.h> #include <hash.h> #include <myisampack.h> @@ -111,28 +110,28 @@ typedef byte mysql_byte; /* Include necessary InnoDB headers */ extern "C" { -#include "../innobase/include/univ.i" -#include "../innobase/include/os0file.h" -#include "../innobase/include/os0thread.h" -#include "../innobase/include/srv0start.h" -#include "../innobase/include/srv0srv.h" -#include "../innobase/include/trx0roll.h" -#include "../innobase/include/trx0trx.h" -#include "../innobase/include/trx0sys.h" -#include "../innobase/include/mtr0mtr.h" -#include "../innobase/include/row0ins.h" -#include "../innobase/include/row0mysql.h" -#include "../innobase/include/row0sel.h" -#include "../innobase/include/row0upd.h" -#include "../innobase/include/log0log.h" -#include "../innobase/include/lock0lock.h" -#include "../innobase/include/dict0crea.h" -#include "../innobase/include/btr0cur.h" -#include "../innobase/include/btr0btr.h" -#include "../innobase/include/fsp0fsp.h" -#include "../innobase/include/sync0sync.h" -#include "../innobase/include/fil0fil.h" -#include "../innobase/include/trx0xa.h" +#include "../storage/innobase/include/univ.i" +#include "../storage/innobase/include/os0file.h" +#include "../storage/innobase/include/os0thread.h" +#include "../storage/innobase/include/srv0start.h" +#include "../storage/innobase/include/srv0srv.h" +#include "../storage/innobase/include/trx0roll.h" +#include "../storage/innobase/include/trx0trx.h" +#include "../storage/innobase/include/trx0sys.h" +#include "../storage/innobase/include/mtr0mtr.h" +#include "../storage/innobase/include/row0ins.h" +#include "../storage/innobase/include/row0mysql.h" +#include "../storage/innobase/include/row0sel.h" +#include "../storage/innobase/include/row0upd.h" +#include "../storage/innobase/include/log0log.h" +#include "../storage/innobase/include/lock0lock.h" +#include "../storage/innobase/include/dict0crea.h" +#include "../storage/innobase/include/btr0cur.h" +#include "../storage/innobase/include/btr0btr.h" +#include "../storage/innobase/include/fsp0fsp.h" +#include "../storage/innobase/include/sync0sync.h" +#include "../storage/innobase/include/fil0fil.h" +#include "../storage/innobase/include/trx0xa.h" } #define HA_INNOBASE_ROWS_IN_TABLE 10000 /* to get optimization right */ @@ -206,8 +205,10 @@ static int innobase_rollback(THD* thd, bool all); static int innobase_rollback_to_savepoint(THD* thd, void *savepoint); static int innobase_savepoint(THD* thd, void *savepoint); static int innobase_release_savepoint(THD* thd, void *savepoint); +static handler *innobase_create_handler(TABLE_SHARE *table); handlerton innobase_hton = { + MYSQL_HANDLERTON_INTERFACE_VERSION, "InnoDB", SHOW_OPTION_YES, "Supports transactions, row-level locking, and foreign keys", @@ -228,9 +229,22 @@ handlerton innobase_hton = { innobase_create_cursor_view, innobase_set_cursor_view, innobase_close_cursor_view, + innobase_create_handler, /* Create a new handler */ + innobase_drop_database, /* Drop a database */ + innobase_end, /* Panic call */ + innobase_start_trx_and_assign_read_view, /* Start Consistent Snapshot */ + innobase_flush_logs, /* Flush logs */ + innobase_show_status, /* Show status */ HTON_NO_FLAGS }; + +static handler *innobase_create_handler(TABLE_SHARE *table) +{ + return new ha_innobase(table); +} + + /********************************************************************* Commits a transaction in an InnoDB database. 
*/ @@ -391,7 +405,7 @@ Call this function when mysqld passes control to the client. That is to avoid deadlocks on the adaptive hash S-latch possibly held by thd. For more documentation, see handler.cc. */ -void +int innobase_release_temporary_latches( /*===============================*/ THD *thd) @@ -400,7 +414,7 @@ innobase_release_temporary_latches( if (!innodb_inited) { - return; + return 0; } trx = (trx_t*) thd->ha_data[innobase_hton.slot]; @@ -408,6 +422,7 @@ innobase_release_temporary_latches( if (trx) { innobase_release_stat_resources(trx); } + return 0; } /************************************************************************ @@ -805,13 +820,14 @@ check_trx_exists( /************************************************************************* Construct ha_innobase handler. */ -ha_innobase::ha_innobase(TABLE *table_arg) +ha_innobase::ha_innobase(TABLE_SHARE *table_arg) :handler(&innobase_hton, table_arg), int_table_flags(HA_REC_NOT_IN_SEQ | HA_NULL_IN_KEY | HA_CAN_INDEX_BLOBS | HA_CAN_SQL_HANDLER | HA_NOT_EXACT_COUNT | + HA_PRIMARY_KEY_ALLOW_RANDOM_ACCESS | HA_PRIMARY_KEY_IN_READ_INDEX | HA_CAN_GEOMETRY | HA_TABLE_SCAN_ON_INDEX), @@ -1450,8 +1466,8 @@ error: /*********************************************************************** Closes an InnoDB database. */ -bool -innobase_end(void) +int +innobase_end(ha_panic_function type) /*==============*/ /* out: TRUE if error */ { @@ -1956,6 +1972,11 @@ innobase_repl_report_sent_binlog( int cmp; ibool can_release_threads = 0; + if (!innodb_inited) { + + return 0; + } + /* If synchronous replication is not switched on, or this thd is sending binlog to a slave where we do not need synchronous replication, then return immediately */ @@ -3032,6 +3053,9 @@ ha_innobase::store_key_val_for_row( continue; } + /* In a column prefix index, we may need to truncate + the stored value: */ + cs = key_part->field->charset(); src_start = record + key_part->offset; @@ -3048,7 +3072,11 @@ ha_innobase::store_key_val_for_row( memcpy(buff, src_start, len); buff+=len; - /* Pad the unused space with spaces */ + /* Pad the unused space with spaces. Note that no + padding is ever needed for UCS-2 because in MySQL, + all UCS2 characters are 2 bytes, as MySQL does not + support surrogate pairs, which are needed to represent + characters in the range U+10000 to U+10FFFF. */ if (len < key_part->length) { len = key_part->length - len; @@ -3183,7 +3211,8 @@ build_template( goto include_field; } - if (thd->query_id == field->query_id) { + if (table->file->ha_get_bit_in_read_set(i+1) || + table->file->ha_get_bit_in_write_set(i+1)) { /* This field is needed in the query */ goto include_field; @@ -3770,9 +3799,9 @@ ha_innobase::delete_row( } /************************************************************************** -Removes a new lock set on a row. This can be called after a row has been read -in the processing of an UPDATE or a DELETE query, if the option -innodb_locks_unsafe_for_binlog is set. */ +Removes a new lock set on a row, if it was not read optimistically. This can +be called after a row has been read in the processing of an UPDATE or a DELETE +query, if the option innodb_locks_unsafe_for_binlog is set. 
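
 The switch in the function body relies on prebuilt->row_read_type
 having three states (the names are from the code below; the summary
 of the transitions is an illustrative reading, not text from the
 patch):

   ROW_READ_WITH_LOCKS          locking read; unlock here only when
                                innodb_locks_unsafe_for_binlog is set
   ROW_READ_TRY_SEMI_CONSISTENT release the row lock via
                                row_unlock_for_mysql()
   ROW_READ_DID_SEMI_CONSISTENT the last read returned the newest
                                committed version instead of blocking;
                                reset to TRY so the next read re-arms

 was_semi_consistent_read() and try_semi_consistent_read(), added just
 below, expose and control this state for the SQL layer.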
*/ void ha_innobase::unlock_row(void) @@ -3782,7 +3811,7 @@ ha_innobase::unlock_row(void) DBUG_ENTER("ha_innobase::unlock_row"); - if (last_query_id != user_thd->query_id) { + if (UNIV_UNLIKELY(last_query_id != user_thd->query_id)) { ut_print_timestamp(stderr); sql_print_error("last_query_id is %lu != user_thd_query_id is " "%lu", (ulong) last_query_id, @@ -3790,9 +3819,45 @@ ha_innobase::unlock_row(void) mem_analyze_corruption((byte *) prebuilt->trx); ut_error; } - - if (srv_locks_unsafe_for_binlog) { + + switch (prebuilt->row_read_type) { + case ROW_READ_WITH_LOCKS: + if (!srv_locks_unsafe_for_binlog) { + break; + } + /* fall through */ + case ROW_READ_TRY_SEMI_CONSISTENT: row_unlock_for_mysql(prebuilt, FALSE); + break; + case ROW_READ_DID_SEMI_CONSISTENT: + prebuilt->row_read_type = ROW_READ_TRY_SEMI_CONSISTENT; + break; + } + + DBUG_VOID_RETURN; +} + +/* See handler.h and row0mysql.h for docs on this function. */ +bool +ha_innobase::was_semi_consistent_read(void) +/*=======================================*/ +{ + row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt; + + return(prebuilt->row_read_type == ROW_READ_DID_SEMI_CONSISTENT); +} + +/* See handler.h and row0mysql.h for docs on this function. */ +void +ha_innobase::try_semi_consistent_read(bool yes) +/*===========================================*/ +{ + row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt; + + if (yes && srv_locks_unsafe_for_binlog) { + prebuilt->row_read_type = ROW_READ_TRY_SEMI_CONSISTENT; + } else { + prebuilt->row_read_type = ROW_READ_WITH_LOCKS; } } @@ -3803,7 +3868,8 @@ int ha_innobase::index_init( /*====================*/ /* out: 0 or error number */ - uint keynr) /* in: key (index) number */ + uint keynr, /* in: key (index) number */ + bool sorted) /* in: 1 if result MUST be sorted according to index */ { int error = 0; DBUG_ENTER("index_init"); @@ -4306,6 +4372,13 @@ ha_innobase::rnd_init( err = change_active_index(primary_key); } + /* Don't use semi-consistent read in random row reads (by position). + This means we must disable semi_consistent_read if scan is false */ + + if (!scan) { + try_semi_consistent_read(0); + } + start_of_scan = 1; return(err); @@ -4784,7 +4857,7 @@ ha_innobase::create( srv_lower_case_table_names = FALSE; } - fn_format(name2, name, "", "", 2); // Remove the .frm extension + strcpy(name2, name); normalize_table_name(norm_name, name2); @@ -4806,8 +4879,8 @@ ha_innobase::create( /* Look for a primary key */ - primary_key_no= (table->s->primary_key != MAX_KEY ? - (int) table->s->primary_key : + primary_key_no= (form->s->primary_key != MAX_KEY ? + (int) form->s->primary_key : -1); /* Our function row_get_mysql_key_number_for_index assumes @@ -5080,7 +5153,7 @@ ha_innobase::delete_table( /********************************************************************* Removes all tables in the named database inside InnoDB. */ -int +void innobase_drop_database( /*===================*/ /* out: error number */ @@ -5146,10 +5219,13 @@ innobase_drop_database( innobase_commit_low(trx); trx_free_for_mysql(trx); - +#ifdef NO_LONGER_INTERESTED_IN_DROP_DB_ERROR error = convert_error_code_to_mysql(error, NULL); return(error); +#else + return; +#endif } /************************************************************************* @@ -5742,7 +5818,6 @@ ha_innobase::update_table_comment( uint length = (uint) strlen(comment); char* str; row_prebuilt_t* prebuilt = (row_prebuilt_t*)innobase_prebuilt; - long flen; /* We do not know if MySQL can call this function before calling external_lock(). 
To be safe, update the thd of the current table @@ -5762,42 +5837,42 @@ ha_innobase::update_table_comment( trx_search_latch_release_if_reserved(prebuilt->trx); str = NULL; - /* output the data to a temporary file */ - - mutex_enter_noninline(&srv_dict_tmpfile_mutex); - rewind(srv_dict_tmpfile); + if (FILE* file = os_file_create_tmpfile()) { + long flen; - fprintf(srv_dict_tmpfile, "InnoDB free: %lu kB", + /* output the data to a temporary file */ + fprintf(file, "InnoDB free: %lu kB", (ulong) fsp_get_available_space_in_free_extents( prebuilt->table->space)); - dict_print_info_on_foreign_keys(FALSE, srv_dict_tmpfile, + dict_print_info_on_foreign_keys(FALSE, file, prebuilt->trx, prebuilt->table); - flen = ftell(srv_dict_tmpfile); - if (flen < 0) { - flen = 0; - } else if (length + flen + 3 > 64000) { - flen = 64000 - 3 - length; - } + flen = ftell(file); + if (flen < 0) { + flen = 0; + } else if (length + flen + 3 > 64000) { + flen = 64000 - 3 - length; + } - /* allocate buffer for the full string, and - read the contents of the temporary file */ + /* allocate buffer for the full string, and + read the contents of the temporary file */ - str = my_malloc(length + flen + 3, MYF(0)); + str = my_malloc(length + flen + 3, MYF(0)); - if (str) { - char* pos = str + length; - if (length) { - memcpy(str, comment, length); - *pos++ = ';'; - *pos++ = ' '; + if (str) { + char* pos = str + length; + if (length) { + memcpy(str, comment, length); + *pos++ = ';'; + *pos++ = ' '; + } + rewind(file); + flen = (uint) fread(pos, 1, flen, file); + pos[flen] = 0; } - rewind(srv_dict_tmpfile); - flen = (uint) fread(pos, 1, flen, srv_dict_tmpfile); - pos[flen] = 0; - } - mutex_exit_noninline(&srv_dict_tmpfile_mutex); + fclose(file); + } prebuilt->trx->op_info = (char*)""; @@ -5816,7 +5891,6 @@ ha_innobase::get_foreign_key_create_info(void) { row_prebuilt_t* prebuilt = (row_prebuilt_t*)innobase_prebuilt; char* str = 0; - long flen; ut_a(prebuilt != NULL); @@ -5826,41 +5900,46 @@ ha_innobase::get_foreign_key_create_info(void) update_thd(current_thd); - prebuilt->trx->op_info = (char*)"getting info on foreign keys"; + if (FILE* file = os_file_create_tmpfile()) { + long flen; - /* In case MySQL calls this in the middle of a SELECT query, - release possible adaptive hash latch to avoid - deadlocks of threads */ + prebuilt->trx->op_info = (char*)"getting info on foreign keys"; - trx_search_latch_release_if_reserved(prebuilt->trx); + /* In case MySQL calls this in the middle of a SELECT query, + release possible adaptive hash latch to avoid + deadlocks of threads */ - mutex_enter_noninline(&srv_dict_tmpfile_mutex); - rewind(srv_dict_tmpfile); + trx_search_latch_release_if_reserved(prebuilt->trx); - /* output the data to a temporary file */ - dict_print_info_on_foreign_keys(TRUE, srv_dict_tmpfile, + /* output the data to a temporary file */ + dict_print_info_on_foreign_keys(TRUE, file, prebuilt->trx, prebuilt->table); - prebuilt->trx->op_info = (char*)""; + prebuilt->trx->op_info = (char*)""; - flen = ftell(srv_dict_tmpfile); - if (flen < 0) { - flen = 0; - } else if (flen > 64000 - 1) { - flen = 64000 - 1; - } + flen = ftell(file); + if (flen < 0) { + flen = 0; + } else if (flen > 64000 - 1) { + flen = 64000 - 1; + } - /* allocate buffer for the string, and - read the contents of the temporary file */ + /* allocate buffer for the string, and + read the contents of the temporary file */ - str = my_malloc(flen + 1, MYF(0)); + str = my_malloc(flen + 1, MYF(0)); - if (str) { - rewind(srv_dict_tmpfile); - flen = (uint) 
fread(str, 1, flen, srv_dict_tmpfile); - str[flen] = 0; - } + if (str) { + rewind(file); + flen = (uint) fread(str, 1, flen, file); + str[flen] = 0; + } - mutex_exit_noninline(&srv_dict_tmpfile_mutex); + fclose(file); + } else { + /* unable to create temporary file */ + str = my_strdup( +"/* Error: cannot display foreign key constraints */", MYF(0)); + } return(str); } @@ -6451,11 +6530,13 @@ ha_innobase::transactional_table_lock( /**************************************************************************** Here we export InnoDB status variables to MySQL. */ -void -innodb_export_status(void) +int +innodb_export_status() /*======================*/ { - srv_export_innodb_status(); + if (innodb_inited) + srv_export_innodb_status(); + return 0; } /**************************************************************************** @@ -6465,9 +6546,9 @@ Monitor to the client. */ bool innodb_show_status( /*===============*/ - THD* thd) /* in: the MySQL query thread of the caller */ + THD* thd, /* in: the MySQL query thread of the caller */ + stat_print_fn *stat_print) { - Protocol* protocol = thd->protocol; trx_t* trx; static const char truncated_msg[] = "... truncated...\n"; const long MAX_STATUS_SIZE = 64000; @@ -6477,10 +6558,7 @@ innodb_show_status( DBUG_ENTER("innodb_show_status"); if (have_innodb != SHOW_OPTION_YES) { - my_message(ER_NOT_SUPPORTED_YET, - "Cannot call SHOW INNODB STATUS because skip-innodb is defined", - MYF(0)); - DBUG_RETURN(TRUE); + DBUG_RETURN(FALSE); } trx = check_trx_exists(thd); @@ -6542,28 +6620,15 @@ innodb_show_status( mutex_exit_noninline(&srv_monitor_file_mutex); - List<Item> field_list; + bool result = FALSE; - field_list.push_back(new Item_empty_string("Status", flen)); - - if (protocol->send_fields(&field_list, Protocol::SEND_NUM_ROWS | - Protocol::SEND_EOF)) { - my_free(str, MYF(0)); - - DBUG_RETURN(TRUE); - } - - protocol->prepare_for_resend(); - protocol->store(str, flen, system_charset_info); - my_free(str, MYF(0)); - - if (protocol->write()) { - - DBUG_RETURN(TRUE); + if (stat_print(thd, innobase_hton.name, strlen(innobase_hton.name), + STRING_WITH_LEN(""), str, flen)) { + result= TRUE; } - send_eof(thd); + my_free(str, MYF(0)); - DBUG_RETURN(FALSE); + DBUG_RETURN(FALSE); } /**************************************************************************** @@ -6572,10 +6637,10 @@ Implements the SHOW MUTEX STATUS command. . 
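
 The function now reports each mutex through the stat_print callback
 supplied by the SQL layer instead of building a result set itself.
 With the snprintf formats used in the body, one mutex comes out
 roughly as (column names and values are illustrative, not from this
 patch):

   Type: InnoDB
   Name: <cmutex_name>:<cfile_name>
   Status: count=210, spin_waits=3, spin_rounds=60, os_waits=2,
           os_yields=1, os_wait_times=0

 innobase_show_status(), added after this function, routes both
 HA_ENGINE_STATUS and HA_ENGINE_MUTEX requests to the right printer.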
*/ bool innodb_mutex_show_status( /*===============*/ - THD* thd) /* in: the MySQL query thread of the caller */ + THD* thd, /* in: the MySQL query thread of the caller */ + stat_print_fn *stat_print) { - Protocol *protocol= thd->protocol; - List<Item> field_list; + char buf1[IO_SIZE], buf2[IO_SIZE]; mutex_t* mutex; ulint rw_lock_count= 0; ulint rw_lock_count_spin_loop= 0; @@ -6583,21 +6648,9 @@ innodb_mutex_show_status( ulint rw_lock_count_os_wait= 0; ulint rw_lock_count_os_yield= 0; ulonglong rw_lock_wait_time= 0; + uint hton_name_len= strlen(innobase_hton.name), buf1len, buf2len; DBUG_ENTER("innodb_mutex_show_status"); - field_list.push_back(new Item_empty_string("Mutex", FN_REFLEN)); - field_list.push_back(new Item_empty_string("Module", FN_REFLEN)); - field_list.push_back(new Item_uint("Count", 21)); - field_list.push_back(new Item_uint("Spin_waits", 21)); - field_list.push_back(new Item_uint("Spin_rounds", 21)); - field_list.push_back(new Item_uint("OS_waits", 21)); - field_list.push_back(new Item_uint("OS_yields", 21)); - field_list.push_back(new Item_uint("OS_waits_time", 21)); - - if (protocol->send_fields(&field_list, - Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF)) - DBUG_RETURN(TRUE); - #ifdef MUTEX_PROTECT_TO_BE_ADDED_LATER mutex_enter(&mutex_list_mutex); #endif @@ -6610,17 +6663,17 @@ innodb_mutex_show_status( { if (mutex->count_using > 0) { - protocol->prepare_for_resend(); - protocol->store(mutex->cmutex_name, system_charset_info); - protocol->store(mutex->cfile_name, system_charset_info); - protocol->store((ulonglong)mutex->count_using); - protocol->store((ulonglong)mutex->count_spin_loop); - protocol->store((ulonglong)mutex->count_spin_rounds); - protocol->store((ulonglong)mutex->count_os_wait); - protocol->store((ulonglong)mutex->count_os_yield); - protocol->store((ulonglong)mutex->lspent_time/1000); - - if (protocol->write()) + buf1len= my_snprintf(buf1, sizeof(buf1), "%s:%s", + mutex->cmutex_name, mutex->cfile_name); + buf2len= my_snprintf(buf2, sizeof(buf2), + "count=%lu, spin_waits=%lu, spin_rounds=%lu, " + "os_waits=%lu, os_yields=%lu, os_wait_times=%lu", + mutex->count_using, mutex->count_spin_loop, + mutex->count_spin_rounds, + mutex->count_os_wait, mutex->count_os_yield, + mutex->lspent_time/1000); + if (stat_print(thd, innobase_hton.name, hton_name_len, + buf1, buf1len, buf2, buf2len)) { #ifdef MUTEX_PROTECT_TO_BE_ADDED_LATER mutex_exit(&mutex_list_mutex); @@ -6642,17 +6695,16 @@ innodb_mutex_show_status( mutex = UT_LIST_GET_NEXT(list, mutex); } - protocol->prepare_for_resend(); - protocol->store("rw_lock_mutexes", system_charset_info); - protocol->store("", system_charset_info); - protocol->store((ulonglong)rw_lock_count); - protocol->store((ulonglong)rw_lock_count_spin_loop); - protocol->store((ulonglong)rw_lock_count_spin_rounds); - protocol->store((ulonglong)rw_lock_count_os_wait); - protocol->store((ulonglong)rw_lock_count_os_yield); - protocol->store((ulonglong)rw_lock_wait_time/1000); - - if (protocol->write()) + buf2len= my_snprintf(buf2, sizeof(buf2), + "count=%lu, spin_waits=%lu, spin_rounds=%lu, " + "os_waits=%lu, os_yields=%lu, os_wait_times=%lu", + rw_lock_count, rw_lock_count_spin_loop, + rw_lock_count_spin_rounds, + rw_lock_count_os_wait, rw_lock_count_os_yield, + rw_lock_wait_time/1000); + + if (stat_print(thd, innobase_hton.name, hton_name_len, + STRING_WITH_LEN("rw_lock_mutexes"), buf2, buf2len)) { DBUG_RETURN(1); } @@ -6660,10 +6712,23 @@ innodb_mutex_show_status( #ifdef MUTEX_PROTECT_TO_BE_ADDED_LATER mutex_exit(&mutex_list_mutex); #endif - 
send_eof(thd);
 
   DBUG_RETURN(FALSE);
 }
 
+bool innobase_show_status(THD* thd, stat_print_fn* stat_print,
+                          enum ha_stat_type stat_type)
+{
+  switch (stat_type) {
+  case HA_ENGINE_STATUS:
+    return innodb_show_status(thd, stat_print);
+  case HA_ENGINE_MUTEX:
+    return innodb_mutex_show_status(thd, stat_print);
+  default:
+    return FALSE;
+  }
+}
+
+
 /****************************************************************************
  Handling the shared INNOBASE_SHARE structure that is needed to provide table
  locking.
@@ -6932,7 +6997,7 @@ ha_innobase::innobase_read_and_init_auto_inc(
 	}
 
 	(void) extra(HA_EXTRA_KEYREAD);
-	index_init(table->s->next_number_index);
+	index_init(table->s->next_number_index, 1);
 
 	/* Starting from 5.0.9, we use a consistent read to read the auto-inc
 	column maximum value. This eliminates the spurious deadlocks caused
@@ -7476,4 +7541,23 @@ innobase_set_cursor_view(
 			(cursor_view_t*) curview);
 }
 
-#endif /* HAVE_INNOBASE_DB */
+
+bool ha_innobase::check_if_incompatible_data(HA_CREATE_INFO *info,
+                                             uint table_changes)
+{
+  if (table_changes != IS_EQUAL_YES)
+    return COMPATIBLE_DATA_NO;
+
+  /* Check that auto_increment value was not changed */
+  if ((info->used_fields & HA_CREATE_USED_AUTO) &&
+      info->auto_increment_value != 0)
+    return COMPATIBLE_DATA_NO;
+
+  /* Check that row format didn't change */
+  if ((info->used_fields & HA_CREATE_USED_ROW_FORMAT) &&
+      get_row_type() != info->row_type)
+    return COMPATIBLE_DATA_NO;
+
+  return COMPATIBLE_DATA_YES;
+}
+
diff --git a/sql/ha_innodb.h b/sql/ha_innodb.h
index 58051624f89..fd0d3aa7e8c 100644
--- a/sql/ha_innodb.h
+++ b/sql/ha_innodb.h
@@ -81,7 +81,7 @@ class ha_innobase: public handler
 	/* Init values for the class: */
  public:
-	ha_innobase(TABLE *table_arg);
+	ha_innobase(TABLE_SHARE *table_arg);
 	~ha_innobase() {}
 	/*
 	  Get the row type from the storage engine.
If this method returns @@ -122,9 +122,11 @@ class ha_innobase: public handler int write_row(byte * buf); int update_row(const byte * old_data, byte * new_data); int delete_row(const byte * buf); + bool was_semi_consistent_read(); + void try_semi_consistent_read(bool yes); void unlock_row(); - int index_init(uint index); + int index_init(uint index, bool sorted); int index_end(); int index_read(byte * buf, const byte * key, uint key_len, enum ha_rkey_function find_flag); @@ -152,6 +154,16 @@ class ha_innobase: public handler int transactional_table_lock(THD *thd, int lock_type); int start_stmt(THD *thd, thr_lock_type lock_type); + int ha_retrieve_all_cols() + { + ha_set_all_bits_in_read_set(); + return extra(HA_EXTRA_RETRIEVE_ALL_COLS); + } + int ha_retrieve_all_pk() + { + ha_set_primary_key_in_read_set(); + return extra(HA_EXTRA_RETRIEVE_PRIMARY_KEY); + } void position(byte *record); ha_rows records_in_range(uint inx, key_range *min_key, key_range *max_key); @@ -196,6 +208,8 @@ class ha_innobase: public handler static ulonglong get_mysql_bin_log_pos(); bool primary_key_is_clustered() { return true; } int cmp_ref(const byte *ref1, const byte *ref2); + bool check_if_incompatible_data(HA_CREATE_INFO *info, + uint table_changes); }; extern struct show_var_st innodb_status_variables[]; @@ -243,7 +257,7 @@ extern ulong srv_commit_concurrency; extern TYPELIB innobase_lock_typelib; bool innobase_init(void); -bool innobase_end(void); +int innobase_end(ha_panic_function type); bool innobase_flush_logs(void); uint innobase_get_free_space(void); @@ -261,12 +275,11 @@ int innobase_commit_complete(void* trx_handle); void innobase_store_binlog_offset_and_flush_log(char *binlog_name,longlong offset); #endif -int innobase_drop_database(char *path); -bool innodb_show_status(THD* thd); -bool innodb_mutex_show_status(THD* thd); -void innodb_export_status(void); +void innobase_drop_database(char *path); +bool innobase_show_status(THD* thd, stat_print_fn*, enum ha_stat_type); +int innodb_export_status(void); -void innobase_release_temporary_latches(THD *thd); +int innobase_release_temporary_latches(THD *thd); void innobase_store_binlog_offset_and_flush_log(char *binlog_name,longlong offset); diff --git a/sql/ha_myisam.cc b/sql/ha_myisam.cc index 08fd2d9a8e3..41000564e53 100644 --- a/sql/ha_myisam.cc +++ b/sql/ha_myisam.cc @@ -27,8 +27,8 @@ #ifndef MASTER #include "../srclib/myisam/myisamdef.h" #else -#include "../myisam/myisamdef.h" -#include "../myisam/rt_index.h" +#include "../storage/myisam/myisamdef.h" +#include "../storage/myisam/rt_index.h" #endif ulong myisam_recover_options= HA_RECOVER_NONE; @@ -50,9 +50,12 @@ TYPELIB myisam_stats_method_typelib= { ** MyISAM tables *****************************************************************************/ +static handler *myisam_create_handler(TABLE_SHARE *table); + /* MyISAM handlerton */ handlerton myisam_hton= { + MYSQL_HANDLERTON_INTERFACE_VERSION, "MyISAM", SHOW_OPTION_YES, "Default engine as of MySQL 3.23 with great performance", @@ -77,9 +80,22 @@ handlerton myisam_hton= { MyISAM doesn't support transactions and doesn't have transaction-dependent context: cursors can survive a commit. 
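
 The trailing slots filled in below follow the extended handlerton
 layout: a handler factory, a drop-database hook, and a panic entry.
 For illustration, the shutdown fan-out assumed here is that the
 server's panic call iterates over every installed handlerton and
 invokes its panic slot, so mi_panic(HA_PANIC_CLOSE) is reached with
 no MyISAM-specific code in the SQL layer; a sketch (hypothetical
 loop, not the actual server code):

   for (uint i= 0; i < hton_count; i++)   // every installed engine
     if (hton[i]->panic)
       error|= hton[i]->panic(flag);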
*/ + myisam_create_handler, /* Create a new handler */ + NULL, /* Drop a database */ + mi_panic,/* Panic call */ + NULL, /* Start Consistent Snapshot */ + NULL, /* Flush logs */ + NULL, /* Show status */ HTON_CAN_RECREATE }; + +static handler *myisam_create_handler(TABLE_SHARE *table) +{ + return new ha_myisam(table); +} + + // collect errors printed by mi_check routines static void mi_check_print_msg(MI_CHECK *param, const char* msg_type, @@ -160,7 +176,7 @@ void mi_check_print_warning(MI_CHECK *param, const char *fmt,...) } -ha_myisam::ha_myisam(TABLE *table_arg) +ha_myisam::ha_myisam(TABLE_SHARE *table_arg) :handler(&myisam_hton, table_arg), file(0), int_table_flags(HA_NULL_IN_KEY | HA_CAN_FULLTEXT | HA_CAN_SQL_HANDLER | HA_DUPP_POS | HA_CAN_INDEX_BLOBS | HA_AUTO_PART_KEY | @@ -280,11 +296,16 @@ err: int ha_myisam::open(const char *name, int mode, uint test_if_locked) { - if (!(file=mi_open(name, mode, test_if_locked))) + uint i; + if (!(file=mi_open(name, mode, test_if_locked | HA_OPEN_FROM_SQL_LAYER))) return (my_errno ? my_errno : -1); if (test_if_locked & (HA_OPEN_IGNORE_IF_LOCKED | HA_OPEN_TMP_TABLE)) VOID(mi_extra(file, HA_EXTRA_NO_WAIT_LOCK, 0)); + + if (!(test_if_locked & HA_OPEN_TMP_TABLE) && opt_myisam_use_mmap) + VOID(mi_extra(file, HA_EXTRA_MMAP, 0)); + info(HA_STATUS_NO_LOCK | HA_STATUS_VARIABLE | HA_STATUS_CONST); if (!(test_if_locked & HA_OPEN_WAIT_IF_LOCKED)) VOID(mi_extra(file, HA_EXTRA_WAIT_LOCK, 0)); @@ -292,6 +313,14 @@ int ha_myisam::open(const char *name, int mode, uint test_if_locked) int_table_flags|=HA_REC_NOT_IN_SEQ; if (file->s->options & (HA_OPTION_CHECKSUM | HA_OPTION_COMPRESS_RECORD)) int_table_flags|=HA_HAS_CHECKSUM; + + for (i= 0; i < table->s->keys; i++) + { + struct st_plugin_int *parser= table->key_info[i].parser; + if (table->key_info[i].flags & HA_USES_PARSER) + file->s->keyinfo[i].parser= + (struct st_mysql_ftparser *)parser->plugin->info; + } return (0); } @@ -331,7 +360,7 @@ int ha_myisam::check(THD* thd, HA_CHECK_OPT* check_opt) myisamchk_init(¶m); param.thd = thd; param.op_name = "check"; - param.db_name= table->s->db; + param.db_name= table->s->db.str; param.table_name= table->alias; param.testflag = check_opt->flags | T_CHECK | T_SILENT; param.stats_method= (enum_mi_stats_method)thd->variables.myisam_stats_method; @@ -419,7 +448,7 @@ int ha_myisam::analyze(THD *thd, HA_CHECK_OPT* check_opt) myisamchk_init(¶m); param.thd = thd; param.op_name= "analyze"; - param.db_name= table->s->db; + param.db_name= table->s->db.str; param.table_name= table->alias; param.testflag= (T_FAST | T_CHECK | T_SILENT | T_STATISTICS | T_DONT_CHECK_CHECKSUM); @@ -447,7 +476,7 @@ int ha_myisam::restore(THD* thd, HA_CHECK_OPT *check_opt) HA_CHECK_OPT tmp_check_opt; char *backup_dir= thd->lex->backup_dir; char src_path[FN_REFLEN], dst_path[FN_REFLEN]; - const char *table_name= table->s->table_name; + const char *table_name= table->s->table_name.str; int error; const char* errmsg; DBUG_ENTER("restore"); @@ -456,8 +485,8 @@ int ha_myisam::restore(THD* thd, HA_CHECK_OPT *check_opt) MI_NAME_DEXT)) DBUG_RETURN(HA_ADMIN_INVALID); - if (my_copy(src_path, fn_format(dst_path, table->s->path, "", - MI_NAME_DEXT, 4), MYF(MY_WME))) + strxmov(dst_path, table->s->normalized_path.str, MI_NAME_DEXT, NullS); + if (my_copy(src_path, dst_path, MYF(MY_WME))) { error= HA_ADMIN_FAILED; errmsg= "Failed in my_copy (Error %d)"; @@ -474,8 +503,8 @@ int ha_myisam::restore(THD* thd, HA_CHECK_OPT *check_opt) myisamchk_init(¶m); param.thd= thd; param.op_name= "restore"; - param.db_name= table->s->db; - 
param.table_name= table->s->table_name; + param.db_name= table->s->db.str; + param.table_name= table->s->table_name.str; param.testflag= 0; mi_check_print_error(¶m, errmsg, my_errno); DBUG_RETURN(error); @@ -487,7 +516,7 @@ int ha_myisam::backup(THD* thd, HA_CHECK_OPT *check_opt) { char *backup_dir= thd->lex->backup_dir; char src_path[FN_REFLEN], dst_path[FN_REFLEN]; - const char *table_name= table->s->table_name; + const char *table_name= table->s->table_name.str; int error; const char *errmsg; DBUG_ENTER("ha_myisam::backup"); @@ -500,9 +529,8 @@ int ha_myisam::backup(THD* thd, HA_CHECK_OPT *check_opt) goto err; } - if (my_copy(fn_format(src_path, table->s->path, "", reg_ext, - MY_UNPACK_FILENAME), - dst_path, + strxmov(src_path, table->s->normalized_path.str, reg_ext, NullS); + if (my_copy(src_path, dst_path, MYF(MY_WME | MY_HOLD_ORIGINAL_MODES | MY_DONT_OVERWRITE_FILE))) { error = HA_ADMIN_FAILED; @@ -511,17 +539,16 @@ int ha_myisam::backup(THD* thd, HA_CHECK_OPT *check_opt) } /* Change extension */ - if (!fn_format(dst_path, dst_path, "", MI_NAME_DEXT, - MY_REPLACE_EXT | MY_UNPACK_FILENAME | MY_SAFE_PATH)) + if (fn_format_relative_to_data_home(dst_path, table_name, backup_dir, + MI_NAME_DEXT)) { errmsg = "Failed in fn_format() for .MYD file (errno: %d)"; error = HA_ADMIN_INVALID; goto err; } - if (my_copy(fn_format(src_path, table->s->path, "", MI_NAME_DEXT, - MY_UNPACK_FILENAME), - dst_path, + strxmov(src_path, table->s->normalized_path.str, MI_NAME_DEXT, NullS); + if (my_copy(src_path, dst_path, MYF(MY_WME | MY_HOLD_ORIGINAL_MODES | MY_DONT_OVERWRITE_FILE))) { errmsg = "Failed copying .MYD file (errno: %d)"; @@ -536,8 +563,8 @@ int ha_myisam::backup(THD* thd, HA_CHECK_OPT *check_opt) myisamchk_init(¶m); param.thd= thd; param.op_name= "backup"; - param.db_name= table->s->db; - param.table_name= table->s->table_name; + param.db_name= table->s->db.str; + param.table_name= table->s->table_name.str; param.testflag = 0; mi_check_print_error(¶m,errmsg, my_errno); DBUG_RETURN(error); @@ -628,7 +655,7 @@ int ha_myisam::repair(THD *thd, MI_CHECK ¶m, bool optimize) ha_rows rows= file->state->records; DBUG_ENTER("ha_myisam::repair"); - param.db_name= table->s->db; + param.db_name= table->s->db.str; param.table_name= table->alias; param.tmpfile_createflag = O_RDWR | O_TRUNC; param.using_global_keycache = 1; @@ -799,8 +826,8 @@ int ha_myisam::assign_to_keycache(THD* thd, HA_CHECK_OPT *check_opt) myisamchk_init(¶m); param.thd= thd; param.op_name= "assign_to_keycache"; - param.db_name= table->s->db; - param.table_name= table->s->table_name; + param.db_name= table->s->db.str; + param.table_name= table->s->table_name.str; param.testflag= 0; mi_check_print_error(¶m, errmsg); } @@ -867,8 +894,8 @@ int ha_myisam::preload_keys(THD* thd, HA_CHECK_OPT *check_opt) myisamchk_init(¶m); param.thd= thd; param.op_name= "preload_keys"; - param.db_name= table->s->db; - param.table_name= table->s->table_name; + param.db_name= table->s->db.str; + param.table_name= table->s->table_name.str; param.testflag= 0; mi_check_print_error(¶m, errmsg); DBUG_RETURN(error); @@ -1122,8 +1149,8 @@ bool ha_myisam::check_and_repair(THD *thd) old_query= thd->query; old_query_length= thd->query_length; pthread_mutex_lock(&LOCK_thread_count); - thd->query= (char*) table->s->table_name; - thd->query_length= (uint32) strlen(table->s->table_name); + thd->query= table->s->table_name.str; + thd->query_length= table->s->table_name.length; pthread_mutex_unlock(&LOCK_thread_count); if ((marked_crashed= mi_is_crashed(file)) || check(thd, 
&check_opt)) @@ -1310,6 +1337,10 @@ void ha_myisam::info(uint flag) ref_length= info.reflength; share->db_options_in_use= info.options; block_size= myisam_block_size; + + /* Update share */ + if (share->tmp_table == NO_TMP_TABLE) + pthread_mutex_lock(&share->mutex); share->keys_in_use.set_prefix(share->keys); share->keys_in_use.intersect_extended(info.key_map); share->keys_for_keyread.intersect(share->keys_in_use); @@ -1318,6 +1349,9 @@ void ha_myisam::info(uint flag) memcpy((char*) table->key_info[0].rec_per_key, (char*) info.rec_per_key, sizeof(table->key_info[0].rec_per_key)*share->key_parts); + if (share->tmp_table == NO_TMP_TABLE) + pthread_mutex_unlock(&share->mutex); + raid_type= info.raid_type; raid_chunks= info.raid_chunks; raid_chunksize= info.raid_chunksize; @@ -1326,11 +1360,11 @@ void ha_myisam::info(uint flag) Set data_file_name and index_file_name to point at the symlink value if table is symlinked (Ie; Real name is not same as generated name) */ - data_file_name=index_file_name=0; - fn_format(name_buff, file->filename, "", MI_NAME_DEXT, 2); + data_file_name= index_file_name= 0; + fn_format(name_buff, file->filename, "", MI_NAME_DEXT, MY_APPEND_EXT); if (strcmp(name_buff, info.data_file_name)) data_file_name=info.data_file_name; - strmov(fn_ext(name_buff),MI_NAME_IEXT); + fn_format(name_buff, file->filename, "", MI_NAME_IEXT, MY_APPEND_EXT); if (strcmp(name_buff, info.index_file_name)) index_file_name=info.index_file_name; } @@ -1421,7 +1455,7 @@ int ha_myisam::create(const char *name, register TABLE *table_arg, MI_KEYDEF *keydef; MI_COLUMNDEF *recinfo,*recinfo_pos; HA_KEYSEG *keyseg; - TABLE_SHARE *share= table->s; + TABLE_SHARE *share= table_arg->s; uint options= share->db_options_in_use; DBUG_ENTER("ha_myisam::create"); @@ -1439,6 +1473,8 @@ int ha_myisam::create(const char *name, register TABLE *table_arg, pos=table_arg->key_info; for (i=0; i < share->keys ; i++, pos++) { + if (pos->flags & HA_USES_PARSER) + create_flags|= HA_CREATE_RELIES_ON_SQL_LAYER; keydef[i].flag= (pos->flags & (HA_NOSAME | HA_FULLTEXT | HA_SPATIAL)); keydef[i].key_alg= pos->algorithm == HA_KEY_ALG_UNDEF ? (pos->flags & HA_SPATIAL ? 
HA_KEY_ALG_RTREE : HA_KEY_ALG_BTREE) : @@ -1611,7 +1647,7 @@ int ha_myisam::create(const char *name, register TABLE *table_arg, create_flags|= HA_CREATE_DELAY_KEY_WRITE; /* TODO: Check that the following fn_format is really needed */ - error=mi_create(fn_format(buff,name,"","",2+4), + error=mi_create(fn_format(buff,name,"","",MY_UNPACK_FILENAME|MY_APPEND_EXT), share->keys,keydef, (uint) (recinfo_pos-recinfo), recinfo, 0, (MI_UNIQUEDEF*) 0, @@ -1715,3 +1751,25 @@ uint ha_myisam::checksum() const return (uint)file->state->checksum; } + +bool ha_myisam::check_if_incompatible_data(HA_CREATE_INFO *info, + uint table_changes) +{ + uint options= table->s->db_options_in_use; + + if (info->auto_increment_value != auto_increment_value || + info->raid_type != raid_type || + info->raid_chunks != raid_chunks || + info->raid_chunksize != raid_chunksize || + info->data_file_name != data_file_name || + info->index_file_name != index_file_name || + table_changes == IS_EQUAL_NO) + return COMPATIBLE_DATA_NO; + + if ((options & (HA_OPTION_PACK_RECORD | HA_OPTION_CHECKSUM | + HA_OPTION_DELAY_KEY_WRITE)) != + (info->table_options & (HA_OPTION_PACK_RECORD | HA_OPTION_CHECKSUM | + HA_OPTION_DELAY_KEY_WRITE))) + return COMPATIBLE_DATA_NO; + return COMPATIBLE_DATA_YES; +} diff --git a/sql/ha_myisam.h b/sql/ha_myisam.h index ca684463311..eb3ac9db7e4 100644 --- a/sql/ha_myisam.h +++ b/sql/ha_myisam.h @@ -43,7 +43,7 @@ class ha_myisam: public handler int repair(THD *thd, MI_CHECK ¶m, bool optimize); public: - ha_myisam(TABLE *table_arg); + ha_myisam(TABLE_SHARE *table_arg); ~ha_myisam() {} const char *table_type() const { return "MyISAM"; } const char *index_type(uint key_number); @@ -51,7 +51,7 @@ class ha_myisam: public handler ulong table_flags() const { return int_table_flags; } ulong index_flags(uint inx, uint part, bool all_parts) const { - return ((table->key_info[inx].algorithm == HA_KEY_ALG_FULLTEXT) ? + return ((table_share->key_info[inx].algorithm == HA_KEY_ALG_FULLTEXT) ? 
0 : HA_READ_NEXT | HA_READ_PREV | HA_READ_RANGE | HA_READ_ORDER | HA_KEYREAD_ONLY); } @@ -123,6 +123,7 @@ class ha_myisam: public handler int backup(THD* thd, HA_CHECK_OPT* check_opt); int assign_to_keycache(THD* thd, HA_CHECK_OPT* check_opt); int preload_keys(THD* thd, HA_CHECK_OPT* check_opt); + bool check_if_incompatible_data(HA_CREATE_INFO *info, uint table_changes); #ifdef HAVE_REPLICATION int dump(THD* thd, int fd); int net_read_dump(NET* net); diff --git a/sql/ha_myisammrg.cc b/sql/ha_myisammrg.cc index da4136def68..ccb3475e34f 100644 --- a/sql/ha_myisammrg.cc +++ b/sql/ha_myisammrg.cc @@ -25,16 +25,19 @@ #ifndef MASTER #include "../srclib/myisammrg/myrg_def.h" #else -#include "../myisammrg/myrg_def.h" +#include "../storage/myisammrg/myrg_def.h" #endif /***************************************************************************** ** MyISAM MERGE tables *****************************************************************************/ +static handler *myisammrg_create_handler(TABLE_SHARE *table); + /* MyISAM MERGE handlerton */ handlerton myisammrg_hton= { + MYSQL_HANDLERTON_INTERFACE_VERSION, "MRG_MYISAM", SHOW_OPTION_YES, "Collection of identical MyISAM tables", @@ -55,11 +58,22 @@ handlerton myisammrg_hton= { NULL, /* create_cursor_read_view */ NULL, /* set_cursor_read_view */ NULL, /* close_cursor_read_view */ + myisammrg_create_handler, /* Create a new handler */ + NULL, /* Drop a database */ + myrg_panic, /* Panic call */ + NULL, /* Start Consistent Snapshot */ + NULL, /* Flush logs */ + NULL, /* Show status */ HTON_CAN_RECREATE }; +static handler *myisammrg_create_handler(TABLE_SHARE *table) +{ + return new ha_myisammrg(table); +} -ha_myisammrg::ha_myisammrg(TABLE *table_arg) + +ha_myisammrg::ha_myisammrg(TABLE_SHARE *table_arg) :handler(&myisammrg_hton, table_arg), file(0) {} @@ -91,8 +105,9 @@ int ha_myisammrg::open(const char *name, int mode, uint test_if_locked) char name_buff[FN_REFLEN]; DBUG_PRINT("info", ("ha_myisammrg::open")); - if (!(file=myrg_open(fn_format(name_buff,name,"","",2 | 4), mode, - test_if_locked))) + if (!(file=myrg_open(fn_format(name_buff,name,"","", + MY_UNPACK_FILENAME|MY_APPEND_EXT), + mode, test_if_locked))) { DBUG_PRINT("info", ("ha_myisammrg::open exit %d", my_errno)); return (my_errno ? my_errno : -1); @@ -286,7 +301,6 @@ void ha_myisammrg::info(uint flag) errkey = info.errkey; table->s->keys_in_use.set_prefix(table->s->keys); table->s->db_options_in_use= info.options; - table->s->is_view= 1; mean_rec_length= info.reclength; block_size=0; update_time=0; @@ -440,9 +454,9 @@ int ha_myisammrg::create(const char *name, register TABLE *form, for (pos= table_names; tables; tables= tables->next_local) { const char *table_name; - TABLE **tbl= 0; + TABLE *tbl= 0; if (create_info->options & HA_LEX_CREATE_TMP_TABLE) - tbl= find_temporary_table(thd, tables->db, tables->table_name); + tbl= find_temporary_table(thd, tables); if (!tbl) { /* @@ -456,8 +470,8 @@ int ha_myisammrg::create(const char *name, register TABLE *form, This means that it might not be possible to move the DATADIR of an embedded server without changing the paths in the .MRG file. */ - uint length= my_snprintf(buff, FN_REFLEN, "%s/%s/%s", mysql_data_home, - tables->db, tables->table_name); + uint length= build_table_filename(buff, sizeof(buff), + tables->db, tables->table_name, ""); /* If a MyISAM table is in the same directory as the MERGE table, we use the table name without a path. 
This means that the @@ -471,11 +485,13 @@ int ha_myisammrg::create(const char *name, register TABLE *form, DBUG_RETURN(HA_ERR_OUT_OF_MEM); } else - table_name= (*tbl)->s->path; + table_name= tbl->s->path.str; *pos++= table_name; } *pos=0; - DBUG_RETURN(myrg_create(fn_format(buff,name,"","",2+4+16), + DBUG_RETURN(myrg_create(fn_format(buff,name,"","", + MY_RESOLVE_SYMLINKS| + MY_UNPACK_FILENAME|MY_APPEND_EXT), table_names, create_info->merge_insert_method, (my_bool) 0)); @@ -487,6 +503,7 @@ void ha_myisammrg::append_create_info(String *packet) const char *current_db; uint db_length; THD *thd= current_thd; + MYRG_TABLE *open_table, *first; if (file->merge_insert_method != MERGE_INSERT_DISABLED) { @@ -494,10 +511,9 @@ void ha_myisammrg::append_create_info(String *packet) packet->append(get_type(&merge_insert_method,file->merge_insert_method-1)); } packet->append(STRING_WITH_LEN(" UNION=(")); - MYRG_TABLE *open_table,*first; - current_db= table->s->db; - db_length= (uint) strlen(current_db); + current_db= table->s->db.str; + db_length= table->s->db.length; for (first=open_table=file->open_tables ; open_table != file->end_table ; @@ -519,3 +535,14 @@ void ha_myisammrg::append_create_info(String *packet) } packet->append(')'); } + + +bool ha_myisammrg::check_if_incompatible_data(HA_CREATE_INFO *info, + uint table_changes) +{ + /* + For myisammrg, we should always re-generate the mapping file as this + is trivial to do + */ + return COMPATIBLE_DATA_NO; +} diff --git a/sql/ha_myisammrg.h b/sql/ha_myisammrg.h index c762b7c286e..4327b1c17b9 100644 --- a/sql/ha_myisammrg.h +++ b/sql/ha_myisammrg.h @@ -28,7 +28,7 @@ class ha_myisammrg: public handler MYRG_INFO *file; public: - ha_myisammrg(TABLE *table_arg); + ha_myisammrg(TABLE_SHARE *table_arg); ~ha_myisammrg() {} const char *table_type() const { return "MRG_MyISAM"; } const char **bas_ext() const; @@ -37,11 +37,12 @@ class ha_myisammrg: public handler { return (HA_REC_NOT_IN_SEQ | HA_AUTO_PART_KEY | HA_READ_RND_SAME | HA_NULL_IN_KEY | HA_CAN_INDEX_BLOBS | HA_FILE_BASED | - HA_CAN_INSERT_DELAYED | HA_ANY_INDEX_MAY_BE_UNIQUE); + HA_CAN_INSERT_DELAYED | HA_ANY_INDEX_MAY_BE_UNIQUE | + HA_NO_COPY_ON_ALTER); } ulong index_flags(uint inx, uint part, bool all_parts) const { - return ((table->key_info[inx].algorithm == HA_KEY_ALG_FULLTEXT) ? + return ((table_share->key_info[inx].algorithm == HA_KEY_ALG_FULLTEXT) ? 
0 : HA_READ_NEXT | HA_READ_PREV | HA_READ_RANGE |
             HA_READ_ORDER | HA_KEYREAD_ONLY);
   }
@@ -82,4 +83,5 @@ class ha_myisammrg: public handler
   void update_create_info(HA_CREATE_INFO *create_info);
   void append_create_info(String *packet);
   MYRG_INFO *myrg_info() { return file; }
+  bool check_if_incompatible_data(HA_CREATE_INFO *info, uint table_changes);
 };
diff --git a/sql/ha_ndbcluster.cc b/sql/ha_ndbcluster.cc
index 699b3f05a70..a72bfa7d170 100644
--- a/sql/ha_ndbcluster.cc
+++ b/sql/ha_ndbcluster.cc
@@ -26,30 +26,40 @@
 #include "mysql_priv.h"
 
-#ifdef HAVE_NDBCLUSTER_DB
 #include <my_dir.h>
 #include "ha_ndbcluster.h"
 #include <ndbapi/NdbApi.hpp>
 #include <ndbapi/NdbScanFilter.hpp>
+#include <../util/Bitmask.hpp>
+#include <ndbapi/NdbIndexStat.hpp>
 
 // options from mysqld.cc
 extern my_bool opt_ndb_optimized_node_selection;
 extern const char *opt_ndbcluster_connectstring;
 
+const char *ndb_distribution_names[]= {"KEYHASH", "LINHASH", NullS};
+TYPELIB ndb_distribution_typelib= { array_elements(ndb_distribution_names)-1,
+                                    "", ndb_distribution_names, NULL };
+const char *opt_ndb_distribution= ndb_distribution_names[ND_KEYHASH];
+enum ndb_distribution opt_ndb_distribution_id= ND_KEYHASH;
+
 // Default value for parallelism
 static const int parallelism= 0;
 
 // Default value for max number of transactions
 // creatable against NDB from this handler
-static const int max_transactions= 2;
+static const int max_transactions= 3; // should really be 2, but there is one transaction too many allocated when lock table is used
 
 static const char *ha_ndb_ext=".ndb";
+static const char share_prefix[]= "./";
 
 static int ndbcluster_close_connection(THD *thd);
 static int ndbcluster_commit(THD *thd, bool all);
 static int ndbcluster_rollback(THD *thd, bool all);
+static handler* ndbcluster_create_handler(TABLE_SHARE *table);
 
 handlerton ndbcluster_hton = {
+  MYSQL_HANDLERTON_INTERFACE_VERSION,
   "ndbcluster",
   SHOW_OPTION_YES,
   "Clustered, fault-tolerant, memory-based tables",
@@ -70,9 +80,20 @@ handlerton ndbcluster_hton = {
   NULL, /* create_cursor_read_view */
   NULL, /* set_cursor_read_view */
   NULL, /* close_cursor_read_view */
+  ndbcluster_create_handler, /* Create a new handler */
+  ndbcluster_drop_database,  /* Drop a database */
+  ndbcluster_end,            /* Panic call */
+  NULL,                      /* Start Consistent Snapshot */
+  NULL,                      /* Flush logs */
+  ndbcluster_show_status,    /* Show status */
   HTON_NO_FLAGS
 };
 
+static handler *ndbcluster_create_handler(TABLE_SHARE *table)
+{
+  return new ha_ndbcluster(table);
+}
+
 #define NDB_HIDDEN_PRIMARY_KEY_LENGTH 8
 
 #define NDB_FAILED_AUTO_INCREMENT ~(Uint64)0
@@ -90,13 +111,24 @@ handlerton ndbcluster_hton = {
     DBUG_RETURN(ndb_to_mysql_error(&tmp)); \
   }
 
+#define ERR_BREAK(err, code)              \
+{                                         \
+  const NdbError& tmp= err;               \
+  ERR_PRINT(tmp);                         \
+  code= ndb_to_mysql_error(&tmp);         \
+  break;                                  \
+}
+
 // Typedefs for long names
+typedef NdbDictionary::Object NDBOBJ;
 typedef NdbDictionary::Column NDBCOL;
 typedef NdbDictionary::Table NDBTAB;
 typedef NdbDictionary::Index NDBINDEX;
 typedef NdbDictionary::Dictionary NDBDICT;
+typedef NdbDictionary::Event NDBEVENT;
 
-bool ndbcluster_inited= FALSE;
+static int ndbcluster_inited= 0;
+static int ndbcluster_util_inited= 0;
 
 static Ndb* g_ndb= NULL;
 static Ndb_cluster_connection* g_ndb_cluster_connection= NULL;
@@ -109,8 +141,12 @@ static HASH ndbcluster_open_tables;
 
 static byte *ndbcluster_get_key(NDB_SHARE *share,uint *length,
                                 my_bool not_used __attribute__((unused)));
-static NDB_SHARE *get_share(const char *table_name);
-static void free_share(NDB_SHARE *share);
+static NDB_SHARE *get_share(const char *key,
+                            bool create_if_not_exists= TRUE,
+                            bool have_lock= FALSE);
+static void free_share(NDB_SHARE **share, bool have_lock= FALSE);
+static void real_free_share(NDB_SHARE **share);
+static void ndb_set_fragmentation(NDBTAB &tab, TABLE *table, uint pk_len);
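
(A note on the pattern behind these declarations; the open()/close()
pairing is an illustrative sketch, not code from this file: NDB_SHARE
objects are reference-counted per table key, so every successful
get_share() must be balanced by a free_share():

    if (!(m_share= get_share(name)))   /* use_count++ under the registry mutex */
      DBUG_RETURN(1);
    ...
    free_share(&m_share);              /* use_count--; real_free_share() runs
                                          when it reaches zero */

create_if_not_exists lets a caller probe for an existing share without
creating one, and have_lock skips taking the registry mutex when the
caller already holds it.)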
*get_share(const char *key, + bool create_if_not_exists= TRUE, + bool have_lock= FALSE); +static void free_share(NDB_SHARE **share, bool have_lock= FALSE); +static void real_free_share(NDB_SHARE **share); +static void ndb_set_fragmentation(NDBTAB &tab, TABLE *table, uint pk_len); static int packfrm(const void *data, uint len, const void **pack_data, uint *pack_len); static int unpackfrm(const void **data, uint *len, @@ -119,6 +155,33 @@ static int unpackfrm(const void **data, uint *len, static int ndb_get_table_statistics(Ndb*, const char *, struct Ndb_statistics *); +#ifndef DBUG_OFF +void print_records(TABLE *table, const char *record) +{ + if (_db_on_) + { + for (uint j= 0; j < table->s->fields; j++) + { + char buf[40]; + int pos= 0; + Field *field= table->field[j]; + const byte* field_ptr= field->ptr - table->record[0] + record; + int pack_len= field->pack_length(); + int n= pack_len < 10 ? pack_len : 10; + + for (int i= 0; i < n && pos < 20; i++) + { + pos+= sprintf(&buf[pos]," %x", (int) (unsigned char) field_ptr[i]); + } + buf[pos]= 0; + DBUG_PRINT("info",("[%u]field_ptr[0->%d]: %s", j, n, buf)); + } + } +} +#else +#define print_records(a,b) +#endif + // Util thread variables static pthread_t ndb_util_thread; pthread_mutex_t LOCK_ndb_util_thread; @@ -170,65 +233,70 @@ struct show_var_st ndb_status_variables[]= { {NullS, NullS, SHOW_LONG} }; +/* instantiated in storage/ndb/src/ndbapi/Ndbif.cpp */ +extern Uint64 g_latest_trans_gci; + /* Error handling functions */ -struct err_code_mapping -{ - int ndb_err; - int my_err; - int show_warning; -}; +/* Note for merge: old mapping table, moved to storage/ndb/ndberror.c */ -static const err_code_mapping err_map[]= +static int ndb_to_mysql_error(const NdbError *ndberr) { - { 626, HA_ERR_KEY_NOT_FOUND, 0 }, - { 630, HA_ERR_FOUND_DUPP_KEY, 0 }, - { 893, HA_ERR_FOUND_DUPP_KEY, 0 }, - { 721, HA_ERR_TABLE_EXIST, 1 }, - { 4244, HA_ERR_TABLE_EXIST, 1 }, - - { 709, HA_ERR_NO_SUCH_TABLE, 0 }, - - { 266, HA_ERR_LOCK_WAIT_TIMEOUT, 1 }, - { 274, HA_ERR_LOCK_WAIT_TIMEOUT, 1 }, - { 296, HA_ERR_LOCK_WAIT_TIMEOUT, 1 }, - { 297, HA_ERR_LOCK_WAIT_TIMEOUT, 1 }, - { 237, HA_ERR_LOCK_WAIT_TIMEOUT, 1 }, - - { 623, HA_ERR_RECORD_FILE_FULL, 1 }, - { 624, HA_ERR_RECORD_FILE_FULL, 1 }, - { 625, HA_ERR_RECORD_FILE_FULL, 1 }, - { 826, HA_ERR_RECORD_FILE_FULL, 1 }, - { 827, HA_ERR_RECORD_FILE_FULL, 1 }, - { 832, HA_ERR_RECORD_FILE_FULL, 1 }, - - { 284, HA_ERR_TABLE_DEF_CHANGED, 0 }, + /* read the mysql mapped error code */ + int error= ndberr->mysql_code; - { 0, 1, 0 }, - - { -1, -1, 1 } -}; + switch (error) + { + /* errors for which we do not add warnings, just return mapped error code + */ + case HA_ERR_NO_SUCH_TABLE: + case HA_ERR_KEY_NOT_FOUND: + case HA_ERR_FOUND_DUPP_KEY: + return error; + + /* Mapping missing, go with the ndb error code*/ + case -1: + error= ndberr->code; + break; + /* Mapping exists, go with the mapped code */ + default: + break; + } -static int ndb_to_mysql_error(const NdbError *err) -{ - uint i; - for (i=0; err_map[i].ndb_err != err->code && err_map[i].my_err != -1; i++); - if (err_map[i].show_warning) - { - // Push the NDB error message as warning + /* + Push the NDB error message as warning + - Used to be able to use SHOW WARNINGS to get more info on what the error is + - Used by replication to see if the error was temporary + */ + if (ndberr->status == NdbError::TemporaryError) push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_ERROR, - ER_GET_ERRMSG, ER(ER_GET_ERRMSG), - err->code, err->message, "NDB"); - } - if (err_map[i].my_err ==
-1) - return err->code; - return err_map[i].my_err; + ER_GET_TEMPORARY_ERRMSG, ER(ER_GET_TEMPORARY_ERRMSG), + ndberr->code, ndberr->message, "NDB"); + else + push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_ERROR, + ER_GET_ERRMSG, ER(ER_GET_ERRMSG), + ndberr->code, ndberr->message, "NDB"); + return error; } +int execute_no_commit_ignore_no_key(ha_ndbcluster *h, NdbTransaction *trans) +{ + int res= trans->execute(NdbTransaction::NoCommit, + NdbTransaction::AO_IgnoreError, + h->m_force_send); + if (res == 0) + return 0; + + const NdbError &err= trans->getNdbError(); + if (err.classification != NdbError::ConstraintViolation && + err.classification != NdbError::NoDataFound) + return res; + return 0; +} inline int execute_no_commit(ha_ndbcluster *h, NdbTransaction *trans) @@ -238,9 +306,11 @@ int execute_no_commit(ha_ndbcluster *h, NdbTransaction *trans) if (m_batch_execute) return 0; #endif - return trans->execute(NdbTransaction::NoCommit, - NdbTransaction::AbortOnError, - h->m_force_send); + return h->m_ignore_no_key ? + execute_no_commit_ignore_no_key(h,trans) : + trans->execute(NdbTransaction::NoCommit, + NdbTransaction::AbortOnError, + h->m_force_send); } inline @@ -443,29 +513,38 @@ void ha_ndbcluster::no_uncommitted_rows_reset(THD *thd) # The mapped error code */ -void ha_ndbcluster::invalidate_dictionary_cache(bool global) +void +ha_ndbcluster::invalidate_dictionary_cache(TABLE *table, Ndb *ndb, + const char *tabname, bool global) { - NDBDICT *dict= get_ndb()->getDictionary(); + NDBDICT *dict= ndb->getDictionary(); DBUG_ENTER("invalidate_dictionary_cache"); - DBUG_PRINT("info", ("invalidating %s", m_tabname)); + DBUG_PRINT("info", ("invalidating %s", tabname)); if (global) { - const NDBTAB *tab= dict->getTable(m_tabname); + const NDBTAB *tab= dict->getTable(tabname); if (!tab) DBUG_VOID_RETURN; if (tab->getObjectStatus() == NdbDictionary::Object::Invalid) { // Global cache has already been invalidated - dict->removeCachedTable(m_tabname); + dict->removeCachedTable(tabname); global= FALSE; } else - dict->invalidateTable(m_tabname); + dict->invalidateTable(tabname); } else - dict->removeCachedTable(m_tabname); + dict->removeCachedTable(tabname); table->s->version=0L; /* Free when thread is ready */ + DBUG_VOID_RETURN; +} + +void ha_ndbcluster::invalidate_dictionary_cache(bool global) +{ + NDBDICT *dict= get_ndb()->getDictionary(); + invalidate_dictionary_cache(table, get_ndb(), m_tabname, global); /* Invalidate indexes */ for (uint i= 0; i < table->s->keys; i++) { @@ -497,7 +576,6 @@ void ha_ndbcluster::invalidate_dictionary_cache(bool global) break; } } - DBUG_VOID_RETURN; } int ha_ndbcluster::ndb_err(NdbTransaction *trans) @@ -523,7 +601,7 @@ int ha_ndbcluster::ndb_err(NdbTransaction *trans) { err= dict->getNdbError(); DBUG_PRINT("info", ("Table not found, error: %d", err.code)); - if (err.code != 709) + if (err.code != 709 && err.code != 723) DBUG_RETURN(1); } DBUG_PRINT("info", ("Table exists but must have changed")); @@ -623,8 +701,7 @@ bool ha_ndbcluster::set_hidden_key(NdbOperation *ndb_op, uint fieldnr, const byte *field_ptr) { DBUG_ENTER("set_hidden_key"); - DBUG_RETURN(ndb_op->equal(fieldnr, (char*)field_ptr, - NDB_HIDDEN_PRIMARY_KEY_LENGTH) != 0); + DBUG_RETURN(ndb_op->equal(fieldnr, (char*)field_ptr) != 0); } @@ -654,14 +731,15 @@ int ha_ndbcluster::set_ndb_key(NdbOperation *ndb_op, Field *field, */ int ha_ndbcluster::set_ndb_value(NdbOperation *ndb_op, Field *field, - uint fieldnr, bool *set_blob_value) + uint fieldnr, int row_offset, + bool *set_blob_value) { - const 
byte* field_ptr= field->ptr; - uint32 pack_len= field->pack_length(); + const byte* field_ptr= field->ptr + row_offset; + uint32 pack_len= field->pack_length(); DBUG_ENTER("set_ndb_value"); - DBUG_PRINT("enter", ("%d: %s, type: %u, len=%d, is_null=%s", + DBUG_PRINT("enter", ("%d: %s type: %u len=%d is_null=%s", fieldnr, field->field_name, field->type(), - pack_len, field->is_null()?"Y":"N")); + pack_len, field->is_null(row_offset) ? "Y" : "N")); DBUG_DUMP("value", (char*) field_ptr, pack_len); DBUG_ASSERT(ndb_supported_type(field->type())); @@ -672,7 +750,7 @@ int ha_ndbcluster::set_ndb_value(NdbOperation *ndb_op, Field *field, { pack_len= sizeof(empty_field); field_ptr= (byte *)&empty_field; - if (field->is_null()) + if (field->is_null(row_offset)) empty_field= 0; else empty_field= 1; @@ -681,13 +759,14 @@ { if (field->type() != MYSQL_TYPE_BIT) { - if (field->is_null()) + if (field->is_null(row_offset)) + { + DBUG_PRINT("info", ("field is NULL")); // Set value to NULL - DBUG_RETURN((ndb_op->setValue(fieldnr, - (char*)NULL, pack_len) != 0)); + DBUG_RETURN((ndb_op->setValue(fieldnr, (char*)NULL) != 0)); + } // Common implementation for most field types - DBUG_RETURN(ndb_op->setValue(fieldnr, - (char*)field_ptr, pack_len) != 0); + DBUG_RETURN(ndb_op->setValue(fieldnr, (char*)field_ptr) != 0); } else // if (field->type() == MYSQL_TYPE_BIT) { @@ -696,26 +775,25 @@ // Round up bit field length to nearest word boundary pack_len= ((pack_len + 3) >> 2) << 2; DBUG_ASSERT(pack_len <= 8); - if (field->is_null()) + if (field->is_null(row_offset)) // Set value to NULL - DBUG_RETURN((ndb_op->setValue(fieldnr, (char*)NULL, pack_len) != 0)); + DBUG_RETURN((ndb_op->setValue(fieldnr, (char*)NULL) != 0)); DBUG_PRINT("info", ("bit field")); DBUG_DUMP("value", (char*)&bits, pack_len); #ifdef WORDS_BIGENDIAN if (pack_len < 5) { - DBUG_RETURN(ndb_op->setValue(fieldnr, - ((char*)&bits)+4, pack_len) != 0); + DBUG_RETURN(ndb_op->setValue(fieldnr, ((char*)&bits)+4) != 0); } #endif - DBUG_RETURN(ndb_op->setValue(fieldnr, (char*)&bits, pack_len) != 0); + DBUG_RETURN(ndb_op->setValue(fieldnr, (char*)&bits) != 0); } } // Blob type NdbBlob *ndb_blob= ndb_op->getBlobHandle(fieldnr); if (ndb_blob != NULL) { - if (field->is_null()) + if (field->is_null(row_offset)) DBUG_RETURN(ndb_blob->setNull() != 0); Field_blob *field_blob= (Field_blob*)field; @@ -796,8 +874,8 @@ int ha_ndbcluster::get_ndb_blobs_value(NdbBlob *last_ndb_blob) { char *buf= m_blobs_buffer + offset; uint32 len= 0xffffffff; // Max uint32 - DBUG_PRINT("value", ("read blob ptr=%x len=%u", - (UintPtr)buf, (uint)blob_len)); + DBUG_PRINT("value", ("read blob ptr=%lx len=%u", + buf, (uint) blob_len)); if (ndb_blob->readData(buf, len) != 0) DBUG_RETURN(-1); DBUG_ASSERT(len == blob_len); @@ -879,21 +957,18 @@ int ha_ndbcluster::get_ndb_value(NdbOperation *ndb_op, Field *field, /* Check if any set or get of blob value in current query. */ -bool ha_ndbcluster::uses_blob_value(bool all_fields) +bool ha_ndbcluster::uses_blob_value() { if (table->s->blob_fields == 0) return FALSE; - if (all_fields) - return TRUE; { uint no_fields= table->s->fields; int i; - THD *thd= current_thd; // They always put blobs at the end..
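/* [Editorial aside] uses_blob_value() now consults the handler's column
   bitmaps instead of comparing field->query_id with the current query id.
   A minimal sketch of the membership test, assuming the
   ha_get_bit_in_read_set()/ha_get_bit_in_write_set() helpers used
   throughout this change (column i maps to bit i+1):

     // inside a ha_ndbcluster member function:
     bool referenced= m_write_op ? ha_get_bit_in_write_set(i + 1)
                                 : ha_get_bit_in_read_set(i + 1);
*/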
for (i= no_fields - 1; i >= 0; i--) { - Field *field= table->field[i]; - if (thd->query_id == field->query_id) + if ((m_write_op && ha_get_bit_in_write_set(i+1)) || + (!m_write_op && ha_get_bit_in_read_set(i+1))) { return TRUE; } @@ -909,8 +984,25 @@ bool ha_ndbcluster::uses_blob_value(bool all_fields) IMPLEMENTATION - check that frm-file on disk is equal to frm-file of table accessed in NDB + + RETURN + 0 ok + -2 Meta data has changed; Re-read data and try again */ +static int cmp_frm(const NDBTAB *ndbtab, const void *pack_data, + uint pack_length) +{ + DBUG_ENTER("cmp_frm"); + /* + Compare FrmData in NDB with frm file from disk. + */ + if ((pack_length != ndbtab->getFrmLength()) || + (memcmp(pack_data, ndbtab->getFrmData(), pack_length))) + DBUG_RETURN(1); + DBUG_RETURN(0); +} + int ha_ndbcluster::get_metadata(const char *path) { Ndb *ndb= get_ndb(); @@ -918,7 +1010,6 @@ int ha_ndbcluster::get_metadata(const char *path) const NDBTAB *tab; int error; bool invalidating_ndb_table= FALSE; - DBUG_ENTER("get_metadata"); DBUG_PRINT("enter", ("m_tabname: %s, path: %s", m_tabname, path)); @@ -948,8 +1039,7 @@ int ha_ndbcluster::get_metadata(const char *path) DBUG_RETURN(1); } - if ((pack_length != tab->getFrmLength()) || - (memcmp(pack_data, tab->getFrmData(), pack_length))) + if (cmp_frm(tab, pack_data, pack_length)) { if (!invalidating_ndb_table) { @@ -960,12 +1050,12 @@ int ha_ndbcluster::get_metadata(const char *path) else { DBUG_PRINT("error", - ("metadata, pack_length: %d getFrmLength: %d memcmp: %d", + ("metadata, pack_length: %d getFrmLength: %d memcmp: %d", pack_length, tab->getFrmLength(), - memcmp(pack_data, tab->getFrmData(), pack_length))); + memcmp(pack_data, tab->getFrmData(), pack_length))); DBUG_DUMP("pack_data", (char*)pack_data, pack_length); DBUG_DUMP("frm", (char*)tab->getFrmData(), tab->getFrmLength()); - error= 3; + error= HA_ERR_TABLE_DEF_CHANGED; invalidating_ndb_table= FALSE; } } @@ -1041,7 +1131,7 @@ int ha_ndbcluster::build_index_list(Ndb *ndb, TABLE *tab, enum ILBP phase) m_index[i].type= idx_type; if (idx_type == UNIQUE_ORDERED_INDEX || idx_type == UNIQUE_INDEX) { - strxnmov(unique_index_name, FN_LEN, index_name, unique_suffix, NullS); + strxnmov(unique_index_name, FN_LEN-1, index_name, unique_suffix, NullS); DBUG_PRINT("info", ("Created unique index name \'%s\' for index %d", unique_index_name, i)); } @@ -1075,7 +1165,7 @@ int ha_ndbcluster::build_index_list(Ndb *ndb, TABLE *tab, enum ILBP phase) if (error) { DBUG_PRINT("error", ("Failed to create index %u", i)); - drop_table(); + intern_drop_table(); break; } } @@ -1086,6 +1176,26 @@ int ha_ndbcluster::build_index_list(Ndb *ndb, TABLE *tab, enum ILBP phase) const NDBINDEX *index= dict->getIndex(index_name, m_tabname); if (!index) DBUG_RETURN(1); m_index[i].index= (void *) index; + // ordered index - add stats + NDB_INDEX_DATA& d=m_index[i]; + delete d.index_stat; + d.index_stat=NULL; + THD *thd=current_thd; + if (thd->variables.ndb_index_stat_enable) + { + d.index_stat=new NdbIndexStat(index); + d.index_stat_cache_entries=thd->variables.ndb_index_stat_cache_entries; + d.index_stat_update_freq=thd->variables.ndb_index_stat_update_freq; + d.index_stat_query_count=0; + d.index_stat->alloc_cache(d.index_stat_cache_entries); + DBUG_PRINT("info", ("index %s stat=on cache_entries=%u update_freq=%u", + index->getName(), + d.index_stat_cache_entries, + d.index_stat_update_freq)); + } else + { + DBUG_PRINT("info", ("index %s stat=off", index->getName())); + } } if (idx_type == UNIQUE_ORDERED_INDEX || idx_type == 
UNIQUE_INDEX) { @@ -1107,11 +1217,12 @@ int ha_ndbcluster::build_index_list(Ndb *ndb, TABLE *tab, enum ILBP phase) */ NDB_INDEX_TYPE ha_ndbcluster::get_index_type_from_table(uint inx) const { - bool is_hash_index= (table->key_info[inx].algorithm == HA_KEY_ALG_HASH); - if (inx == table->s->primary_key) + bool is_hash_index= (table_share->key_info[inx].algorithm == + HA_KEY_ALG_HASH); + if (inx == table_share->primary_key) return is_hash_index ? PRIMARY_KEY_INDEX : PRIMARY_KEY_ORDERED_INDEX; - return ((table->key_info[inx].flags & HA_NOSAME) ? + return ((table_share->key_info[inx].flags & HA_NOSAME) ? (is_hash_index ? UNIQUE_INDEX : UNIQUE_ORDERED_INDEX) : ORDERED_INDEX); } @@ -1157,6 +1268,8 @@ void ha_ndbcluster::release_metadata() my_free((char *)m_index[i].unique_index_attrid_map, MYF(0)); m_index[i].unique_index_attrid_map= NULL; } + delete m_index[i].index_stat; + m_index[i].index_stat=NULL; } DBUG_VOID_RETURN; @@ -1166,7 +1279,7 @@ int ha_ndbcluster::get_ndb_lock_type(enum thr_lock_type type) { if (type >= TL_WRITE_ALLOW_WRITE) return NdbOperation::LM_Exclusive; - else if (uses_blob_value(m_retrieve_all_fields)) + else if (uses_blob_value()) return NdbOperation::LM_Read; else return NdbOperation::LM_CommittedRead; @@ -1228,7 +1341,7 @@ inline ulong ha_ndbcluster::index_flags(uint idx_no, uint part, bool all_parts) const { DBUG_ENTER("ha_ndbcluster::index_flags"); - DBUG_PRINT("info", ("idx_no: %d", idx_no)); + DBUG_PRINT("enter", ("idx_no: %u", idx_no)); DBUG_ASSERT(get_index_type_from_table(idx_no) < index_flags_size); DBUG_RETURN(index_type_flags[get_index_type_from_table(idx_no)] | HA_KEY_SCAN_NOT_ROR); @@ -1319,17 +1432,14 @@ inline int ha_ndbcluster::define_read_attrs(byte* buf, NdbOperation* op) { uint i; - THD *thd= current_thd; - DBUG_ENTER("define_read_attrs"); // Define attributes to read for (i= 0; i < table->s->fields; i++) { Field *field= table->field[i]; - if ((thd->query_id == field->query_id) || - ((field->flags & PRI_KEY_FLAG)) || - m_retrieve_all_fields) + if (ha_get_bit_in_read_set(i+1) || + ((field->flags & PRI_KEY_FLAG))) { if (get_ndb_value(op, field, i, buf)) ERR_RETURN(op->getNdbError()); @@ -1356,11 +1466,13 @@ int ha_ndbcluster::define_read_attrs(byte* buf, NdbOperation* op) DBUG_RETURN(0); } + /* Read one record from NDB using primary key */ -int ha_ndbcluster::pk_read(const byte *key, uint key_len, byte *buf) +int ha_ndbcluster::pk_read(const byte *key, uint key_len, byte *buf, + uint32 part_id) { uint no_fields= table->s->fields; NdbConnection *trans= m_active_trans; @@ -1370,6 +1482,7 @@ int ha_ndbcluster::pk_read(const byte *key, uint key_len, byte *buf) DBUG_ENTER("pk_read"); DBUG_PRINT("enter", ("key_len: %u", key_len)); DBUG_DUMP("key", (char*)key, key_len); + m_write_op= FALSE; NdbOperation::LockMode lm= (NdbOperation::LockMode)get_ndb_lock_type(m_lock.type); @@ -1377,6 +1490,8 @@ int ha_ndbcluster::pk_read(const byte *key, uint key_len, byte *buf) op->readTuple(lm) != 0) ERR_RETURN(trans->getNdbError()); + if (m_use_partition_function) + op->setPartitionId(part_id); if (table->s->primary_key == MAX_KEY) { // This table has no primary key, use "hidden" primary key @@ -1414,17 +1529,20 @@ int ha_ndbcluster::pk_read(const byte *key, uint key_len, byte *buf) Read one complementing record from NDB using primary key from old_data */ -int ha_ndbcluster::complemented_pk_read(const byte *old_data, byte *new_data) +int ha_ndbcluster::complemented_pk_read(const byte *old_data, byte *new_data, + uint32 old_part_id) { uint no_fields= table->s->fields, i; 
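/* [Editorial aside] complemented_pk_read() supplies the "complement" half
   of a primary-key update: columns the statement never referenced are
   fetched here so the later delete+insert pair can rebuild the full row
   image. A sketch of the selection predicate applied a few lines below
   (names as in this change):

     // fetch field i only if it is neither a key column nor already
     // present in the read set
     bool fetch= !(field->flags & PRI_KEY_FLAG) &&
                 !ha_get_bit_in_read_set(i + 1);
*/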
NdbTransaction *trans= m_active_trans; NdbOperation *op; - THD *thd= current_thd; DBUG_ENTER("complemented_pk_read"); + m_write_op= FALSE; - if (m_retrieve_all_fields) + if (ha_get_all_bit_in_read_set()) + { // We have already retrieved all fields, nothing to complement DBUG_RETURN(0); + } NdbOperation::LockMode lm= (NdbOperation::LockMode)get_ndb_lock_type(m_lock.type); @@ -1434,12 +1552,16 @@ int ha_ndbcluster::complemented_pk_read(const byte *old_data, byte *new_data) int res; if ((res= set_primary_key_from_record(op, old_data))) ERR_RETURN(trans->getNdbError()); + + if (m_use_partition_function) + op->setPartitionId(old_part_id); + // Read all unreferenced non-key field(s) for (i= 0; i < no_fields; i++) { Field *field= table->field[i]; if (!((field->flags & PRI_KEY_FLAG) || - (thd->query_id == field->query_id))) + (ha_get_bit_in_read_set(i+1)))) { if (get_ndb_value(op, field, i, new_data)) ERR_RETURN(trans->getNdbError()); @@ -1463,7 +1585,7 @@ { Field *field= table->field[i]; if (!((field->flags & PRI_KEY_FLAG) || - (thd->query_id == field->query_id))) + (ha_get_bit_in_read_set(i+1)))) { m_value[i].ptr= NULL; } @@ -1492,6 +1614,17 @@ int ha_ndbcluster::peek_row(const byte *record) if ((res= set_primary_key_from_record(op, record))) ERR_RETURN(trans->getNdbError()); + if (m_use_partition_function) + { + uint32 part_id; + int error; + if ((error= m_part_info->get_partition_id(m_part_info, &part_id))) + { + DBUG_RETURN(error); + } + op->setPartitionId(part_id); + } + if (execute_no_commit_ie(this,trans) != 0) { table->status= STATUS_NOT_FOUND; @@ -1651,10 +1784,12 @@ inline int ha_ndbcluster::next_result(byte *buf) */ int ha_ndbcluster::set_bounds(NdbIndexScanOperation *op, + uint inx, + bool rir, const key_range *keys[2], uint range_no) { - const KEY *const key_info= table->key_info + active_index; + const KEY *const key_info= table->key_info + inx; const uint key_parts= key_info->key_parts; uint key_tot_len[2]; uint tot_len; @@ -1719,7 +1854,10 @@ int ha_ndbcluster::set_bounds(NdbIndexScanOperation *op, switch (p.key->flag) { case HA_READ_KEY_EXACT: - p.bound_type= NdbIndexScanOperation::BoundEQ; + if (!
rir) + p.bound_type= NdbIndexScanOperation::BoundEQ; + else // differs for records_in_range + p.bound_type= NdbIndexScanOperation::BoundLE; break; // ascending case HA_READ_KEY_OR_NEXT: @@ -1830,7 +1968,8 @@ int ha_ndbcluster::set_bounds(NdbIndexScanOperation *op, int ha_ndbcluster::ordered_index_scan(const key_range *start_key, const key_range *end_key, - bool sorted, bool descending, byte* buf) + bool sorted, bool descending, + byte* buf, part_id_range *part_spec) { int res; bool restart; @@ -1841,6 +1980,7 @@ int ha_ndbcluster::ordered_index_scan(const key_range *start_key, DBUG_PRINT("enter", ("index: %u, sorted: %d, descending: %d", active_index, sorted, descending)); DBUG_PRINT("enter", ("Starting new ordered scan on %s", m_tabname)); + m_write_op= FALSE; // Check that sorted seems to be initialised DBUG_ASSERT(sorted == 0 || sorted == 1); @@ -1855,11 +1995,17 @@ int ha_ndbcluster::ordered_index_scan(const key_range *start_key, (const NDBTAB *) m_table)) || op->readTuples(lm, 0, parallelism, sorted, descending)) ERR_RETURN(trans->getNdbError()); + if (m_use_partition_function && part_spec != NULL && + part_spec->start_part == part_spec->end_part) + op->setPartitionId(part_spec->start_part); m_active_cursor= op; } else { restart= TRUE; op= (NdbIndexScanOperation*)m_active_cursor; + if (m_use_partition_function && part_spec != NULL && + part_spec->start_part == part_spec->end_part) + op->setPartitionId(part_spec->start_part); DBUG_ASSERT(op->getSorted() == sorted); DBUG_ASSERT(op->getLockMode() == (NdbOperation::LockMode)get_ndb_lock_type(m_lock.type)); @@ -1869,7 +2015,7 @@ int ha_ndbcluster::ordered_index_scan(const key_range *start_key, { const key_range *keys[2]= { start_key, end_key }; - res= set_bounds(op, keys); + res= set_bounds(op, active_index, false, keys); if (res) DBUG_RETURN(res); } @@ -1900,6 +2046,7 @@ int ha_ndbcluster::full_table_scan(byte *buf) DBUG_ENTER("full_table_scan"); DBUG_PRINT("enter", ("Starting new scan on %s", m_tabname)); + m_write_op= FALSE; NdbOperation::LockMode lm= (NdbOperation::LockMode)get_ndb_lock_type(m_lock.type); @@ -1929,10 +2076,11 @@ int ha_ndbcluster::write_row(byte *record) NdbOperation *op; int res; THD *thd= current_thd; + m_write_op= TRUE; DBUG_ENTER("write_row"); - if (m_ignore_dup_key && table->s->primary_key != MAX_KEY) + if (!m_use_write && m_ignore_dup_key && table->s->primary_key != MAX_KEY) { int peek_res= peek_row(record); @@ -1957,6 +2105,17 @@ int ha_ndbcluster::write_row(byte *record) if (res != 0) ERR_RETURN(trans->getNdbError()); + if (m_use_partition_function) + { + uint32 part_id; + int error; + if ((error= m_part_info->get_partition_id(m_part_info, &part_id))) + { + DBUG_RETURN(error); + } + op->setPartitionId(part_id); + } + if (table->s->primary_key == MAX_KEY) { // Table has hidden primary key @@ -1998,7 +2157,8 @@ int ha_ndbcluster::write_row(byte *record) { Field *field= table->field[i]; if (!(field->flags & PRI_KEY_FLAG) && - set_ndb_value(op, field, i, &set_blob_value)) + (ha_get_bit_in_write_set(i + 1) || !m_use_write) && + set_ndb_value(op, field, i, record-table->record[0], &set_blob_value)) { m_skip_auto_increment= TRUE; ERR_RETURN(op->getNdbError()); @@ -2114,25 +2274,35 @@ int ha_ndbcluster::update_row(const byte *old_data, byte *new_data) NdbScanOperation* cursor= m_active_cursor; NdbOperation *op; uint i; + uint32 old_part_id= 0, new_part_id= 0; + int error; DBUG_ENTER("update_row"); + m_write_op= TRUE; statistic_increment(thd->status_var.ha_update_count, &LOCK_status); if (table->timestamp_field_type & 
TIMESTAMP_AUTO_SET_ON_UPDATE) { table->timestamp_field->set_time(); - // Set query_id so that field is really updated - table->timestamp_field->query_id= thd->query_id; + ha_set_bit_in_write_set(table->timestamp_field->fieldnr); + } + + if (m_use_partition_function && + (error= get_parts_for_update(old_data, new_data, table->record[0], + m_part_info, &old_part_id, &new_part_id))) + { + DBUG_RETURN(error); } /* Check for update of primary key for special handling */ if ((table->s->primary_key != MAX_KEY) && - (key_cmp(table->s->primary_key, old_data, new_data))) + (key_cmp(table->s->primary_key, old_data, new_data)) || + (old_part_id != new_part_id)) { int read_res, insert_res, delete_res, undo_res; DBUG_PRINT("info", ("primary key update, doing pk read+delete+insert")); // Get all old fields, since we optimize away fields not in query - read_res= complemented_pk_read(old_data, new_data); + read_res= complemented_pk_read(old_data, new_data, old_part_id); if (read_res) { DBUG_PRINT("info", ("pk read failed")); @@ -2186,8 +2356,10 @@ int ha_ndbcluster::update_row(const byte *old_data, byte *new_data) if (!(op= cursor->updateCurrentTuple())) ERR_RETURN(trans->getNdbError()); m_ops_pending++; - if (uses_blob_value(FALSE)) + if (uses_blob_value()) m_blobs_pending= TRUE; + if (m_use_partition_function) + cursor->setPartitionId(new_part_id); } else { @@ -2195,6 +2367,8 @@ int ha_ndbcluster::update_row(const byte *old_data, byte *new_data) op->updateTuple() != 0) ERR_RETURN(trans->getNdbError()); + if (m_use_partition_function) + op->setPartitionId(new_part_id); if (table->s->primary_key == MAX_KEY) { // This table has no primary key, use "hidden" primary key @@ -2224,9 +2398,9 @@ int ha_ndbcluster::update_row(const byte *old_data, byte *new_data) for (i= 0; i < table->s->fields; i++) { Field *field= table->field[i]; - if (((thd->query_id == field->query_id) || m_retrieve_all_fields) && + if (ha_get_bit_in_write_set(i+1) && (!(field->flags & PRI_KEY_FLAG)) && - set_ndb_value(op, field, i)) + set_ndb_value(op, field, i, new_data - table->record[0])) ERR_RETURN(op->getNdbError()); } @@ -2250,11 +2424,21 @@ int ha_ndbcluster::delete_row(const byte *record) NdbTransaction *trans= m_active_trans; NdbScanOperation* cursor= m_active_cursor; NdbOperation *op; + uint32 part_id; + int error; DBUG_ENTER("delete_row"); + m_write_op= TRUE; statistic_increment(thd->status_var.ha_delete_count,&LOCK_status); m_rows_changed++; + if (m_use_partition_function && + (error= get_part_for_delete(record, table->record[0], m_part_info, + &part_id))) + { + DBUG_RETURN(error); + } + if (cursor) { /* @@ -2269,6 +2453,9 @@ int ha_ndbcluster::delete_row(const byte *record) ERR_RETURN(trans->getNdbError()); m_ops_pending++; + if (m_use_partition_function) + cursor->setPartitionId(part_id); + no_uncommitted_rows_update(-1); if (!m_primary_key_update) @@ -2282,6 +2469,9 @@ int ha_ndbcluster::delete_row(const byte *record) op->deleteTuple() != 0) ERR_RETURN(trans->getNdbError()); + if (m_use_partition_function) + op->setPartitionId(part_id); + no_uncommitted_rows_update(-1); if (table->s->primary_key == MAX_KEY) @@ -2325,51 +2515,73 @@ int ha_ndbcluster::delete_row(const byte *record) set to null. 
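   Editorial note: isNULL() on the fetched NdbRecAttr is effectively
   tri-state in the code below: 0 means a value was read, a positive value
   means SQL NULL, and a negative value means the attribute was never
   fetched ("UNDEFINED"), in which case the column's bit in the optional
   `defined` bitmap is cleared instead of the field being set to NULL.
   A sketch of the dispatch, under those assumptions (col_no and
   unpack_value_into are hypothetical names):

     int is_null= (*value).rec->isNULL();
     if (is_null > 0)
       field->set_null(row_offset);          // SQL NULL
     else if (is_null < 0)
       bitmap_clear_bit(defined, col_no);    // value not fetched
     else
       unpack_value_into(field);             // value present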
*/ -void ha_ndbcluster::unpack_record(byte* buf) +static void ndb_unpack_record(TABLE *table, NdbValue *value, + MY_BITMAP *defined, byte *buf) { + Field **p_field= table->field, *field= *p_field; uint row_offset= (uint) (buf - table->record[0]); - Field **field, **end; - NdbValue *value= m_value; - DBUG_ENTER("unpack_record"); + DBUG_ENTER("ndb_unpack_record"); - end= table->field + table->s->fields; - // Set null flag(s) bzero(buf, table->s->null_bytes); - for (field= table->field; - field < end; - field++, value++) + for ( ; field; + p_field++, value++, field= *p_field) { if ((*value).ptr) { - if (! ((*field)->flags & BLOB_FLAG)) + if (!(field->flags & BLOB_FLAG)) { - if ((*value).rec->isNULL()) - (*field)->set_null(row_offset); - else if ((*field)->type() == MYSQL_TYPE_BIT) + int is_null= (*value).rec->isNULL(); + if (is_null) + { + if (is_null > 0) + { + DBUG_PRINT("info",("[%u] NULL", + (*value).rec->getColumn()->getColumnNo())); + field->set_null(row_offset); + } + else + { + DBUG_PRINT("info",("[%u] UNDEFINED", + (*value).rec->getColumn()->getColumnNo())); + bitmap_clear_bit(defined, + (*value).rec->getColumn()->getColumnNo()); + } + } + else if (field->type() == MYSQL_TYPE_BIT) { - uint pack_len= (*field)->pack_length(); - if (pack_len < 5) + byte *save_field_ptr= field->ptr; + field->ptr= save_field_ptr + row_offset; + if (field->pack_length() < 5) { DBUG_PRINT("info", ("bit field H'%.8X", (*value).rec->u_32_value())); - ((Field_bit *) *field)->store((longlong) - (*value).rec->u_32_value(), - FALSE); + ((Field_bit*) field)->store((longlong) + (*value).rec->u_32_value(), FALSE); } else { DBUG_PRINT("info", ("bit field H'%.8X%.8X", - *(Uint32 *)(*value).rec->aRef(), - *((Uint32 *)(*value).rec->aRef()+1))); - ((Field_bit *) *field)->store((longlong) - (*value).rec->u_64_value(), TRUE); + *(Uint32*) (*value).rec->aRef(), + *((Uint32*) (*value).rec->aRef()+1))); + ((Field_bit*) field)->store((longlong) + (*value).rec->u_64_value(),TRUE); } + field->ptr= save_field_ptr; + DBUG_PRINT("info",("[%u] SET", + (*value).rec->getColumn()->getColumnNo())); + DBUG_DUMP("info", (const char*) field->ptr, field->field_length); + } + else + { + DBUG_PRINT("info",("[%u] SET", + (*value).rec->getColumn()->getColumnNo())); + DBUG_DUMP("info", (const char*) field->ptr, field->field_length); } } else { - NdbBlob* ndb_blob= (*value).blob; + NdbBlob *ndb_blob= (*value).blob; bool isNull= TRUE; #ifndef DBUG_OFF int ret= @@ -2377,11 +2589,16 @@ void ha_ndbcluster::unpack_record(byte* buf) ndb_blob->getNull(isNull); DBUG_ASSERT(ret == 0); if (isNull) - (*field)->set_null(row_offset); + field->set_null(row_offset); } } } - + DBUG_VOID_RETURN; +} + +void ha_ndbcluster::unpack_record(byte *buf) +{ + ndb_unpack_record(table, m_value, 0, buf); #ifndef DBUG_OFF // Read and print all values that was fetched if (table->s->primary_key == MAX_KEY) @@ -2397,7 +2614,6 @@ void ha_ndbcluster::unpack_record(byte* buf) } //print_results(); #endif - DBUG_VOID_RETURN; } /* @@ -2409,8 +2625,6 @@ void ha_ndbcluster::print_results() DBUG_ENTER("print_results"); #ifndef DBUG_OFF - const NDBTAB *tab= (const NDBTAB*) m_table; - if (!_db_on_) DBUG_VOID_RETURN; @@ -2465,11 +2679,13 @@ print_value: } -int ha_ndbcluster::index_init(uint index) +int ha_ndbcluster::index_init(uint index, bool sorted) { DBUG_ENTER("ha_ndbcluster::index_init"); - DBUG_PRINT("enter", ("index: %u", index)); - DBUG_RETURN(handler::index_init(index)); + DBUG_PRINT("enter", ("index: %u sorted: %d", index, sorted)); + active_index= index; + m_sorted= sorted; + 
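/* [Editorial aside] index_init() no longer delegates to
   handler::index_init(); it just records the active index and the
   caller's sort requirement. m_sorted is consumed later by index_read(),
   which routes all key lookups through read_range_first_to_buf().
   Simplified call flow (a sketch, not verbatim API usage):

     h->index_init(idx, true);                        // remember sorted
     h->index_read(buf, key, len, HA_READ_KEY_EXACT);
     //   -> read_range_first_to_buf(&start, 0, desc, m_sorted, buf)
*/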
DBUG_RETURN(0); } @@ -2506,55 +2722,16 @@ int ha_ndbcluster::index_read(byte *buf, const byte *key, uint key_len, enum ha_rkey_function find_flag) { + key_range start_key; + bool descending= FALSE; DBUG_ENTER("ha_ndbcluster::index_read"); DBUG_PRINT("enter", ("active_index: %u, key_len: %u, find_flag: %d", active_index, key_len, find_flag)); - int error; - ndb_index_type type= get_index_type(active_index); - const KEY* key_info= table->key_info+active_index; - switch (type){ - case PRIMARY_KEY_ORDERED_INDEX: - case PRIMARY_KEY_INDEX: - if (find_flag == HA_READ_KEY_EXACT && key_info->key_length == key_len) - { - if (m_active_cursor && (error= close_scan())) - DBUG_RETURN(error); - DBUG_RETURN(pk_read(key, key_len, buf)); - } - else if (type == PRIMARY_KEY_INDEX) - { - DBUG_RETURN(1); - } - break; - case UNIQUE_ORDERED_INDEX: - case UNIQUE_INDEX: - if (find_flag == HA_READ_KEY_EXACT && key_info->key_length == key_len && - !check_null_in_key(key_info, key, key_len)) - { - if (m_active_cursor && (error= close_scan())) - DBUG_RETURN(error); - DBUG_RETURN(unique_index_read(key, key_len, buf)); - } - else if (type == UNIQUE_INDEX) - { - DBUG_RETURN(1); - } - break; - case ORDERED_INDEX: - break; - default: - case UNDEFINED_INDEX: - DBUG_ASSERT(FALSE); - DBUG_RETURN(1); - break; - } - - key_range start_key; start_key.key= key; start_key.length= key_len; start_key.flag= find_flag; - bool descending= FALSE; + descending= FALSE; switch (find_flag) { case HA_READ_KEY_OR_PREV: case HA_READ_BEFORE_KEY: @@ -2565,8 +2742,8 @@ int ha_ndbcluster::index_read(byte *buf, default: break; } - error= ordered_index_scan(&start_key, 0, TRUE, descending, buf); - DBUG_RETURN(error == HA_ERR_END_OF_FILE ? HA_ERR_KEY_NOT_FOUND : error); + DBUG_RETURN(read_range_first_to_buf(&start_key, 0, descending, + m_sorted, buf)); } @@ -2577,7 +2754,7 @@ int ha_ndbcluster::index_read_idx(byte *buf, uint index_no, statistic_increment(current_thd->status_var.ha_read_key_count, &LOCK_status); DBUG_ENTER("ha_ndbcluster::index_read_idx"); DBUG_PRINT("enter", ("index_no: %u, key_len: %u", index_no, key_len)); - index_init(index_no); + index_init(index_no, 0); DBUG_RETURN(index_read(buf, key, key_len, find_flag)); } @@ -2608,7 +2785,7 @@ int ha_ndbcluster::index_first(byte *buf) // Start the ordered index scan and fetch the first row // Only HA_READ_ORDER indexes get called by index_first - DBUG_RETURN(ordered_index_scan(0, 0, TRUE, FALSE, buf)); + DBUG_RETURN(ordered_index_scan(0, 0, TRUE, FALSE, buf, NULL)); } @@ -2616,7 +2793,7 @@ int ha_ndbcluster::index_last(byte *buf) { DBUG_ENTER("ha_ndbcluster::index_last"); statistic_increment(current_thd->status_var.ha_read_last_count,&LOCK_status); - DBUG_RETURN(ordered_index_scan(0, 0, TRUE, TRUE, buf)); + DBUG_RETURN(ordered_index_scan(0, 0, TRUE, TRUE, buf, NULL)); } int ha_ndbcluster::index_read_last(byte * buf, const byte * key, uint key_len) @@ -2625,66 +2802,76 @@ int ha_ndbcluster::index_read_last(byte * buf, const byte * key, uint key_len) DBUG_RETURN(index_read(buf, key, key_len, HA_READ_PREFIX_LAST)); } -inline int ha_ndbcluster::read_range_first_to_buf(const key_range *start_key, const key_range *end_key, - bool eq_r, bool sorted, + bool desc, bool sorted, byte* buf) { - KEY* key_info; - int error= 1; + part_id_range part_spec; + ndb_index_type type= get_index_type(active_index); + const KEY* key_info= table->key_info+active_index; + int error; DBUG_ENTER("ha_ndbcluster::read_range_first_to_buf"); - DBUG_PRINT("info", ("eq_r: %d, sorted: %d", eq_r, sorted)); + DBUG_PRINT("info", 
("desc: %d, sorted: %d", desc, sorted)); - switch (get_index_type(active_index)){ + if (m_use_partition_function) + { + get_partition_set(table, buf, active_index, start_key, &part_spec); + if (part_spec.start_part > part_spec.end_part) + { + DBUG_RETURN(HA_ERR_END_OF_FILE); + } + else if (part_spec.start_part == part_spec.end_part) + { + /* + Only one partition is required to scan, if sorted is required we + don't need it any more since output from one ordered partitioned + index is always sorted. + */ + sorted= FALSE; + } + } + m_write_op= FALSE; + switch (type){ case PRIMARY_KEY_ORDERED_INDEX: case PRIMARY_KEY_INDEX: - key_info= table->key_info + active_index; if (start_key && start_key->length == key_info->key_length && start_key->flag == HA_READ_KEY_EXACT) { if (m_active_cursor && (error= close_scan())) DBUG_RETURN(error); - error= pk_read(start_key->key, start_key->length, buf); - DBUG_RETURN(error == HA_ERR_KEY_NOT_FOUND ? HA_ERR_END_OF_FILE : error); + DBUG_RETURN(pk_read(start_key->key, start_key->length, buf, + part_spec.start_part)); } break; case UNIQUE_ORDERED_INDEX: case UNIQUE_INDEX: - key_info= table->key_info + active_index; if (start_key && start_key->length == key_info->key_length && start_key->flag == HA_READ_KEY_EXACT && !check_null_in_key(key_info, start_key->key, start_key->length)) { if (m_active_cursor && (error= close_scan())) DBUG_RETURN(error); - error= unique_index_read(start_key->key, start_key->length, buf); - DBUG_RETURN(error == HA_ERR_KEY_NOT_FOUND ? HA_ERR_END_OF_FILE : error); + DBUG_RETURN(unique_index_read(start_key->key, start_key->length, buf)); } break; default: break; } - // Start the ordered index scan and fetch the first row - error= ordered_index_scan(start_key, end_key, sorted, FALSE, buf); - DBUG_RETURN(error); + DBUG_RETURN(ordered_index_scan(start_key, end_key, sorted, desc, buf, + &part_spec)); } - int ha_ndbcluster::read_range_first(const key_range *start_key, const key_range *end_key, bool eq_r, bool sorted) { byte* buf= table->record[0]; DBUG_ENTER("ha_ndbcluster::read_range_first"); - - DBUG_RETURN(read_range_first_to_buf(start_key, - end_key, - eq_r, - sorted, - buf)); + DBUG_RETURN(read_range_first_to_buf(start_key, end_key, FALSE, + sorted, buf)); } int ha_ndbcluster::read_range_next() @@ -2710,7 +2897,7 @@ int ha_ndbcluster::rnd_init(bool scan) DBUG_RETURN(-1); } } - index_init(table->s->primary_key); + index_init(table->s->primary_key, 0); DBUG_RETURN(0); } @@ -2777,7 +2964,20 @@ int ha_ndbcluster::rnd_pos(byte *buf, byte *pos) &LOCK_status); // The primary key for the record is stored in pos // Perform a pk_read using primary key "index" - DBUG_RETURN(pk_read(pos, ref_length, buf)); + { + part_id_range part_spec; + if (m_use_partition_function) + { + key_range key_spec; + KEY *key_info= table->key_info + active_index; + key_spec.key= pos; + key_spec.length= ref_length; + key_spec.flag= HA_READ_KEY_EXACT; + get_full_part_id_from_key(table, buf, key_info, &key_spec, &part_spec); + DBUG_ASSERT(part_spec.start_part == part_spec.end_part); + } + DBUG_RETURN(pk_read(pos, ref_length, buf, part_spec.start_part)); + } } @@ -2922,87 +3122,16 @@ void ha_ndbcluster::info(uint flag) DBUG_VOID_RETURN; } - int ha_ndbcluster::extra(enum ha_extra_function operation) { DBUG_ENTER("extra"); switch (operation) { - case HA_EXTRA_NORMAL: /* Optimize for space (def) */ - DBUG_PRINT("info", ("HA_EXTRA_NORMAL")); - break; - case HA_EXTRA_QUICK: /* Optimize for speed */ - DBUG_PRINT("info", ("HA_EXTRA_QUICK")); - break; case HA_EXTRA_RESET: /* Reset 
database to after open */ DBUG_PRINT("info", ("HA_EXTRA_RESET")); DBUG_PRINT("info", ("Clearing condition stack")); cond_clear(); - break; - case HA_EXTRA_CACHE: /* Cash record in HA_rrnd() */ - DBUG_PRINT("info", ("HA_EXTRA_CACHE")); - break; - case HA_EXTRA_NO_CACHE: /* End cacheing of records (def) */ - DBUG_PRINT("info", ("HA_EXTRA_NO_CACHE")); - break; - case HA_EXTRA_NO_READCHECK: /* No readcheck on update */ - DBUG_PRINT("info", ("HA_EXTRA_NO_READCHECK")); - break; - case HA_EXTRA_READCHECK: /* Use readcheck (def) */ - DBUG_PRINT("info", ("HA_EXTRA_READCHECK")); - break; - case HA_EXTRA_KEYREAD: /* Read only key to database */ - DBUG_PRINT("info", ("HA_EXTRA_KEYREAD")); - break; - case HA_EXTRA_NO_KEYREAD: /* Normal read of records (def) */ - DBUG_PRINT("info", ("HA_EXTRA_NO_KEYREAD")); - break; - case HA_EXTRA_NO_USER_CHANGE: /* No user is allowed to write */ - DBUG_PRINT("info", ("HA_EXTRA_NO_USER_CHANGE")); - break; - case HA_EXTRA_KEY_CACHE: - DBUG_PRINT("info", ("HA_EXTRA_KEY_CACHE")); - break; - case HA_EXTRA_NO_KEY_CACHE: - DBUG_PRINT("info", ("HA_EXTRA_NO_KEY_CACHE")); - break; - case HA_EXTRA_WAIT_LOCK: /* Wait until file is avalably (def) */ - DBUG_PRINT("info", ("HA_EXTRA_WAIT_LOCK")); - break; - case HA_EXTRA_NO_WAIT_LOCK: /* If file is locked, return quickly */ - DBUG_PRINT("info", ("HA_EXTRA_NO_WAIT_LOCK")); - break; - case HA_EXTRA_WRITE_CACHE: /* Use write cache in ha_write() */ - DBUG_PRINT("info", ("HA_EXTRA_WRITE_CACHE")); - break; - case HA_EXTRA_FLUSH_CACHE: /* flush write_record_cache */ - DBUG_PRINT("info", ("HA_EXTRA_FLUSH_CACHE")); - break; - case HA_EXTRA_NO_KEYS: /* Remove all update of keys */ - DBUG_PRINT("info", ("HA_EXTRA_NO_KEYS")); - break; - case HA_EXTRA_KEYREAD_CHANGE_POS: /* Keyread, but change pos */ - DBUG_PRINT("info", ("HA_EXTRA_KEYREAD_CHANGE_POS")); /* xxxxchk -r must be used */ - break; - case HA_EXTRA_REMEMBER_POS: /* Remember pos for next/prev */ - DBUG_PRINT("info", ("HA_EXTRA_REMEMBER_POS")); - break; - case HA_EXTRA_RESTORE_POS: - DBUG_PRINT("info", ("HA_EXTRA_RESTORE_POS")); - break; - case HA_EXTRA_REINIT_CACHE: /* init cache from current record */ - DBUG_PRINT("info", ("HA_EXTRA_REINIT_CACHE")); - break; - case HA_EXTRA_FORCE_REOPEN: /* Datafile have changed on disk */ - DBUG_PRINT("info", ("HA_EXTRA_FORCE_REOPEN")); - break; - case HA_EXTRA_FLUSH: /* Flush tables to disk */ - DBUG_PRINT("info", ("HA_EXTRA_FLUSH")); - break; - case HA_EXTRA_NO_ROWS: /* Don't write rows */ - DBUG_PRINT("info", ("HA_EXTRA_NO_ROWS")); - break; - case HA_EXTRA_RESET_STATE: /* Reset positions */ - DBUG_PRINT("info", ("HA_EXTRA_RESET_STATE")); + if (m_part_info) + bitmap_clear_all(&m_part_info->used_partitions); break; case HA_EXTRA_IGNORE_DUP_KEY: /* Dup keys don't rollback everything*/ DBUG_PRINT("info", ("HA_EXTRA_IGNORE_DUP_KEY")); @@ -3022,34 +3151,8 @@ int ha_ndbcluster::extra(enum ha_extra_function operation) m_use_write= FALSE; m_ignore_dup_key= FALSE; break; - case HA_EXTRA_RETRIEVE_ALL_COLS: /* Retrieve all columns, not just those - where field->query_id is the same as - the current query id */ - DBUG_PRINT("info", ("HA_EXTRA_RETRIEVE_ALL_COLS")); - m_retrieve_all_fields= TRUE; - break; - case HA_EXTRA_PREPARE_FOR_DELETE: - DBUG_PRINT("info", ("HA_EXTRA_PREPARE_FOR_DELETE")); - break; - case HA_EXTRA_PREPARE_FOR_UPDATE: /* Remove read cache if problems */ - DBUG_PRINT("info", ("HA_EXTRA_PREPARE_FOR_UPDATE")); - break; - case HA_EXTRA_PRELOAD_BUFFER_SIZE: - DBUG_PRINT("info", ("HA_EXTRA_PRELOAD_BUFFER_SIZE")); - break; - case 
HA_EXTRA_RETRIEVE_PRIMARY_KEY: - DBUG_PRINT("info", ("HA_EXTRA_RETRIEVE_PRIMARY_KEY")); - m_retrieve_primary_key= TRUE; - break; - case HA_EXTRA_CHANGE_KEY_TO_UNIQUE: - DBUG_PRINT("info", ("HA_EXTRA_CHANGE_KEY_TO_UNIQUE")); - break; - case HA_EXTRA_CHANGE_KEY_TO_DUP: - DBUG_PRINT("info", ("HA_EXTRA_CHANGE_KEY_TO_DUP")); - case HA_EXTRA_KEYREAD_PRESERVE_FIELDS: - DBUG_PRINT("info", ("HA_EXTRA_KEYREAD_PRESERVE_FIELDS")); + default: break; - } DBUG_RETURN(0); @@ -3250,8 +3353,9 @@ int ha_ndbcluster::external_lock(THD *thd, int lock_type) Thd_ndb *thd_ndb= get_thd_ndb(thd); Ndb *ndb= thd_ndb->ndb; - DBUG_PRINT("enter", ("thd: %x, thd_ndb: %x, thd_ndb->lock_count: %d", - thd, thd_ndb, thd_ndb->lock_count)); + DBUG_PRINT("enter", ("this: %x thd: %lx thd_ndb: %lx " + "thd_ndb->lock_count: %d", + this, thd, thd_ndb, thd_ndb->lock_count)); if (lock_type != F_UNLCK) { @@ -3330,8 +3434,6 @@ int ha_ndbcluster::external_lock(THD *thd, int lock_type) DBUG_ASSERT(m_active_trans); // Start of transaction m_rows_changed= 0; - m_retrieve_all_fields= FALSE; - m_retrieve_primary_key= FALSE; m_ops_pending= 0; { NDBDICT *dict= ndb->getDictionary(); @@ -3469,8 +3571,6 @@ int ha_ndbcluster::start_stmt(THD *thd, thr_lock_type lock_type) m_active_trans= trans; // Start of statement - m_retrieve_all_fields= FALSE; - m_retrieve_primary_key= FALSE; m_ops_pending= 0; DBUG_RETURN(error); @@ -3516,7 +3616,8 @@ int ndbcluster_commit(THD *thd, bool all) while ((share= it++)) { pthread_mutex_lock(&share->mutex); - DBUG_PRINT("info", ("Invalidate commit_count for %s, share->commit_count: %d ", share->table_name, share->commit_count)); + DBUG_PRINT("info", ("Invalidate commit_count for %s, share->commit_count: %d ", + share->key, share->commit_count)); share->commit_count= 0; share->commit_count_lock++; pthread_mutex_unlock(&share->mutex); @@ -3847,53 +3948,7 @@ static int create_ndb_column(NDBCOL &col, /* Create a table in NDB Cluster - */ - -static void ndb_set_fragmentation(NDBTAB &tab, TABLE *form, uint pk_length) -{ - if (form->s->max_rows == (ha_rows) 0) /* default setting, don't set fragmentation */ - return; - /** - * get the number of fragments right - */ - uint no_fragments; - { -#if MYSQL_VERSION_ID >= 50000 - uint acc_row_size= 25 + /*safety margin*/ 2; -#else - uint acc_row_size= pk_length*4; - /* add acc overhead */ - if (pk_length <= 8) /* main page will set the limit */ - acc_row_size+= 25 + /*safety margin*/ 2; - else /* overflow page will set the limit */ - acc_row_size+= 4 + /*safety margin*/ 4; -#endif - ulonglong acc_fragment_size= 512*1024*1024; - ulonglong max_rows= form->s->max_rows; -#if MYSQL_VERSION_ID >= 50100 - no_fragments= (max_rows*acc_row_size)/acc_fragment_size+1; -#else - no_fragments= ((max_rows*acc_row_size)/acc_fragment_size+1 - +1/*correct rounding*/)/2; -#endif - } - { - uint no_nodes= g_ndb_cluster_connection->no_db_nodes(); - NDBTAB::FragmentType ftype; - if (no_fragments > 2*no_nodes) - { - ftype= NDBTAB::FragAllLarge; - if (no_fragments > 4*no_nodes) - push_warning(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN, ER_UNKNOWN_ERROR, - "Ndb might have problems storing the max amount of rows specified"); - } - else if (no_fragments > no_nodes) - ftype= NDBTAB::FragAllMedium; - else - ftype= NDBTAB::FragAllSmall; - tab.setFragmentType(ftype); - } -} +*/ int ha_ndbcluster::create(const char *name, TABLE *form, @@ -3908,10 +3963,13 @@ int ha_ndbcluster::create(const char *name, DBUG_ENTER("ha_ndbcluster::create"); DBUG_PRINT("enter", ("name: %s", name)); - fn_format(name2, name, "", "",2); // 
Remove the .frm extension + + strcpy(name2, name); + DBUG_ASSERT(*fn_rext((char*)name2) == 0); set_dbname(name2); set_tabname(name2); + table= form; if (create_from_engine) { /* @@ -3919,7 +3977,8 @@ int ha_ndbcluster::create(const char *name, caller. Do Ndb specific stuff, such as create a .ndb file */ - my_errno= write_ndb_file(); + if ((my_errno= write_ndb_file())) + DBUG_RETURN(my_errno); DBUG_RETURN(my_errno); } @@ -3933,7 +3992,7 @@ int ha_ndbcluster::create(const char *name, if (packfrm(data, length, &pack_data, &pack_length)) DBUG_RETURN(2); - DBUG_PRINT("info", ("setFrm data=%x, len=%d", pack_data, pack_length)); + DBUG_PRINT("info", ("setFrm data=%lx len=%d", pack_data, pack_length)); tab.setFrm(pack_data, pack_length); my_free((char*)data, MYF(0)); my_free((char*)pack_data, MYF(0)); @@ -3946,11 +4005,40 @@ int ha_ndbcluster::create(const char *name, field->pack_length())); if ((my_errno= create_ndb_column(col, field, info))) DBUG_RETURN(my_errno); + + if ( +#ifdef NDB_DISKDATA + info->store_on_disk || +#else + getenv("NDB_DEFAULT_DISK")) +#endif + col.setStorageType(NdbDictionary::Column::StorageTypeDisk); + else + col.setStorageType(NdbDictionary::Column::StorageTypeMemory); + tab.addColumn(col); if (col.getPrimaryKey()) pk_length += (field->pack_length() + 3) / 4; } - + + KEY* key_info; + for (i= 0, key_info= form->key_info; i < form->s->keys; i++, key_info++) + { + KEY_PART_INFO *key_part= key_info->key_part; + KEY_PART_INFO *end= key_part + key_info->key_parts; + for (; key_part != end; key_part++) + tab.getColumn(key_part->fieldnr-1)->setStorageType( + NdbDictionary::Column::StorageTypeMemory); + } + +#ifdef NDB_DISKDATA + if (info->store_on_disk) + if (info->tablespace) + tab.setTablespace(info->tablespace); + else + tab.setTablespace("DEFAULT-TS"); +#endif + // No primary key, create shadow key as 64 bit, auto increment if (form->s->primary_key == MAX_KEY) { @@ -3998,7 +4086,22 @@ int ha_ndbcluster::create(const char *name, } } - ndb_set_fragmentation(tab, form, pk_length); + // Check partition info + partition_info *part_info= form->part_info; + if (part_info) + { + int error; + if ((error= set_up_partition_info(part_info, form, (void*)&tab))) + { + DBUG_RETURN(error); + } + } + else + { + ndb_set_fragmentation(tab, form, pk_length); + } + + if ((my_errno= check_ndb_connection())) DBUG_RETURN(my_errno); @@ -4120,10 +4223,17 @@ int ha_ndbcluster::rename_table(const char *from, const char *to) // Change current database to that of target table set_dbname(to); ndb->setDatabaseName(m_dbname); - if (!(result= alter_table_name(new_tabname))) + + if ((result= alter_table_name(new_tabname))) + { + DBUG_RETURN(result); + } + + // Rename .ndb file + if ((result= handler::rename_table(from, to))) { - // Rename .ndb file - result= handler::rename_table(from, to); + // ToDo in 4.1 should rollback alter table... 
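/* [Editorial aside] The rename is two-phase: the NDB dictionary object is
   renamed first (alter_table_name()), then the on-disk .ndb file
   (handler::rename_table()). As the ToDo above notes, a failure in the
   second phase currently leaves the dictionary rename in place. A
   compensating step would look roughly like this (hypothetical, not part
   of this change; old_tabname stands for the original table name):

     if ((result= handler::rename_table(from, to)))
     {
       (void) alter_table_name(old_tabname);  // best-effort undo in NDB
       DBUG_RETURN(result);
     }
*/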
+ DBUG_RETURN(result); } DBUG_RETURN(result); @@ -4139,7 +4249,7 @@ int ha_ndbcluster::alter_table_name(const char *to) Ndb *ndb= get_ndb(); NDBDICT *dict= ndb->getDictionary(); const NDBTAB *orig_tab= (const NDBTAB *) m_table; - DBUG_ENTER("alter_table_name_table"); + DBUG_ENTER("alter_table_name"); NdbDictionary::Table new_tab= *orig_tab; new_tab.setName(to); @@ -4158,6 +4268,38 @@ int ha_ndbcluster::alter_table_name(const char *to) */ +/* static version which does not need a handler */ + +int +ha_ndbcluster::delete_table(ha_ndbcluster *h, Ndb *ndb, + const char *path, + const char *db, + const char *table_name) +{ + DBUG_ENTER("ha_ndbcluster::ndbcluster_delete_table"); + NDBDICT *dict= ndb->getDictionary(); + + /* Drop the table from NDB */ + + int res; + if (h) + { + res= h->intern_drop_table(); + } + else + { + ndb->setDatabaseName(db); + res= dict->dropTable(table_name); + } + + if (res) + { + DBUG_RETURN(res); + } + + DBUG_RETURN(0); +} + int ha_ndbcluster::delete_table(const char *name) { DBUG_ENTER("ha_ndbcluster::delete_table"); @@ -4170,9 +4312,8 @@ int ha_ndbcluster::delete_table(const char *name) /* Call ancestor function to delete .ndb file */ handler::delete_table(name); - - /* Drop the table from NDB */ - DBUG_RETURN(drop_table()); + + DBUG_RETURN(delete_table(this, get_ndb(),name, m_dbname, m_tabname)); } @@ -4180,14 +4321,13 @@ int ha_ndbcluster::delete_table(const char *name) Drop table in NDB Cluster */ -int ha_ndbcluster::drop_table() +int ha_ndbcluster::intern_drop_table() { Ndb *ndb= get_ndb(); NdbDictionary::Dictionary *dict= ndb->getDictionary(); - DBUG_ENTER("drop_table"); + DBUG_ENTER("intern_drop_table"); DBUG_PRINT("enter", ("Deleting %s", m_tabname)); - release_metadata(); if (dict->dropTable(m_tabname)) ERR_RETURN(dict->getNdbError()); @@ -4238,26 +4378,31 @@ ulonglong ha_ndbcluster::get_auto_increment() Constructor for the NDB Cluster table handler */ -ha_ndbcluster::ha_ndbcluster(TABLE *table_arg): +#define HA_NDBCLUSTER_TABLE_FLAGS \ + HA_REC_NOT_IN_SEQ | \ + HA_NULL_IN_KEY | \ + HA_AUTO_PART_KEY | \ + HA_NO_PREFIX_CHAR_KEYS | \ + HA_NEED_READ_RANGE_BUFFER | \ + HA_CAN_GEOMETRY | \ + HA_CAN_BIT_FIELD + +ha_ndbcluster::ha_ndbcluster(TABLE_SHARE *table_arg): handler(&ndbcluster_hton, table_arg), m_active_trans(NULL), m_active_cursor(NULL), m_table(NULL), m_table_version(-1), m_table_info(NULL), - m_table_flags(HA_REC_NOT_IN_SEQ | - HA_NULL_IN_KEY | - HA_AUTO_PART_KEY | - HA_NO_PREFIX_CHAR_KEYS | - HA_NEED_READ_RANGE_BUFFER | - HA_CAN_GEOMETRY | - HA_CAN_BIT_FIELD), + m_table_flags(HA_NDBCLUSTER_TABLE_FLAGS), m_share(0), + m_part_info(NULL), + m_use_partition_function(FALSE), + m_sorted(FALSE), m_use_write(FALSE), m_ignore_dup_key(FALSE), m_primary_key_update(FALSE), - m_retrieve_all_fields(FALSE), - m_retrieve_primary_key(FALSE), + m_ignore_no_key(FALSE), m_rows_to_insert((ha_rows) 1), m_rows_inserted((ha_rows) 0), m_bulk_insert_rows((ha_rows) 1024), @@ -4292,6 +4437,10 @@ ha_ndbcluster::ha_ndbcluster(TABLE *table_arg): m_index[i].unique_index= NULL; m_index[i].index= NULL; m_index[i].unique_index_attrid_map= NULL; + m_index[i].index_stat=NULL; + m_index[i].index_stat_cache_entries=0; + m_index[i].index_stat_update_freq=0; + m_index[i].index_stat_query_count=0; } DBUG_VOID_RETURN; @@ -4307,7 +4456,9 @@ ha_ndbcluster::~ha_ndbcluster() DBUG_ENTER("~ha_ndbcluster"); if (m_share) - free_share(m_share); + { + free_share(&m_share); + } release_metadata(); my_free(m_blobs_buffer, MYF(MY_ALLOW_ZERO_PTR)); m_blobs_buffer= 0; @@ -4333,18 +4484,24 @@ 
ha_ndbcluster::~ha_ndbcluster() Open a table for further use - fetch metadata for this table from NDB - check that table exists + + RETURN + 0 ok + < 0 Table has changed */ int ha_ndbcluster::open(const char *name, int mode, uint test_if_locked) { int res; KEY *key; - DBUG_ENTER("open"); - DBUG_PRINT("enter", ("name: %s mode: %d test_if_locked: %d", + DBUG_ENTER("ha_ndbcluster::open"); + DBUG_PRINT("enter", ("name: %s mode: %d test_if_locked: %d", name, mode, test_if_locked)); - // Setup ref_length to make room for the whole - // primary key to be written in the ref variable + /* + Setup ref_length to make room for the whole + primary key to be written in the ref variable + */ if (table->s->primary_key != MAX_KEY) { @@ -4361,7 +4518,8 @@ int ha_ndbcluster::open(const char *name, int mode, uint test_if_locked) set_tabname(name); if (check_ndb_connection()) { - free_share(m_share); m_share= 0; + free_share(&m_share); + m_share= 0; DBUG_RETURN(HA_ERR_NO_CONNECTION); } @@ -4369,9 +4527,18 @@ int ha_ndbcluster::open(const char *name, int mode, uint test_if_locked) if (!res) info(HA_STATUS_VARIABLE | HA_STATUS_CONST); + DBUG_RETURN(res); } +void ha_ndbcluster::set_part_info(partition_info *part_info) +{ + m_part_info= part_info; + if (!(m_part_info->part_type == HASH_PARTITION && + m_part_info->list_of_part_fields && + !is_sub_partitioned(m_part_info))) + m_use_partition_function= TRUE; +} /* Close the table @@ -4381,7 +4548,8 @@ int ha_ndbcluster::open(const char *name, int mode, uint test_if_locked) int ha_ndbcluster::close(void) { DBUG_ENTER("close"); - free_share(m_share); m_share= 0; + free_share(&m_share); + m_share= 0; release_metadata(); DBUG_RETURN(0); } @@ -4490,7 +4658,7 @@ int ndbcluster_discover(THD* thd, const char *db, const char *name, if (!(tab= dict->getTable(name))) { const NdbError err= dict->getNdbError(); - if (err.code == 709) + if (err.code == 709 || err.code == 723) DBUG_RETURN(-1); ERR_RETURN(err); } @@ -4537,7 +4705,7 @@ int ndbcluster_table_exists_in_engine(THD* thd, const char *db, const char *name if (!(tab= dict->getTable(name))) { const NdbError err= dict->getNdbError(); - if (err.code == 709) + if (err.code == 709 || err.code == 723) DBUG_RETURN(0); ERR_RETURN(err); } @@ -4558,9 +4726,10 @@ extern "C" byte* tables_get_key(const char *entry, uint *length, /* Drop a database in NDB Cluster - */ + NOTE add a dummy void function, since stupid handlerton is returning void instead of int... 
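   Editorial note: the handlerton drop_database slot is declared as
   returning void, so the int-returning worker is wrapped and its status
   discarded; callers therefore cannot observe a failed drop. The shape of
   the workaround, as implemented just below:

     void ndbcluster_drop_database(char *path)   // void wrapper
     {
       ndbcluster_drop_database_impl(path);      // status discarded
     }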
+*/ -int ndbcluster_drop_database(const char *path) +int ndbcluster_drop_database_impl(const char *path) { DBUG_ENTER("ndbcluster_drop_database"); THD *thd= current_thd; @@ -4575,33 +4744,37 @@ int ndbcluster_drop_database(const char *path) DBUG_PRINT("enter", ("db: %s", dbname)); if (!(ndb= check_ndb_in_thd(thd))) - DBUG_RETURN(HA_ERR_NO_CONNECTION); + DBUG_RETURN(-1); // List tables in NDB NDBDICT *dict= ndb->getDictionary(); if (dict->listObjects(list, NdbDictionary::Object::UserTable) != 0) - ERR_RETURN(dict->getNdbError()); + DBUG_RETURN(-1); for (i= 0 ; i < list.count ; i++) { - NdbDictionary::Dictionary::List::Element& t= list.elements[i]; - DBUG_PRINT("info", ("Found %s/%s in NDB", t.database, t.name)); + NdbDictionary::Dictionary::List::Element& elmt= list.elements[i]; + DBUG_PRINT("info", ("Found %s/%s in NDB", elmt.database, elmt.name)); // Add only tables that belongs to db - if (my_strcasecmp(system_charset_info, t.database, dbname)) + if (my_strcasecmp(system_charset_info, elmt.database, dbname)) continue; - DBUG_PRINT("info", ("%s must be dropped", t.name)); - drop_list.push_back(thd->strdup(t.name)); + DBUG_PRINT("info", ("%s must be dropped", elmt.name)); + drop_list.push_back(thd->strdup(elmt.name)); } // Drop any tables belonging to database + char full_path[FN_REFLEN]; + char *tmp= strxnmov(full_path, FN_REFLEN-1, share_prefix, dbname, "/", + NullS); ndb->setDatabaseName(dbname); List_iterator_fast<char> it(drop_list); while ((tabname=it++)) { - if (dict->dropTable(tabname)) + strxnmov(tmp, FN_REFLEN - (tmp - full_path)-1, tabname, NullS); + if (ha_ndbcluster::delete_table(0, ndb, full_path, dbname, tabname)) { const NdbError err= dict->getNdbError(); - if (err.code != 709) + if (err.code != 709 && err.code != 723) { ERR_PRINT(err); ret= ndb_to_mysql_error(&err); @@ -4611,6 +4784,96 @@ int ndbcluster_drop_database(const char *path) DBUG_RETURN(ret); } +void ndbcluster_drop_database(char *path) +{ + ndbcluster_drop_database_impl(path); +} +/* + find all tables in ndb and discover those needed +*/ +static int ndbcluster_find_all_files(THD *thd) +{ + DBUG_ENTER("ndbcluster_find_all_files"); + Ndb* ndb; + char key[FN_REFLEN]; + NdbDictionary::Dictionary::List list; + + if (!(ndb= check_ndb_in_thd(thd))) + DBUG_RETURN(HA_ERR_NO_CONNECTION); + + NDBDICT *dict= ndb->getDictionary(); + + int unhandled, retries= 5; + do + { + if (dict->listObjects(list, NdbDictionary::Object::UserTable) != 0) + ERR_RETURN(dict->getNdbError()); + unhandled= 0; + for (uint i= 0 ; i < list.count ; i++) + { + NDBDICT::List::Element& elmt= list.elements[i]; + DBUG_PRINT("info", ("Found %s.%s in NDB", elmt.database, elmt.name)); + if (!(elmt.state == NDBOBJ::StateBuilding || + elmt.state == NDBOBJ::StateOnline)) + { + sql_print_information("NDB: skipping setup table %s.%s, in state %d", + elmt.database, elmt.name, elmt.state); + continue; + } + + ndb->setDatabaseName(elmt.database); + const NDBTAB *ndbtab; + + if (!(ndbtab= dict->getTable(elmt.name))) + { + sql_print_error("NDB: failed to setup table %s.%s, error: %d, %s", + elmt.database, elmt.name, + dict->getNdbError().code, + dict->getNdbError().message); + unhandled++; + continue; + } + + if (ndbtab->getFrmLength() == 0) + continue; + + strxnmov(key, FN_LEN-1, mysql_data_home, "/", + elmt.database, "/", elmt.name, NullS); + const void *data= 0, *pack_data= 0; + uint length, pack_length; + int discover= 0; + if (readfrm(key, &data, &length) || + packfrm(data, length, &pack_data, &pack_length)) + { + discover= 1; + sql_print_information("NDB: 
missing frm for %s.%s, discovering...", + elmt.database, elmt.name); + } + else if (cmp_frm(ndbtab, pack_data, pack_length)) + { + discover= 1; + sql_print_information("NDB: mismatch in frm for %s.%s, discovering...", + elmt.database, elmt.name); + } + my_free((char*) data, MYF(MY_ALLOW_ZERO_PTR)); + my_free((char*) pack_data, MYF(MY_ALLOW_ZERO_PTR)); + + if (discover) + { + /* ToDo 4.1 database needs to be created if missing */ + pthread_mutex_lock(&LOCK_open); + if (ha_create_table_from_engine(thd, elmt.database, elmt.name)) + { + /* ToDo 4.1 handle error */ + } + pthread_mutex_unlock(&LOCK_open); + } + } + } + while (unhandled && retries--); + + DBUG_RETURN(0); +} int ndbcluster_find_files(THD *thd,const char *db,const char *path, const char *wild, bool dir, List<char> *files) @@ -4622,7 +4885,7 @@ int ndbcluster_find_files(THD *thd,const char *db,const char *path, Ndb* ndb; char name[FN_REFLEN]; HASH ndb_tables, ok_tables; - NdbDictionary::Dictionary::List list; + NDBDICT::List list; if (!(ndb= check_ndb_in_thd(thd))) DBUG_RETURN(HA_ERR_NO_CONNECTION); @@ -4653,11 +4916,11 @@ int ndbcluster_find_files(THD *thd,const char *db,const char *path, for (i= 0 ; i < list.count ; i++) { - NdbDictionary::Dictionary::List::Element& t= list.elements[i]; - DBUG_PRINT("info", ("Found %s/%s in NDB", t.database, t.name)); + NDBDICT::List::Element& elmt= list.elements[i]; + DBUG_PRINT("info", ("Found %s/%s in NDB", elmt.database, elmt.name)); // Add only tables that belongs to db - if (my_strcasecmp(system_charset_info, t.database, db)) + if (my_strcasecmp(system_charset_info, elmt.database, db)) continue; // Apply wildcard to list of tables in NDB @@ -4665,14 +4928,14 @@ int ndbcluster_find_files(THD *thd,const char *db,const char *path, { if (lower_case_table_names) { - if (wild_case_compare(files_charset_info, t.name, wild)) + if (wild_case_compare(files_charset_info, elmt.name, wild)) continue; } - else if (wild_compare(t.name,wild,0)) + else if (wild_compare(elmt.name,wild,0)) continue; } - DBUG_PRINT("info", ("Inserting %s into ndb_tables hash", t.name)); - my_hash_insert(&ndb_tables, (byte*)thd->strdup(t.name)); + DBUG_PRINT("info", ("Inserting %s into ndb_tables hash", elmt.name)); + my_hash_insert(&ndb_tables, (byte*)thd->strdup(elmt.name)); } char *file_name; @@ -4690,7 +4953,7 @@ int ndbcluster_find_files(THD *thd,const char *db,const char *path, } // File is not in NDB, check for .ndb file with this name - (void)strxnmov(name, FN_REFLEN, + (void)strxnmov(name, FN_REFLEN-1, mysql_data_home,"/",db,"/",file_name,ha_ndb_ext,NullS); DBUG_PRINT("info", ("Check access for %s", name)); if (access(name, F_OK)) @@ -4720,10 +4983,15 @@ int ndbcluster_find_files(THD *thd,const char *db,const char *path, file_name= hash_element(&ndb_tables, i); if (!hash_search(&ok_tables, file_name, strlen(file_name))) { - DBUG_PRINT("info", ("%s must be discovered", file_name)); - // File is in list of ndb tables and not in ok_tables - // This table need to be created - create_list.push_back(thd->strdup(file_name)); + strxnmov(name, sizeof(name)-1, + mysql_data_home, "/", db, "/", file_name, reg_ext, NullS); + if (access(name, F_OK)) + { + DBUG_PRINT("info", ("%s must be discovered", file_name)); + // File is in list of ndb tables and not in ok_tables + // This table need to be created + create_list.push_back(thd->strdup(file_name)); + } } } @@ -4761,7 +5029,7 @@ int ndbcluster_find_files(THD *thd,const char *db,const char *path, files->push_back(thd->strdup(file_name)); } - pthread_mutex_unlock(&LOCK_open); + 
pthread_mutex_unlock(&LOCK_open); hash_free(&ok_tables); hash_free(&ndb_tables); @@ -4779,6 +5047,7 @@ int ndbcluster_find_files(THD *thd,const char *db,const char *path, static int connect_callback() { update_status_variables(g_ndb_cluster_connection); + pthread_cond_signal(&COND_ndb_util_thread); return 0; } @@ -4890,7 +5159,7 @@ ndbcluster_init_error: ndbcluster_init() */ -bool ndbcluster_end() +int ndbcluster_end(ha_panic_function type) { DBUG_ENTER("ndbcluster_end"); @@ -4929,6 +5198,7 @@ bool ndbcluster_end() pthread_mutex_destroy(&LOCK_ndb_util_thread); pthread_cond_destroy(&COND_ndb_util_thread); ndbcluster_inited= 0; + ndbcluster_util_inited= 0; DBUG_RETURN(0); } @@ -4941,11 +5211,13 @@ bool ndbcluster_end() void ndbcluster_print_error(int error, const NdbOperation *error_op) { DBUG_ENTER("ndbcluster_print_error"); - TABLE tab; + TABLE_SHARE share; const char *tab_name= (error_op) ? error_op->getTableName() : ""; - tab.alias= (char *) tab_name; - ha_ndbcluster error_handler(&tab); - tab.file= &error_handler; + share.db.str= (char*) ""; + share.db.length= 0; + share.table_name.str= (char *) tab_name; + share.table_name.length= strlen(tab_name); + ha_ndbcluster error_handler(&share); error_handler.print_error(error, MYF(0)); DBUG_VOID_RETURN; } @@ -5053,6 +5325,84 @@ ha_ndbcluster::records_in_range(uint inx, key_range *min_key, (max_key && max_key->length == key_length))) DBUG_RETURN(1); + if ((idx_type == PRIMARY_KEY_ORDERED_INDEX || + idx_type == UNIQUE_ORDERED_INDEX || + idx_type == ORDERED_INDEX) && + m_index[inx].index_stat != NULL) + { + NDB_INDEX_DATA& d=m_index[inx]; + NDBINDEX* index=(NDBINDEX*)d.index; + Ndb* ndb=get_ndb(); + NdbTransaction* trans=NULL; + NdbIndexScanOperation* op=NULL; + int res=0; + Uint64 rows; + + do + { + // We must provide approx table rows + Uint64 table_rows=0; + Ndb_local_table_statistics *info= + (Ndb_local_table_statistics *)m_table_info; + if (info->records != ~(ha_rows)0 && info->records != 0) + { + table_rows = info->records; + DBUG_PRINT("info", ("use info->records: %llu", table_rows)); + } + else + { + Ndb_statistics stat; + if ((res=ndb_get_table_statistics(ndb, m_tabname, &stat)) != 0) + break; + table_rows=stat.row_count; + DBUG_PRINT("info", ("use db row_count: %llu", table_rows)); + if (table_rows == 0) { + // Problem if autocommit=0 +#ifdef ndb_get_table_statistics_uses_active_trans + rows=0; + break; +#endif + } + } + + // Define scan op for the range + if ((trans=m_active_trans) == NULL) + { + DBUG_PRINT("info", ("no active trans")); + if (! (trans=ndb->startTransaction())) + ERR_BREAK(ndb->getNdbError(), res); + } + if (! 
(op=trans->getNdbIndexScanOperation(index, (NDBTAB*)m_table))) + ERR_BREAK(trans->getNdbError(), res); + if ((op->readTuples(NdbOperation::LM_CommittedRead)) == -1) + ERR_BREAK(op->getNdbError(), res); + const key_range *keys[2]={ min_key, max_key }; + if ((res=set_bounds(op, inx, true, keys)) != 0) + break; + + // Decide if db should be contacted + int flags=0; + if (d.index_stat_query_count < d.index_stat_cache_entries || + (d.index_stat_update_freq != 0 && + d.index_stat_query_count % d.index_stat_update_freq == 0)) + { + DBUG_PRINT("info", ("force stat from db")); + flags|=NdbIndexStat::RR_UseDb; + } + if (d.index_stat->records_in_range(index, op, table_rows, &rows, flags) == -1) + ERR_BREAK(d.index_stat->getNdbError(), res); + d.index_stat_query_count++; + } while (0); + + if (trans != m_active_trans && rows == 0) + rows = 1; + if (trans != m_active_trans && trans != NULL) + ndb->closeTransaction(trans); + if (res != 0) + DBUG_RETURN(HA_POS_ERROR); + DBUG_RETURN(rows); + } + DBUG_RETURN(10); /* Good guess when you don't know anything */ } @@ -5065,7 +5415,7 @@ ulong ha_ndbcluster::table_flags(void) const } const char * ha_ndbcluster::table_type() const { - return("ndbcluster"); + return("NDBCLUSTER"); } uint ha_ndbcluster::max_supported_record_length() const { @@ -5119,11 +5469,11 @@ uint8 ha_ndbcluster::table_cache_type() uint ndb_get_commitcount(THD *thd, char *dbname, char *tabname, Uint64 *commit_count) { - DBUG_ENTER("ndb_get_commitcount"); - char name[FN_REFLEN]; NDB_SHARE *share; - (void)strxnmov(name, FN_REFLEN, "./",dbname,"/",tabname,NullS); + DBUG_ENTER("ndb_get_commitcount"); + + (void)strxnmov(name, FN_REFLEN-1, share_prefix, dbname, "/", tabname, NullS); DBUG_PRINT("enter", ("name: %s", name)); pthread_mutex_lock(&ndbcluster_mutex); if (!(share=(NDB_SHARE*) hash_search(&ndbcluster_open_tables, @@ -5131,8 +5481,7 @@ uint ndb_get_commitcount(THD *thd, char *dbname, char *tabname, strlen(name)))) { pthread_mutex_unlock(&ndbcluster_mutex); - DBUG_PRINT("info", ("Table %s not found in ndbcluster_open_tables", - name)); + DBUG_PRINT("info", ("Table %s not found in ndbcluster_open_tables", name)); DBUG_RETURN(1); } share->use_count++; @@ -5147,7 +5496,7 @@ uint ndb_get_commitcount(THD *thd, char *dbname, char *tabname, DBUG_PRINT("info", ("Getting commit_count: %llu from share", share->commit_count)); pthread_mutex_unlock(&share->mutex); - free_share(share); + free_share(&share); DBUG_RETURN(0); } } @@ -5162,7 +5511,7 @@ uint ndb_get_commitcount(THD *thd, char *dbname, char *tabname, struct Ndb_statistics stat; if (ndb_get_table_statistics(ndb, tabname, &stat)) { - free_share(share); + free_share(&share); DBUG_RETURN(1); } @@ -5179,7 +5528,7 @@ uint ndb_get_commitcount(THD *thd, char *dbname, char *tabname, *commit_count= 0; } pthread_mutex_unlock(&share->mutex); - free_share(share); + free_share(&share); DBUG_RETURN(0); } @@ -5316,6 +5665,60 @@ ha_ndbcluster::register_query_cache_table(THD *thd, } +#ifndef DBUG_OFF +static void dbug_print_table(const char *info, TABLE *table) +{ + if (table == 0) + { + DBUG_PRINT("info",("%s: (null)", info)); + return; + } + DBUG_PRINT("info", + ("%s: %s.%s s->fields: %d " + "reclength: %d rec_buff_length: %d record[0]: %lx " + "record[1]: %lx", + info, + table->s->db, + table->s->table_name, + table->s->fields, + table->s->reclength, + table->s->rec_buff_length, + table->record[0], + table->record[1])); + + for (unsigned int i= 0; i < table->s->fields; i++) + { + Field *f= table->field[i]; + DBUG_PRINT("info", + ("[%d] 
\"%s\"(0x%lx:%s%s%s%s%s%s) type: %d pack_length: %d " + "ptr: 0x%lx[+%d] null_bit: %u null_ptr: 0x%lx[+%d]", + i, + f->field_name, + f->flags, + (f->flags & PRI_KEY_FLAG) ? "pri" : "attr", + (f->flags & NOT_NULL_FLAG) ? "" : ",nullable", + (f->flags & UNSIGNED_FLAG) ? ",unsigned" : ",signed", + (f->flags & ZEROFILL_FLAG) ? ",zerofill" : "", + (f->flags & BLOB_FLAG) ? ",blob" : "", + (f->flags & BINARY_FLAG) ? ",binary" : "", + f->real_type(), + f->pack_length(), + f->ptr, f->ptr - table->record[0], + f->null_bit, + f->null_ptr, (byte*) f->null_ptr - table->record[0])); + if (f->type() == MYSQL_TYPE_BIT) + { + Field_bit *g= (Field_bit*) f; + DBUG_PRINT("MYSQL_TYPE_BIT",("field_length: %d bit_ptr: 0x%lx[+%d] " + "bit_ofs: %u bit_len: %u", + g->field_length, g->bit_ptr, + (byte*) g->bit_ptr-table->record[0], + g->bit_ofs, g->bit_len)); + } + } +} +#endif + /* Handling the shared NDB_SHARE structure that is needed to provide table locking. @@ -5324,68 +5727,195 @@ ha_ndbcluster::register_query_cache_table(THD *thd, data we want to or can share. */ -static byte* ndbcluster_get_key(NDB_SHARE *share,uint *length, +static byte *ndbcluster_get_key(NDB_SHARE *share,uint *length, my_bool not_used __attribute__((unused))) { - *length=share->table_name_length; - return (byte*) share->table_name; + *length= share->key_length; + return (byte*) share->key; } -static NDB_SHARE* get_share(const char *table_name) +#ifndef DBUG_OFF +static void dbug_print_open_tables() +{ + DBUG_ENTER("dbug_print_open_tables"); + for (uint i= 0; i < ndbcluster_open_tables.records; i++) + { + NDB_SHARE *share= (NDB_SHARE*) hash_element(&ndbcluster_open_tables, i); + DBUG_PRINT("share", + ("[%d] 0x%lx key: %s key_length: %d", + i, share, share->key, share->key_length)); + DBUG_PRINT("share", + ("db.tablename: %s.%s use_count: %d commit_count: %d", + share->db, share->table_name, + share->use_count, share->commit_count)); + } + DBUG_VOID_RETURN; +} +#else +#define dbug_print_open_tables() +#endif + +/* + Increase refcount on existing share. + Always returns share and cannot fail. +*/ +static NDB_SHARE *get_share(NDB_SHARE *share) { - NDB_SHARE *share; pthread_mutex_lock(&ndbcluster_mutex); - uint length=(uint) strlen(table_name); - if (!(share=(NDB_SHARE*) hash_search(&ndbcluster_open_tables, - (byte*) table_name, - length))) + share->use_count++; + + dbug_print_open_tables(); + + DBUG_PRINT("get_share", + ("0x%lx key: %s key_length: %d", + share, share->key, share->key_length)); + DBUG_PRINT("get_share", + ("db.tablename: %s.%s use_count: %d commit_count: %d", + share->db, share->table_name, + share->use_count, share->commit_count)); + pthread_mutex_unlock(&ndbcluster_mutex); + return share; +} + +/* + Get a share object for key + + Returns share for key, and increases the refcount on the share. 
+ + create_if_not_exists == TRUE: + creates share if it does not already exist + returns 0 only due to out of memory, and then sets my_error + + create_if_not_exists == FALSE: + returns 0 if share does not exist + + have_lock == TRUE, pthread_mutex_lock(&ndbcluster_mutex) already taken +*/ +static NDB_SHARE *get_share(const char *key, bool create_if_not_exists, + bool have_lock) +{ + NDB_SHARE *share; + if (!have_lock) + pthread_mutex_lock(&ndbcluster_mutex); + uint length= (uint) strlen(key); + if (!(share= (NDB_SHARE*) hash_search(&ndbcluster_open_tables, + (byte*) key, + length))) { - if ((share=(NDB_SHARE *) my_malloc(sizeof(*share)+length+1, + if (!create_if_not_exists) + { + DBUG_PRINT("error", ("get_share: %s does not exist", key)); + if (!have_lock) + pthread_mutex_unlock(&ndbcluster_mutex); + return 0; + } + if ((share= (NDB_SHARE*) my_malloc(sizeof(*share), MYF(MY_WME | MY_ZEROFILL)))) { - share->table_name_length=length; - share->table_name=(char*) (share+1); - strmov(share->table_name,table_name); + MEM_ROOT **root_ptr= + my_pthread_getspecific_ptr(MEM_ROOT**, THR_MALLOC); + MEM_ROOT *old_root= *root_ptr; + init_sql_alloc(&share->mem_root, 1024, 0); + *root_ptr= &share->mem_root; // remember to reset before return + + /* enough space for key, db, and table_name */ + share->key= alloc_root(*root_ptr, 2 * (length + 1)); + share->key_length= length; + strmov(share->key, key); if (my_hash_insert(&ndbcluster_open_tables, (byte*) share)) { - pthread_mutex_unlock(&ndbcluster_mutex); - my_free((gptr) share,0); + free_root(&share->mem_root, MYF(0)); + my_free((gptr) share, 0); + *root_ptr= old_root; + if (!have_lock) + pthread_mutex_unlock(&ndbcluster_mutex); return 0; } thr_lock_init(&share->lock); - pthread_mutex_init(&share->mutex,MY_MUTEX_INIT_FAST); + pthread_mutex_init(&share->mutex, MY_MUTEX_INIT_FAST); share->commit_count= 0; share->commit_count_lock= 0; + share->db= share->key + length + 1; + ha_ndbcluster::set_dbname(key, share->db); + share->table_name= share->db + strlen(share->db) + 1; + ha_ndbcluster::set_tabname(key, share->table_name); + *root_ptr= old_root; } else { - DBUG_PRINT("error", ("Failed to alloc share")); - pthread_mutex_unlock(&ndbcluster_mutex); + DBUG_PRINT("error", ("get_share: failed to alloc share")); + if (!have_lock) + pthread_mutex_unlock(&ndbcluster_mutex); + my_error(ER_OUTOFMEMORY, MYF(0), sizeof(*share)); return 0; } } share->use_count++; - DBUG_PRINT("share", - ("table_name: %s, length: %d, use_count: %d, commit_count: %d", - share->table_name, share->table_name_length, share->use_count, - share->commit_count)); - pthread_mutex_unlock(&ndbcluster_mutex); + dbug_print_open_tables(); + + DBUG_PRINT("get_share", + ("0x%lx key: %s key_length: %d key: %s", + share, share->key, share->key_length, key)); + DBUG_PRINT("get_share", + ("db.tablename: %s.%s use_count: %d commit_count: %d", + share->db, share->table_name, + share->use_count, share->commit_count)); + if (!have_lock) + pthread_mutex_unlock(&ndbcluster_mutex); return share; } +static void real_free_share(NDB_SHARE **share) +{ + DBUG_PRINT("real_free_share", + ("0x%lx key: %s key_length: %d", + (*share), (*share)->key, (*share)->key_length)); + DBUG_PRINT("real_free_share", + ("db.tablename: %s.%s use_count: %d commit_count: %d", + (*share)->db, (*share)->table_name, + (*share)->use_count, (*share)->commit_count)); + + hash_delete(&ndbcluster_open_tables, (byte*) *share); + thr_lock_delete(&(*share)->lock); + pthread_mutex_destroy(&(*share)->mutex); + free_root(&(*share)->mem_root, MYF(0)); + + 
my_free((gptr) *share, MYF(0)); + *share= 0; + + dbug_print_open_tables(); +} + +/* + decrease refcount of share + calls real_free_share when refcount reaches 0 -static void free_share(NDB_SHARE *share) + have_lock == TRUE, pthread_mutex_lock(&ndbcluster_mutex) already taken +*/ +static void free_share(NDB_SHARE **share, bool have_lock) { - pthread_mutex_lock(&ndbcluster_mutex); - if (!--share->use_count) + if (!have_lock) + pthread_mutex_lock(&ndbcluster_mutex); + if ((*share)->util_lock == current_thd) + (*share)->util_lock= 0; + if (!--(*share)->use_count) { - hash_delete(&ndbcluster_open_tables, (byte*) share); - thr_lock_delete(&share->lock); - pthread_mutex_destroy(&share->mutex); - my_free((gptr) share, MYF(0)); + real_free_share(share); } - pthread_mutex_unlock(&ndbcluster_mutex); + else + { + dbug_print_open_tables(); + DBUG_PRINT("free_share", + ("0x%lx key: %s key_length: %d", + *share, (*share)->key, (*share)->key_length)); + DBUG_PRINT("free_share", + ("db.tablename: %s.%s use_count: %d commit_count: %d", + (*share)->db, (*share)->table_name, + (*share)->use_count, (*share)->commit_count)); + } + if (!have_lock) + pthread_mutex_unlock(&ndbcluster_mutex); } @@ -5416,14 +5946,14 @@ static int packfrm(const void *data, uint len, uint blob_len; frm_blob_struct* blob; DBUG_ENTER("packfrm"); - DBUG_PRINT("enter", ("data: %x, len: %d", data, len)); + DBUG_PRINT("enter", ("data: 0x%lx len: %d", data, len)); error= 1; org_len= len; if (my_compress((byte*)data, &org_len, &comp_len)) goto err; - DBUG_PRINT("info", ("org_len: %d, comp_len: %d", org_len, comp_len)); + DBUG_PRINT("info", ("org_len: %d comp_len: %d", org_len, comp_len)); DBUG_DUMP("compressed", (char*)data, org_len); error= 2; @@ -5443,7 +5973,8 @@ static int packfrm(const void *data, uint len, *pack_len= blob_len; error= 0; - DBUG_PRINT("exit", ("pack_data: %x, pack_len: %d", *pack_data, *pack_len)); + DBUG_PRINT("exit", + ("pack_data: 0x%lx pack_len: %d", *pack_data, *pack_len)); err: DBUG_RETURN(error); @@ -5457,7 +5988,7 @@ static int unpackfrm(const void **unpack_data, uint *unpack_len, byte *data; ulong complen, orglen, ver; DBUG_ENTER("unpackfrm"); - DBUG_PRINT("enter", ("pack_data: %x", pack_data)); + DBUG_PRINT("enter", ("pack_data: 0x%lx", pack_data)); complen= uint4korr((char*)&blob->head.complen); orglen= uint4korr((char*)&blob->head.orglen); @@ -5482,7 +6013,7 @@ static int unpackfrm(const void **unpack_data, uint *unpack_len, *unpack_data= data; *unpack_len= complen; - DBUG_PRINT("exit", ("frmdata: %x, len: %d", *unpack_data, *unpack_len)); + DBUG_PRINT("exit", ("frmdata: 0x%lx, len: %d", *unpack_data, *unpack_len)); DBUG_RETURN(0); } @@ -5495,11 +6026,10 @@ ndb_get_table_statistics(Ndb* ndb, const char * table, DBUG_ENTER("ndb_get_table_statistics"); DBUG_PRINT("enter", ("table: %s", table)); NdbTransaction* pTrans= ndb->startTransaction(); + if (pTrans == NULL) + ERR_RETURN(ndb->getNdbError()); do { - if (pTrans == NULL) - break; - NdbScanOperation* pOp= pTrans->getNdbScanOperation(table); if (pOp == NULL) break; @@ -5579,7 +6109,7 @@ int ha_ndbcluster::write_ndb_file() DBUG_ENTER("write_ndb_file"); DBUG_PRINT("enter", ("db: %s, name: %s", m_dbname, m_tabname)); - (void)strxnmov(path, FN_REFLEN, + (void)strxnmov(path, FN_REFLEN-1, mysql_data_home,"/",m_dbname,"/",m_tabname,ha_ndb_ext,NullS); if ((file=my_create(path, CREATE_MODE,O_RDWR | O_TRUNC,MYF(MY_WME))) >= 0) @@ -5599,6 +6129,7 @@ ha_ndbcluster::read_multi_range_first(KEY_MULTI_RANGE **found_range_p, HANDLER_BUFFER *buffer) { 
DBUG_ENTER("ha_ndbcluster::read_multi_range_first"); + m_write_op= FALSE; int res; KEY* key_info= table->key_info + active_index; @@ -5606,7 +6137,7 @@ ha_ndbcluster::read_multi_range_first(KEY_MULTI_RANGE **found_range_p, ulong reclength= table->s->reclength; NdbOperation* op; - if (uses_blob_value(m_retrieve_all_fields)) + if (uses_blob_value()) { /** * blobs can't be batched currently @@ -5658,12 +6189,29 @@ ha_ndbcluster::read_multi_range_first(KEY_MULTI_RANGE **found_range_p, for (; multi_range_curr<multi_range_end && curr+reclength <= end_of_buffer; multi_range_curr++) { - switch (index_type){ + part_id_range part_spec; + if (m_use_partition_function) + { + get_partition_set(table, curr, active_index, + &multi_range_curr->start_key, + &part_spec); + if (part_spec.start_part > part_spec.end_part) + { + /* + We can skip this partition since the key won't fit into any + partition + */ + curr += reclength; + multi_range_curr->range_flag |= SKIP_RANGE; + continue; + } + } + switch(index_type){ case PRIMARY_KEY_ORDERED_INDEX: if (!(multi_range_curr->start_key.length == key_info->key_length && - multi_range_curr->start_key.flag == HA_READ_KEY_EXACT)) - goto range; - /* fall through */ + multi_range_curr->start_key.flag == HA_READ_KEY_EXACT)) + goto range; + // else fall through case PRIMARY_KEY_INDEX: { multi_range_curr->range_flag |= UNIQUE_RANGE; @@ -5671,7 +6219,9 @@ ha_ndbcluster::read_multi_range_first(KEY_MULTI_RANGE **found_range_p, !op->readTuple(lm) && !set_primary_key(op, multi_range_curr->start_key.key) && !define_read_attrs(curr, op) && - (op->setAbortOption(AO_IgnoreError), TRUE)) + (op->setAbortOption(AO_IgnoreError), TRUE) && + (!m_use_partition_function || + (op->setPartitionId(part_spec.start_part), true))) curr += reclength; else ERR_RETURN(op ? op->getNdbError() : m_active_trans->getNdbError()); @@ -5680,11 +6230,11 @@ ha_ndbcluster::read_multi_range_first(KEY_MULTI_RANGE **found_range_p, break; case UNIQUE_ORDERED_INDEX: if (!(multi_range_curr->start_key.length == key_info->key_length && - multi_range_curr->start_key.flag == HA_READ_KEY_EXACT && - !check_null_in_key(key_info, multi_range_curr->start_key.key, - multi_range_curr->start_key.length))) - goto range; - /* fall through */ + multi_range_curr->start_key.flag == HA_READ_KEY_EXACT && + !check_null_in_key(key_info, multi_range_curr->start_key.key, + multi_range_curr->start_key.length))) + goto range; + // else fall through case UNIQUE_INDEX: { multi_range_curr->range_flag |= UNIQUE_RANGE; @@ -5698,8 +6248,7 @@ ha_ndbcluster::read_multi_range_first(KEY_MULTI_RANGE **found_range_p, ERR_RETURN(op ? 
op->getNdbError() : m_active_trans->getNdbError()); break; } - case ORDERED_INDEX: - { + case ORDERED_INDEX: { range: multi_range_curr->range_flag &= ~(uint)UNIQUE_RANGE; if (scanOp == 0) @@ -5732,7 +6281,8 @@ ha_ndbcluster::read_multi_range_first(KEY_MULTI_RANGE **found_range_p, const key_range *keys[2]= { &multi_range_curr->start_key, &multi_range_curr->end_key }; - if ((res= set_bounds(scanOp, keys, multi_range_curr-ranges))) + if ((res= set_bounds(scanOp, active_index, false, keys, + multi_range_curr-ranges))) DBUG_RETURN(res); break; } @@ -5774,7 +6324,7 @@ ha_ndbcluster::read_multi_range_first(KEY_MULTI_RANGE **found_range_p, } #if 0 -#define DBUG_MULTI_RANGE(x) printf("read_multi_range_next: case %d\n", x); +#define DBUG_MULTI_RANGE(x) DBUG_PRINT("info", ("read_multi_range_next: case %d\n", x)); #else #define DBUG_MULTI_RANGE(x) #endif @@ -5785,6 +6335,7 @@ ha_ndbcluster::read_multi_range_next(KEY_MULTI_RANGE ** multi_range_found_p) DBUG_ENTER("ha_ndbcluster::read_multi_range_next"); if (m_disable_multi_read) { + DBUG_MULTI_RANGE(11); DBUG_RETURN(handler::read_multi_range_next(multi_range_found_p)); } @@ -5794,10 +6345,16 @@ ha_ndbcluster::read_multi_range_next(KEY_MULTI_RANGE ** multi_range_found_p) const NdbOperation* op= m_current_multi_operation; for (;multi_range_curr < m_multi_range_defined; multi_range_curr++) { + DBUG_MULTI_RANGE(12); + if (multi_range_curr->range_flag & SKIP_RANGE) + continue; if (multi_range_curr->range_flag & UNIQUE_RANGE) { if (op->getNdbError().code == 0) + { + DBUG_MULTI_RANGE(13); goto found_next; + } op= m_active_trans->getNextCompletedOperation(op); m_multi_range_result_ptr += reclength; @@ -5814,6 +6371,7 @@ ha_ndbcluster::read_multi_range_next(KEY_MULTI_RANGE ** multi_range_found_p) } else { + DBUG_MULTI_RANGE(14); goto close_scan; } } @@ -5847,6 +6405,7 @@ ha_ndbcluster::read_multi_range_next(KEY_MULTI_RANGE ** multi_range_found_p) DBUG_ASSERT(range_no == -1); if ((res= m_multi_cursor->nextResult(true))) { + DBUG_MULTI_RANGE(15); goto close_scan; } multi_range_curr--; // Will be increased in for-loop @@ -5874,12 +6433,16 @@ close_scan: } else { + DBUG_MULTI_RANGE(9); DBUG_RETURN(ndb_err(m_active_trans)); } } if (multi_range_curr == multi_range_end) + { + DBUG_MULTI_RANGE(16); DBUG_RETURN(HA_ERR_END_OF_FILE); + } /** * Read remaining ranges @@ -5988,6 +6551,7 @@ pthread_handler_t ndb_util_thread_func(void *arg __attribute__((unused))) THD *thd; /* needs to be first for thread_stack */ Ndb* ndb; struct timespec abstime; + List<NDB_SHARE> util_open_tables; my_thread_init(); DBUG_ENTER("ndb_util_thread"); @@ -6008,10 +6572,51 @@ pthread_handler_t ndb_util_thread_func(void *arg __attribute__((unused))) delete ndb; DBUG_RETURN(NULL); } + thd->init_for_queries(); + thd->version=refresh_version; + thd->set_time(); + thd->main_security_ctx.host_or_ip= ""; + thd->client_capabilities = 0; + my_net_init(&thd->net, 0); + thd->main_security_ctx.master_access= ~0; + thd->main_security_ctx.priv_user = 0; + + /* + wait for mysql server to start + */ + pthread_mutex_lock(&LOCK_server_started); + while (!mysqld_server_started) + pthread_cond_wait(&COND_server_started, &LOCK_server_started); + pthread_mutex_unlock(&LOCK_server_started); + + /* + Wait for cluster to start + */ + pthread_mutex_lock(&LOCK_ndb_util_thread); + while (!ndb_cluster_node_id) + { + /* ndb not connected yet */ + set_timespec(abstime, 1); + pthread_cond_timedwait(&COND_ndb_util_thread, + &LOCK_ndb_util_thread, + &abstime); + if (abort_loop) + { + pthread_mutex_unlock(&LOCK_ndb_util_thread); + 
goto ndb_util_thread_end; + } + } + pthread_mutex_unlock(&LOCK_ndb_util_thread); + + /* + Get all table definitions from the storage node + */ + ndbcluster_find_all_files(thd); + + ndbcluster_util_inited= 1; - List<NDB_SHARE> util_open_tables; set_timespec(abstime, 0); - for (;;) + for (;!abort_loop;) { pthread_mutex_lock(&LOCK_ndb_util_thread); @@ -6019,10 +6624,10 @@ pthread_handler_t ndb_util_thread_func(void *arg __attribute__((unused))) &LOCK_ndb_util_thread, &abstime); pthread_mutex_unlock(&LOCK_ndb_util_thread); - +#ifdef NDB_EXTRA_DEBUG_UTIL_THREAD DBUG_PRINT("ndb_util_thread", ("Started, ndb_cache_check_time: %d", ndb_cache_check_time)); - +#endif if (abort_loop) break; /* Shutting down server */ @@ -6053,20 +6658,12 @@ pthread_handler_t ndb_util_thread_func(void *arg __attribute__((unused))) List_iterator_fast<NDB_SHARE> it(util_open_tables); while ((share= it++)) { - /* Split tab- and dbname */ - char buf[FN_REFLEN]; - char *tabname, *db; - uint length= dirname_length(share->table_name); - tabname= share->table_name+length; - memcpy(buf, share->table_name, length-1); - buf[length-1]= 0; - db= buf+dirname_length(buf); DBUG_PRINT("ndb_util_thread", ("Fetching commit count for: %s", - share->table_name)); + share->key)); /* Contact NDB to get commit count for table */ - ndb->setDatabaseName(db); + ndb->setDatabaseName(share->db); struct Ndb_statistics stat; uint lock; @@ -6074,17 +6671,17 @@ pthread_handler_t ndb_util_thread_func(void *arg __attribute__((unused))) lock= share->commit_count_lock; pthread_mutex_unlock(&share->mutex); - if (ndb_get_table_statistics(ndb, tabname, &stat) == 0) + if (ndb_get_table_statistics(ndb, share->table_name, &stat) == 0) { DBUG_PRINT("ndb_util_thread", ("Table: %s, commit_count: %llu, rows: %llu", - share->table_name, stat.commit_count, stat.row_count)); + share->key, stat.commit_count, stat.row_count)); } else { DBUG_PRINT("ndb_util_thread", ("Error: Could not get commit count for table %s", - share->table_name)); + share->key)); stat.commit_count= 0; } @@ -6094,7 +6691,7 @@ pthread_handler_t ndb_util_thread_func(void *arg __attribute__((unused))) pthread_mutex_unlock(&share->mutex); /* Decrease the use count and possibly free share */ - free_share(share); + free_share(&share); } /* Clear the list of open tables */ @@ -6121,7 +6718,8 @@ pthread_handler_t ndb_util_thread_func(void *arg __attribute__((unused))) abstime.tv_nsec-= 1000000000; } } - +ndb_util_thread_end: + net_end(&thd->net); thd->cleanup(); delete thd; delete ndb; @@ -6513,9 +7111,9 @@ void ndb_serialize_cond(const Item *item, void *arg) } else { - DBUG_PRINT("info", ("Was not expecting field from table %s(%s)", - context->table->s->table_name, - field->table->s->table_name)); + DBUG_PRINT("info", ("Was not expecting field from table %s (%s)", + context->table->s->table_name.str, + field->table->s->table_name.str)); context->supported= FALSE; } break; @@ -7109,6 +7707,8 @@ ha_ndbcluster::build_scan_filter_predicate(Ndb_cond * &cond, : NULL; break; default: + field= NULL; //Keep compiler happy + DBUG_ASSERT(0); break; } switch ((negated) ? @@ -7456,31 +8056,24 @@ ha_ndbcluster::generate_scan_filter(Ndb_cond_stack *ndb_cond_stack, DBUG_RETURN(0); } -int -ndbcluster_show_status(THD* thd) +/* + Implements the SHOW NDB STATUS command. 
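+
+  Each Ndb object free list is reported through stat_print() as one row,
+  with the status text formatted as "created=%u, free=%u, sizeof=%u".
+  Illustrative example row (the list name and numbers are made up):
+
+    Name: NdbTransaction  Status: created=2, free=1, sizeof=368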
+*/ +bool +ndbcluster_show_status(THD* thd, stat_print_fn *stat_print, + enum ha_stat_type stat_type) { - Protocol *protocol= thd->protocol; - + char buf[IO_SIZE]; DBUG_ENTER("ndbcluster_show_status"); if (have_ndbcluster != SHOW_OPTION_YES) { - my_message(ER_NOT_SUPPORTED_YET, - "Cannot call SHOW NDBCLUSTER STATUS because skip-ndbcluster is " - "defined", - MYF(0)); - DBUG_RETURN(TRUE); + DBUG_RETURN(FALSE); + } + if (stat_type != HA_ENGINE_STATUS) + { + DBUG_RETURN(FALSE); } - - List<Item> field_list; - field_list.push_back(new Item_empty_string("free_list", 255)); - field_list.push_back(new Item_return_int("created", 10,MYSQL_TYPE_LONG)); - field_list.push_back(new Item_return_int("free", 10,MYSQL_TYPE_LONG)); - field_list.push_back(new Item_return_int("sizeof", 10,MYSQL_TYPE_LONG)); - - if (protocol->send_fields(&field_list, - Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF)) - DBUG_RETURN(TRUE); if (get_thd_ndb(thd) && get_thd_ndb(thd)->ndb) { @@ -7489,14 +8082,13 @@ ndbcluster_show_status(THD* thd) tmp.m_name= 0; while (ndb->get_free_list_usage(&tmp)) { - protocol->prepare_for_resend(); - - protocol->store(tmp.m_name, &my_charset_bin); - protocol->store((uint)tmp.m_created); - protocol->store((uint)tmp.m_free); - protocol->store((uint)tmp.m_sizeof); - if (protocol->write()) - DBUG_RETURN(TRUE); + uint buflen= + my_snprintf(buf, sizeof(buf), + "created=%u, free=%u, sizeof=%u", + tmp.m_created, tmp.m_free, tmp.m_sizeof); + if (stat_print(thd, ndbcluster_hton.name, strlen(ndbcluster_hton.name), + tmp.m_name, strlen(tmp.m_name), buf, buflen)) + DBUG_RETURN(TRUE); } } send_eof(thd); @@ -7504,4 +8096,427 @@ ndbcluster_show_status(THD* thd) DBUG_RETURN(FALSE); } -#endif /* HAVE_NDBCLUSTER_DB */ +/* + Create a table in NDB Cluster + */ +static uint get_no_fragments(ulonglong max_rows) +{ +#if MYSQL_VERSION_ID >= 50000 + uint acc_row_size= 25 + /*safety margin*/ 2; +#else + uint acc_row_size= pk_length*4; + /* add acc overhead */ + if (pk_length <= 8) /* main page will set the limit */ + acc_row_size+= 25 + /*safety margin*/ 2; + else /* overflow page will set the limit */ + acc_row_size+= 4 + /*safety margin*/ 4; +#endif + ulonglong acc_fragment_size= 512*1024*1024; +#if MYSQL_VERSION_ID >= 50100 + return (max_rows*acc_row_size)/acc_fragment_size+1; +#else + return ((max_rows*acc_row_size)/acc_fragment_size+1 + +1/*correct rounding*/)/2; +#endif +} + + +/* + Routine to adjust default number of partitions to always be a multiple + of number of nodes and never more than 4 times the number of nodes. + +*/ +static bool adjusted_frag_count(uint no_fragments, uint no_nodes, + uint &reported_frags) +{ + uint i= 0; + reported_frags= no_nodes; + while (reported_frags < no_fragments && ++i < 4 && + (reported_frags + no_nodes) < MAX_PARTITIONS) + reported_frags+= no_nodes; + return (reported_frags < no_fragments); +} + +int ha_ndbcluster::get_default_no_partitions(ulonglong max_rows) +{ + uint reported_frags; + uint no_fragments= get_no_fragments(max_rows); + uint no_nodes= g_ndb_cluster_connection->no_db_nodes(); + adjusted_frag_count(no_fragments, no_nodes, reported_frags); + return (int)reported_frags; +} + + +/* + User defined partitioning set-up. We need to check how many fragments the + user wants defined and which node groups to put those into. Later we also + want to attach those partitions to a tablespace. + + All the functionality of the partition function, partition limits and so + forth are entirely handled by the MySQL Server. 
There is one exception to + this rule for PARTITION BY KEY where NDB handles the hash function and + this type can thus be handled transparently also by NDB API program. + For RANGE, HASH and LIST and subpartitioning the NDB API programs must + implement the function to map to a partition. +*/ + +uint ha_ndbcluster::set_up_partition_info(partition_info *part_info, + TABLE *table, + void *tab_par) +{ + DBUG_ENTER("ha_ndbcluster::set_up_partition_info"); + ushort node_group[MAX_PARTITIONS]; + ulong ng_index= 0, i, j; + NDBTAB *tab= (NDBTAB*)tab_par; + NDBTAB::FragmentType ftype= NDBTAB::UserDefined; + partition_element *part_elem; + + if (part_info->part_type == HASH_PARTITION && + part_info->list_of_part_fields == TRUE) + { + Field **fields= part_info->part_field_array; + + if (part_info->linear_hash_ind) + ftype= NDBTAB::DistrKeyLin; + else + ftype= NDBTAB::DistrKeyHash; + + for (i= 0; i < part_info->part_field_list.elements; i++) + { + NDBCOL *col= tab->getColumn(fields[i]->fieldnr - 1); + DBUG_PRINT("info",("setting dist key on %s", col->getName())); + col->setPartitionKey(TRUE); + } + } + List_iterator<partition_element> part_it(part_info->partitions); + for (i= 0; i < part_info->no_parts; i++) + { + part_elem= part_it++; + if (!is_sub_partitioned(part_info)) + { + node_group[ng_index++]= part_elem->nodegroup_id; + //Here we should insert tablespace id based on tablespace name + } + else + { + List_iterator<partition_element> sub_it(part_elem->subpartitions); + for (j= 0; j < part_info->no_subparts; j++) + { + part_elem= sub_it++; + node_group[ng_index++]= part_elem->nodegroup_id; + //Here we should insert tablespace id based on tablespace name + } + } + } + { + uint no_nodes= g_ndb_cluster_connection->no_db_nodes(); + if (ng_index > 4 * no_nodes) + { + DBUG_RETURN(1300); + } + } + tab->setNodeGroupIds(&node_group, ng_index); + tab->setFragmentType(ftype); + DBUG_RETURN(0); +} + + +/* + This routine is used to set-up fragmentation when the user has only specified + ENGINE = NDB and no user defined partitioning what so ever. Thus all values + will be based on default values. We will choose Linear Hash or Hash with + perfect spread dependent on a session variable defined in MySQL. +*/ + +static void ndb_set_fragmentation(NDBTAB &tab, TABLE *form, uint pk_length) +{ + NDBTAB::FragmentType ftype= NDBTAB::DistrKeyHash; + ushort node_group[MAX_PARTITIONS]; + uint no_nodes= g_ndb_cluster_connection->no_db_nodes(), no_fragments, i; + DBUG_ENTER("ndb_set_fragmentation"); + + if (form->s->max_rows == (ha_rows) 0) + { + no_fragments= no_nodes; + } + else + { + /* + Ensure that we get enough fragments to handle all rows and ensure that + the table is fully distributed by keeping the number of fragments a + multiple of the number of nodes. 
+ */ + uint fragments= get_no_fragments(form->s->max_rows); + if (adjusted_frag_count(fragments, no_nodes, no_fragments)) + { + push_warning(current_thd, + MYSQL_ERROR::WARN_LEVEL_WARN, ER_UNKNOWN_ERROR, + "Ndb might have problems storing the maximum number of rows specified"); + } + } + /* + Always start with node group 0 and continue with the next node group + from there + */ + node_group[0]= 0; + for (i= 1; i < no_fragments; i++) + node_group[i]= UNDEF_NODEGROUP; + switch (opt_ndb_distribution_id) + { + case ND_KEYHASH: + ftype= NDBTAB::DistrKeyHash; + break; + case ND_LINHASH: + ftype= NDBTAB::DistrKeyLin; + break; + } + tab.setFragmentType(ftype); + tab.setNodeGroupIds(&node_group, no_fragments); + DBUG_VOID_RETURN; +} + +bool ha_ndbcluster::check_if_incompatible_data(HA_CREATE_INFO *info, + uint table_changes) +{ + /* + TODO: Remove the dummy return below, when cluster gets + signal from alter table when only .frm is changed. Cluster + needs it to manage the copies. + */ + return COMPATIBLE_DATA_NO; + + if (table_changes != IS_EQUAL_YES) + return COMPATIBLE_DATA_NO; + + /* Check that auto_increment value was not changed */ + if ((info->used_fields & HA_CREATE_USED_AUTO) && + info->auto_increment_value != 0) + return COMPATIBLE_DATA_NO; + + /* Check that row format didn't change */ + if ((info->used_fields & HA_CREATE_USED_ROW_FORMAT) && + get_row_type() != info->row_type) + return COMPATIBLE_DATA_NO; + + return COMPATIBLE_DATA_YES; +} + +#ifdef NDB_DISKDATA +bool set_up_tablespace(st_alter_tablespace *info, + NdbDictionary::Tablespace *ndb_ts) +{ + ndb_ts->setName(info->tablespace_name); + ndb_ts->setExtentSize(info->extent_size); + ndb_ts->setDefaultLogfileGroup(info->logfile_group_name); + return false; +} + +bool set_up_datafile(st_alter_tablespace *info, + NdbDictionary::Datafile *ndb_df) +{ + if (info->max_size > 0) + { + my_error(ER_TABLESPACE_AUTO_EXTEND_ERROR, MYF(0)); + return true; + } + ndb_df->setPath(info->data_file_name); + ndb_df->setSize(info->initial_size); + ndb_df->setTablespace(info->tablespace_name); + return false; +} + +bool set_up_logfile_group(st_alter_tablespace *info, + NdbDictionary::LogfileGroup *ndb_lg) +{ + ndb_lg->setName(info->logfile_group_name); + ndb_lg->setUndoBufferSize(info->undo_buffer_size); + return false; +} + +bool set_up_undofile(st_alter_tablespace *info, + NdbDictionary::Undofile *ndb_uf) +{ + ndb_uf->setPath(info->undo_file_name); + ndb_uf->setSize(info->initial_size); + ndb_uf->setLogfileGroup(info->logfile_group_name); + return false; +} + +int ha_ndbcluster::alter_tablespace(st_alter_tablespace *info) +{ + Ndb *ndb; + NDBDICT *dict; + int error; + DBUG_ENTER("ha_ndbcluster::alter_tablespace"); + if (check_ndb_connection()) + { + DBUG_RETURN(my_errno= HA_ERR_NO_CONNECTION); + } + ndb= get_ndb(); + dict= ndb->getDictionary(); + switch (info->ts_cmd_type){ + case (CREATE_TABLESPACE): + { + NdbDictionary::Tablespace ndb_ts; + NdbDictionary::Datafile ndb_df; + if (set_up_tablespace(info, &ndb_ts)) + { + DBUG_RETURN(1); + } + if (set_up_datafile(info, &ndb_df)) + { + DBUG_RETURN(1); + } + if ((error= dict->createTablespace(ndb_ts))) + { + DBUG_PRINT("error", ("createTablespace returned %d", error)); + my_error(ER_CREATE_TABLESPACE_FAILED, MYF(0), "TABLESPACE"); + DBUG_RETURN(1); + } + DBUG_PRINT("info", ("Successfully created Tablespace")); + if ((error= dict->createDatafile(ndb_df))) + { + DBUG_PRINT("error", ("createDatafile returned %d", error)); + my_error(ER_CREATE_TABLESPACE_FAILED, MYF(0), "DATAFILE"); + DBUG_RETURN(1); + } + break; + } + case 
(ALTER_TABLESPACE): + { + if (info->ts_alter_tablespace_type == ALTER_TABLESPACE_ADD_FILE) + { + NdbDictionary::Datafile ndb_df; + if (set_up_datafile(info, &ndb_df)) + { + DBUG_RETURN(1); + } + if ((error= dict->createDatafile(ndb_df))) + { + DBUG_PRINT("error", ("createDatafile returned %d", error)); + my_error(ER_ALTER_TABLESPACE_FAILED, MYF(0), "CREATE DATAFILE"); + DBUG_RETURN(1); + } + } + else if (info->ts_alter_tablespace_type == ALTER_TABLESPACE_DROP_FILE) + { + NdbDictionary::Datafile df = dict->getDatafile(0, + info->data_file_name); + if (strcmp(df.getPath(), info->data_file_name) == 0) + { + if ((error= dict->dropDatafile(df))) + { + DBUG_PRINT("error", ("dropDatafile returned %d", error)); + my_error(ER_ALTER_TABLESPACE_FAILED, MYF(0), " DROP DATAFILE"); + DBUG_RETURN(1); + } + } + else + { + DBUG_PRINT("error", ("No such datafile")); + my_error(ER_ALTER_TABLESPACE_FAILED, MYF(0), " NO SUCH FILE"); + DBUG_RETURN(1); + } + } + else + { + DBUG_PRINT("error", ("Unsupported alter tablespace: %d", + info->ts_alter_tablespace_type)); + DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED); + } + break; + } + case (CREATE_LOGFILE_GROUP): + { + NdbDictionary::LogfileGroup ndb_lg; + NdbDictionary::Undofile ndb_uf; + if (info->undo_file_name == NULL) + { + /* + REDO files in LOGFILE GROUP not supported yet + */ + DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED); + } + if (set_up_logfile_group(info, &ndb_lg)) + { + DBUG_RETURN(1); + } + if ((error= dict->createLogfileGroup(ndb_lg))) + { + DBUG_PRINT("error", ("createLogfileGroup returned %d", error)); + my_error(ER_CREATE_TABLESPACE_FAILED, MYF(0), "LOGFILE GROUP"); + DBUG_RETURN(1); + } + DBUG_PRINT("info", ("Successfully created Logfile Group")); + if (set_up_undofile(info, &ndb_uf)) + { + DBUG_RETURN(1); + } + if ((error= dict->createUndofile(ndb_uf))) + { + DBUG_PRINT("error", ("createUndofile returned %d", error)); + my_error(ER_CREATE_TABLESPACE_FAILED, MYF(0), "UNDOFILE"); + DBUG_RETURN(1); + } + break; + } + case (ALTER_LOGFILE_GROUP): + { + if (info->undo_file_name == NULL) + { + /* + REDO files in LOGFILE GROUP not supported yet + */ + DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED); + } + NdbDictionary::Undofile ndb_uf; + if (set_up_undofile(info, &ndb_uf)) + { + DBUG_RETURN(1); + } + if ((error= dict->createUndofile(ndb_uf))) + { + DBUG_PRINT("error", ("createUndofile returned %d", error)); + my_error(ER_ALTER_TABLESPACE_FAILED, MYF(0), "CREATE UNDOFILE"); + DBUG_RETURN(1); + } + break; + } + case (DROP_TABLESPACE): + { + if ((error= dict->dropTablespace( + dict->getTablespace(info->tablespace_name)))) + { + DBUG_PRINT("error", ("dropTablespace returned %d", error)); + my_error(ER_DROP_TABLESPACE_FAILED, MYF(0), "TABLESPACE"); + DBUG_RETURN(1); + } + break; + } + case (DROP_LOGFILE_GROUP): + { + if ((error= dict->dropLogfileGroup(dict->getLogfileGroup(info->logfile_group_name)))) + { + DBUG_PRINT("error", ("dropLogfileGroup returned %d", error)); + my_error(ER_DROP_TABLESPACE_FAILED, MYF(0), "LOGFILE GROUP"); + DBUG_RETURN(1); + } + break; + } + case (CHANGE_FILE_TABLESPACE): + { + DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED); + } + case (ALTER_ACCESS_MODE_TABLESPACE): + { + DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED); + } + default: + { + DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED); + } + } + DBUG_RETURN(FALSE); +} + +#endif /* NDB_DISKDATA */ diff --git a/sql/ha_ndbcluster.h b/sql/ha_ndbcluster.h index 466d7b7044b..fb506375797 100644 --- a/sql/ha_ndbcluster.h +++ b/sql/ha_ndbcluster.h @@ -25,6 +25,7 @@ #pragma interface /* gcc class implementation */ #endif +#include <NdbApi.hpp> #include 
<ndbapi_limits.h> class Ndb; // Forward declaration @@ -35,10 +36,14 @@ class NdbScanOperation; class NdbScanFilter; class NdbIndexScanOperation; class NdbBlob; +class NdbIndexStat; +class NdbEventOperation; // connectstring to cluster if given by mysqld extern const char *ndbcluster_connectstring; extern ulong ndb_cache_check_time; +extern ulong ndb_report_thresh_binlog_epoch_slip; +extern ulong ndb_report_thresh_binlog_mem_usage; typedef enum ndb_index_type { UNDEFINED_INDEX = 0, @@ -54,15 +59,33 @@ typedef struct ndb_index_data { void *index; void *unique_index; unsigned char *unique_index_attrid_map; + // In this version stats are not shared between threads + NdbIndexStat* index_stat; + uint index_stat_cache_entries; + // Simple counter mechanism to decide when to connect to db + uint index_stat_update_freq; + uint index_stat_query_count; } NDB_INDEX_DATA; +typedef union { const NdbRecAttr *rec; NdbBlob *blob; void *ptr; } NdbValue; + +typedef enum { + NSS_INITIAL= 0, + NSS_DROPPED +} NDB_SHARE_STATE; + typedef struct st_ndbcluster_share { + MEM_ROOT mem_root; THR_LOCK lock; pthread_mutex_t mutex; - char *table_name; - uint table_name_length,use_count; + char *key; + uint key_length; + THD *util_lock; + uint use_count; uint commit_count_lock; ulonglong commit_count; + char *db; + char *table_name; } NDB_SHARE; typedef enum ndb_item_type { @@ -113,6 +136,7 @@ struct negated_function_mapping NDB_FUNC_TYPE neg_fun; }; + /* Define what functions can be negated in condition pushdown. Note, these HAVE to be in the same order as in definition enum @@ -454,7 +478,7 @@ class Thd_ndb class ha_ndbcluster: public handler { public: - ha_ndbcluster(TABLE *table); + ha_ndbcluster(TABLE_SHARE *table); ~ha_ndbcluster(); int open(const char *name, int mode, uint test_if_locked); @@ -463,7 +487,7 @@ class ha_ndbcluster: public handler int write_row(byte *buf); int update_row(const byte *old_data, byte *new_data); int delete_row(const byte *buf); - int index_init(uint index); + int index_init(uint index, bool sorted); int index_end(); int index_read(byte *buf, const byte *key, uint key_len, enum ha_rkey_function find_flag); @@ -505,6 +529,12 @@ class ha_ndbcluster: public handler const char * table_type() const; const char ** bas_ext() const; ulong table_flags(void) const; + ulong partition_flags(void) const + { + return (HA_CAN_PARTITION | HA_CAN_UPDATE_PARTITION_KEY | + HA_CAN_PARTITION_UNIQUE); + } + void set_part_info(partition_info *part_info); ulong index_flags(uint idx, uint part, bool all_parts) const; uint max_supported_record_length() const; uint max_supported_keys() const; @@ -514,6 +544,7 @@ class ha_ndbcluster: public handler int rename_table(const char *from, const char *to); int delete_table(const char *name); int create(const char *name, TABLE *form, HA_CREATE_INFO *info); + int get_default_no_partitions(ulonglong max_rows); THR_LOCK_DATA **store_lock(THD *thd, THR_LOCK_DATA **to, enum thr_lock_type lock_type); @@ -577,9 +608,20 @@ static void set_tabname(const char *pathname, char *tabname); uint key_length, qc_engine_callback *engine_callback, ulonglong *engine_data); + + bool check_if_incompatible_data(HA_CREATE_INFO *info, + uint table_changes); + static void invalidate_dictionary_cache(TABLE *table, Ndb *ndb, + const char *tabname, bool global); + private: + friend int ndbcluster_drop_database_impl(const char *path); int alter_table_name(const char *to); - int drop_table(); + static int delete_table(ha_ndbcluster *h, Ndb *ndb, + const char *path, + const char *db, + const char 
*table_name); + int intern_drop_table(); int create_index(const char *name, KEY *key_info, bool unique); int create_ordered_index(const char *name, KEY *key_info); int create_unique_index(const char *name, KEY *key_info); @@ -592,15 +634,21 @@ private: NDB_INDEX_TYPE get_index_type_from_table(uint index_no) const; int check_index_fields_not_null(uint index_no); - int pk_read(const byte *key, uint key_len, byte *buf); - int complemented_pk_read(const byte *old_data, byte *new_data); - int peek_row(const byte *record); - int unique_index_read(const byte *key, uint key_len, - byte *buf); + uint set_up_partition_info(partition_info *part_info, + TABLE *table, + void *tab); + int complemented_pk_read(const byte *old_data, byte *new_data, + uint32 old_part_id); + int pk_read(const byte *key, uint key_len, byte *buf, uint32 part_id); int ordered_index_scan(const key_range *start_key, const key_range *end_key, - bool sorted, bool descending, byte* buf); + bool sorted, bool descending, byte* buf, + part_id_range *part_spec); int full_table_scan(byte * buf); + + int peek_row(const byte *record); + int unique_index_read(const byte *key, uint key_len, + byte *buf); int fetch_next(NdbScanOperation* op); int next_result(byte *buf); int define_read_attrs(byte* buf, NdbOperation* op); @@ -618,13 +666,15 @@ private: uint fieldnr, const byte* field_ptr); int set_ndb_key(NdbOperation*, Field *field, uint fieldnr, const byte* field_ptr); - int set_ndb_value(NdbOperation*, Field *field, uint fieldnr, bool *set_blob_value= 0); + int set_ndb_value(NdbOperation*, Field *field, uint fieldnr, + int row_offset= 0, bool *set_blob_value= 0); int get_ndb_value(NdbOperation*, Field *field, uint fieldnr, byte*); friend int g_get_ndb_blobs_value(NdbBlob *ndb_blob, void *arg); int get_ndb_blobs_value(NdbBlob *last_ndb_blob); int set_primary_key(NdbOperation *op, const byte *key); int set_primary_key_from_record(NdbOperation *op, const byte *record); - int set_bounds(NdbIndexScanOperation*, const key_range *keys[2], uint= 0); + int set_bounds(NdbIndexScanOperation*, uint inx, bool rir, + const key_range *keys[2], uint= 0); int key_cmp(uint keynr, const byte * old_row, const byte * new_row); int set_index_key(NdbOperation *, const KEY *key_info, const byte *key_ptr); void print_results(); @@ -632,7 +682,7 @@ private: ulonglong get_auto_increment(); void invalidate_dictionary_cache(bool global); int ndb_err(NdbTransaction*); - bool uses_blob_value(bool all_fields); + bool uses_blob_value(); char *update_table_comment(const char * comment); @@ -662,6 +712,7 @@ private: NdbScanOperation* op); friend int execute_commit(ha_ndbcluster*, NdbTransaction*); + friend int execute_no_commit_ignore_no_key(ha_ndbcluster*, NdbTransaction*); friend int execute_no_commit(ha_ndbcluster*, NdbTransaction*); friend int execute_no_commit_ie(ha_ndbcluster*, NdbTransaction*); @@ -678,13 +729,17 @@ private: NDB_SHARE *m_share; NDB_INDEX_DATA m_index[MAX_KEY]; // NdbRecAttr has no reference to blob - typedef union { const NdbRecAttr *rec; NdbBlob *blob; void *ptr; } NdbValue; NdbValue m_value[NDB_MAX_ATTRIBUTES_IN_TABLE]; + partition_info *m_part_info; + byte *m_rec0; + Field **m_part_field_array; + bool m_use_partition_function; + bool m_sorted; bool m_use_write; bool m_ignore_dup_key; bool m_primary_key_update; - bool m_retrieve_all_fields; - bool m_retrieve_primary_key; + bool m_write_op; + bool m_ignore_no_key; ha_rows m_rows_to_insert; ha_rows m_rows_inserted; ha_rows m_bulk_insert_rows; @@ -717,7 +772,7 @@ private: extern struct show_var_st 
ndb_status_variables[]; bool ndbcluster_init(void); -bool ndbcluster_end(void); +int ndbcluster_end(ha_panic_function flag); int ndbcluster_discover(THD* thd, const char* dbname, const char* name, const void** frmblob, uint* frmlen); @@ -725,8 +780,9 @@ int ndbcluster_find_files(THD *thd,const char *db,const char *path, const char *wild, bool dir, List<char> *files); int ndbcluster_table_exists_in_engine(THD* thd, const char *db, const char *name); -int ndbcluster_drop_database(const char* path); +void ndbcluster_drop_database(char* path); void ndbcluster_print_error(int error, const NdbOperation *error_op); -int ndbcluster_show_status(THD*); +bool ndbcluster_show_status(THD*,stat_print_fn *,enum ha_stat_type); + diff --git a/sql/ha_partition.cc b/sql/ha_partition.cc new file mode 100644 index 00000000000..4784a0c7530 --- /dev/null +++ b/sql/ha_partition.cc @@ -0,0 +1,3281 @@ +/* Copyright (C) 2005 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + This handler was developed by Mikael Ronstrom for version 5.1 of MySQL. + It is an abstraction layer on top of other handlers such as MyISAM, + InnoDB, Federated, Berkeley DB and so forth. Partitioned tables can also + be handled by a storage engine. The current example of this is NDB + Cluster, which handles partitioning internally. This has benefits in + that many loops needed in the partition handler can be avoided. + + Partitioning has an inherent feature which in some cases is positive and + in some cases is negative. It splits the data into chunks. This makes + the data more manageable, queries can easily be parallelised towards the + parts and indexes are split such that there are fewer levels in the + index trees. The inherent disadvantage is that to use a split index + one has to scan all index parts, which is fine for large queries but can + be a disadvantage for small queries. + + Partitioning lays the foundation for more manageable databases that are + extremely large. It also lays the foundation for more parallelism + in the execution of queries. This functionality will grow with later + versions of MySQL. + + You can enable it in your build by doing the following during your build + process: + ./configure --with-partition + + The partition handler is set up to use table locks. It implements a + partition "SHARE" that is inserted into a hash by table name. You can use + this to store state information that any partition handler object using + the same table will be able to see. + + Please read the object definition in ha_partition.h before reading the rest + of this file. 
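+
+  Most methods below fan out over the array of underlying handlers; the
+  recurring pattern is sketched here (illustrative; some_operation is a
+  placeholder, not a real method):
+
+    handler **file= m_file;
+    do
+    {
+      if ((error= (*file)->some_operation(...)))
+        save_error= error;
+    } while (*(++file));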
+*/ + +#ifdef __GNUC__ +#pragma implementation // gcc: Class implementation +#endif + +#include "mysql_priv.h" + +#include "ha_partition.h" + +static const char *ha_par_ext= ".par"; +#ifdef NOT_USED +static int free_share(PARTITION_SHARE * share); +static PARTITION_SHARE *get_share(const char *table_name, TABLE * table); +#endif + +/**************************************************************************** + MODULE create/delete handler object +****************************************************************************/ + +static handler *partition_create_handler(TABLE_SHARE *share); + +handlerton partition_hton = { + MYSQL_HANDLERTON_INTERFACE_VERSION, + "partition", + SHOW_OPTION_YES, + "Partition Storage Engine Helper", /* A comment used by SHOW to describe an engine */ + DB_TYPE_PARTITION_DB, + 0, /* Method that initializes a storage engine */ + 0, /* slot */ + 0, /* savepoint size */ + NULL /*ndbcluster_close_connection*/, + NULL, /* savepoint_set */ + NULL, /* savepoint_rollback */ + NULL, /* savepoint_release */ + NULL /*ndbcluster_commit*/, + NULL /*ndbcluster_rollback*/, + NULL, /* prepare */ + NULL, /* recover */ + NULL, /* commit_by_xid */ + NULL, /* rollback_by_xid */ + NULL, + NULL, + NULL, + partition_create_handler, /* Create a new handler */ + NULL, /* Drop a database */ + NULL, /* Panic call */ + NULL, /* Start Consistent Snapshot */ + NULL, /* Flush logs */ + NULL, /* Show status */ + HTON_NOT_USER_SELECTABLE | HTON_HIDDEN +}; + +static handler *partition_create_handler(TABLE_SHARE *share) +{ + return new ha_partition(share); +} + + +ha_partition::ha_partition(TABLE_SHARE *share) + :handler(&partition_hton, share), m_part_info(NULL), m_create_handler(FALSE), + m_is_sub_partitioned(0) +{ + DBUG_ENTER("ha_partition::ha_partition(table)"); + init_handler_variables(); + DBUG_VOID_RETURN; +} + + +ha_partition::ha_partition(partition_info *part_info) + :handler(&partition_hton, NULL), m_part_info(part_info), + m_create_handler(TRUE), + m_is_sub_partitioned(is_sub_partitioned(m_part_info)) + +{ + DBUG_ENTER("ha_partition::ha_partition(part_info)"); + init_handler_variables(); + DBUG_ASSERT(m_part_info); + DBUG_VOID_RETURN; +} + + +void ha_partition::init_handler_variables() +{ + active_index= MAX_KEY; + m_file_buffer= NULL; + m_name_buffer_ptr= NULL; + m_engine_array= NULL; + m_file= NULL; + m_tot_parts= 0; + m_has_transactions= 0; + m_pkey_is_clustered= 0; + m_lock_type= F_UNLCK; + m_part_spec.start_part= NO_CURRENT_PART_ID; + m_scan_value= 2; + m_ref_length= 0; + m_part_spec.end_part= NO_CURRENT_PART_ID; + m_index_scan_type= partition_no_index_scan; + m_start_key.key= NULL; + m_start_key.length= 0; + m_myisam= FALSE; + m_innodb= FALSE; + m_extra_cache= FALSE; + m_extra_cache_size= 0; + m_table_flags= HA_FILE_BASED | HA_REC_NOT_IN_SEQ; + m_low_byte_first= 1; + m_part_field_array= NULL; + m_ordered_rec_buffer= NULL; + m_top_entry= NO_CURRENT_PART_ID; + m_rec_length= 0; + m_last_part= 0; + m_rec0= 0; + m_curr_key_info= 0; + /* + this allows blackhole to work properly + */ + m_no_locks= 0; + +#ifdef DONT_HAVE_TO_BE_INITALIZED + m_start_key.flag= 0; + m_ordered= TRUE; +#endif +} + + +ha_partition::~ha_partition() +{ + DBUG_ENTER("ha_partition::~ha_partition()"); + if (m_file != NULL) + { + uint i; + for (i= 0; i < m_tot_parts; i++) + delete m_file[i]; + } + my_free((char*) m_ordered_rec_buffer, MYF(MY_ALLOW_ZERO_PTR)); + + clear_handler_file(); + DBUG_VOID_RETURN; +} + + +/* + The partition handler is only a layer on top of other engines. 
Thus it + can't really perform anything without the underlying handlers. Thus we + add this method as part of the allocation of a handler object. + + 1) Allocation of underlying handlers + If we have access to the partition info we will allocate one handler + instance for each partition. + 2) Allocation without partition info + The cases where we don't have access to this information is when called + in preparation for delete_table and rename_table and in that case we + only need to set HA_FILE_BASED. In that case we will use the .par file + that contains information about the partitions and their engines and + the names of each partition. + 3) Table flags initialisation + We need also to set table flags for the partition handler. This is not + static since it depends on what storage engines are used as underlying + handlers. + The table flags is set in this routine to simulate the behaviour of a + normal storage engine + The flag HA_FILE_BASED will be set independent of the underlying handlers + 4) Index flags initialisation + When knowledge exists on the indexes it is also possible to initialise the + index flags. Again the index flags must be initialised by using the under- + lying handlers since this is storage engine dependent. + The flag HA_READ_ORDER will be reset for the time being to indicate no + ordered output is available from partition handler indexes. Later a merge + sort will be performed using the underlying handlers. + 5) primary_key_is_clustered, has_transactions and low_byte_first is + calculated here. +*/ + +int ha_partition::ha_initialise() +{ + handler **file_array, *file; + DBUG_ENTER("ha_partition::ha_initialise"); + + if (m_create_handler) + { + m_tot_parts= get_tot_partitions(m_part_info); + DBUG_ASSERT(m_tot_parts > 0); + if (new_handlers_from_part_info()) + DBUG_RETURN(1); + } + else if (!table_share || !table_share->normalized_path.str) + { + /* + Called with dummy table share (delete, rename and alter table) + Don't need to set-up table flags other than + HA_FILE_BASED here + */ + m_table_flags|= HA_FILE_BASED | HA_REC_NOT_IN_SEQ; + DBUG_RETURN(0); + } + else if (get_from_handler_file(table_share->normalized_path.str)) + { + my_error(ER_OUTOFMEMORY, MYF(0), 129); //Temporary fix TODO print_error + DBUG_RETURN(1); + } + /* + We create all underlying table handlers here. We do it in this special + method to be able to report allocation errors. + + Set up table_flags, low_byte_first, primary_key_is_clustered and + has_transactions since they are called often in all kinds of places, + other parameters are calculated on demand. + HA_FILE_BASED is always set for partition handler since we use a + special file for handling names of partitions, engine types. + HA_CAN_GEOMETRY, HA_CAN_FULLTEXT, HA_CAN_SQL_HANDLER, + HA_CAN_INSERT_DELAYED is disabled until further investigated. 
+ */ + m_table_flags= m_file[0]->table_flags(); + m_low_byte_first= m_file[0]->low_byte_first(); + m_has_transactions= TRUE; + m_pkey_is_clustered= TRUE; + file_array= m_file; + do + { + file= *file_array; + if (m_low_byte_first != file->low_byte_first()) + { + // Cannot have handlers with different endianness + my_error(ER_MIX_HANDLER_ERROR, MYF(0)); + DBUG_RETURN(1); + } + if (!file->has_transactions()) + m_has_transactions= FALSE; + if (!file->primary_key_is_clustered()) + m_pkey_is_clustered= FALSE; + m_table_flags&= file->table_flags(); + } while (*(++file_array)); + m_table_flags&= ~(HA_CAN_GEOMETRY | HA_CAN_FULLTEXT | + HA_CAN_SQL_HANDLER | HA_CAN_INSERT_DELAYED); + m_table_flags|= HA_FILE_BASED | HA_REC_NOT_IN_SEQ; + DBUG_RETURN(0); +} + +/**************************************************************************** + MODULE meta data changes +****************************************************************************/ +/* + This method is used to calculate the partition name, service routine to + the del_ren_cre_table method. +*/ + +static void create_partition_name(char *out, const char *in1, const char *in2) +{ + strxmov(out, in1, "_", in2, NullS); +} + +/* + This method is used to calculate the subpartition name, service routine to + the del_ren_cre_table method. +*/ + +static void create_subpartition_name(char *out, const char *in1, + const char *in2, const char *in3) +{ + strxmov(out, in1, "_", in2, "_", in3, NullS); +} + + +/* + Used to delete a table. By the time delete_table() has been called all + opened references to this table will have been closed (and your globally + shared references released). The variable name will just be the name of + the table. You will need to remove any files you have created at this + point. + + If you do not implement this, the default delete_table() is called from + handler.cc and it will delete all files with the file extensions returned + by bas_ext(). + + Called from handler.cc by delete_table and ha_create_table(). Only used + during create if the table_flag HA_DROP_BEFORE_CREATE was specified for + the storage engine. +*/ + +int ha_partition::delete_table(const char *name) +{ + int error; + DBUG_ENTER("ha_partition::delete_table"); + if ((error= del_ren_cre_table(name, NULL, NULL, NULL))) + DBUG_RETURN(error); + DBUG_RETURN(handler::delete_table(name)); +} + + +/* + Renames a table from one name to another as part of an alter table call. + + If you do not implement this, the default rename_table() is called from + handler.cc and it will rename all files with the file extensions returned + by bas_ext(). + + Called from sql_table.cc by mysql_rename_table(). +*/ + +int ha_partition::rename_table(const char *from, const char *to) +{ + int error; + DBUG_ENTER("ha_partition::rename_table"); + if ((error= del_ren_cre_table(from, to, NULL, NULL))) + DBUG_RETURN(error); + DBUG_RETURN(handler::rename_table(from, to)); +} + + +/* + create_handler_files is called to create any handler specific files + before opening the file with openfrm to later call ::create on the + file object. + In the partition handler this is used to store the names of partitions + and types of engines in the partitions. 
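+  For example (illustrative): for a table t1 this writes the file t1.par
+  (see ha_par_ext above) holding the per-partition names and engine types.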
+*/ + +int ha_partition::create_handler_files(const char *name) +{ + DBUG_ENTER("ha_partition::create_handler_files()"); + + /* + We need to update total number of parts since we might write the handler + file as part of a partition management command + */ + m_tot_parts= get_tot_partitions(m_part_info); + if (create_handler_file(name)) + { + my_error(ER_CANT_CREATE_HANDLER_FILE, MYF(0)); + DBUG_RETURN(1); + } + DBUG_RETURN(0); +} + + +/* + create() is called to create a table. The variable name will have the name + of the table. When create() is called you do not need to worry about + opening the table. Also, the FRM file will have already been created so + adjusting create_info will not do you any good. You can overwrite the frm + file at this point if you wish to change the table definition, but there + are no methods currently provided for doing that. + + Called from handle.cc by ha_create_table(). +*/ + +int ha_partition::create(const char *name, TABLE *table_arg, + HA_CREATE_INFO *create_info) +{ + char t_name[FN_REFLEN]; + DBUG_ENTER("ha_partition::create"); + + strmov(t_name, name); +// *fn_ext(t_name)= 0; + DBUG_ASSERT(*fn_rext((char*)name) == '\0'); + if (del_ren_cre_table(t_name, NULL, table_arg, create_info)) + { + handler::delete_table(t_name); + DBUG_RETURN(1); + } + DBUG_RETURN(0); +} + +int ha_partition::drop_partitions(const char *path) +{ + List_iterator<partition_element> part_it(m_part_info->partitions); + char part_name_buff[FN_REFLEN]; + uint no_parts= m_part_info->no_parts; + uint no_subparts= m_part_info->no_subparts, i= 0; + int error= 1; + DBUG_ENTER("ha_partition::drop_partitions()"); + + do + { + partition_element *part_elem= part_it++; + if (part_elem->part_state == PART_IS_DROPPED) + { + /* + This part is to be dropped, meaning the part or all its subparts. + */ + if (is_sub_partitioned(m_part_info)) + { + List_iterator<partition_element> sub_it(part_elem->subpartitions); + uint j= 0, part; + do + { + partition_element *sub_elem= sub_it++; + create_subpartition_name(part_name_buff, path, + part_elem->partition_name, + sub_elem->partition_name); + part= i * no_subparts + j; + DBUG_PRINT("info", ("Drop subpartition %s", part_name_buff)); + error= m_file[part]->delete_table((const char *) part_name_buff); + } while (++j < no_subparts); + } + else + { + create_partition_name(part_name_buff, path, + part_elem->partition_name); + DBUG_PRINT("info", ("Drop partition %s", part_name_buff)); + error= m_file[i]->delete_table((const char *) part_name_buff); + } + } + } while (++i < no_parts); + DBUG_RETURN(error); +} + +void ha_partition::update_create_info(HA_CREATE_INFO *create_info) +{ + return; +} + + +char *ha_partition::update_table_comment(const char *comment) +{ + return (char*) comment; // Nothing to change +} + + + +/* + Common routine to handle delete_table and rename_table. + The routine uses the partition handler file to get the + names of the partition instances. Both these routines + are called after creating the handler without table + object and thus the file is needed to discover the + names of the partitions and the underlying storage engines. 
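+
+  For example, a table t1 with partitions p0 and p1 maps to the underlying
+  tables t1_p0 and t1_p1 (each with its own engine's file extensions), as
+  composed by create_partition_name and create_subpartition_name above.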
+*/ + +uint ha_partition::del_ren_cre_table(const char *from, + const char *to, + TABLE *table_arg, + HA_CREATE_INFO *create_info) +{ + int save_error= 0, error; + char from_buff[FN_REFLEN], to_buff[FN_REFLEN]; + char *name_buffer_ptr; + uint i; + handler **file; + DBUG_ENTER("del_ren_cre_table()"); + + if (get_from_handler_file(from)) + DBUG_RETURN(TRUE); + DBUG_ASSERT(m_file_buffer); + name_buffer_ptr= m_name_buffer_ptr; + file= m_file; + i= 0; + do + { + create_partition_name(from_buff, from, name_buffer_ptr); + if (to != NULL) + { // Rename branch + create_partition_name(to_buff, to, name_buffer_ptr); + error= (*file)->rename_table((const char*) from_buff, + (const char*) to_buff); + } + else if (table_arg == NULL) // delete branch + error= (*file)->delete_table((const char*) from_buff); + else + { + set_up_table_before_create(table_arg, create_info, i); + error= (*file)->create(from_buff, table_arg, create_info); + } + name_buffer_ptr= strend(name_buffer_ptr) + 1; + if (error) + save_error= error; + i++; + } while (*(++file)); + DBUG_RETURN(save_error); +} + + +partition_element *ha_partition::find_partition_element(uint part_id) +{ + uint i; + uint curr_part_id= 0; + List_iterator_fast < partition_element > part_it(m_part_info->partitions); + + for (i= 0; i < m_part_info->no_parts; i++) + { + partition_element *part_elem; + part_elem= part_it++; + if (m_is_sub_partitioned) + { + uint j; + List_iterator_fast <partition_element> sub_it(part_elem->subpartitions); + for (j= 0; j < m_part_info->no_subparts; j++) + { + part_elem= sub_it++; + if (part_id == curr_part_id++) + return part_elem; + } + } + else if (part_id == curr_part_id++) + return part_elem; + } + DBUG_ASSERT(0); + current_thd->fatal_error(); // Abort + return NULL; +} + + +void ha_partition::set_up_table_before_create(TABLE *table, + HA_CREATE_INFO *info, + uint part_id) +{ + /* + Set up + 1) Comment on partition + 2) MAX_ROWS, MIN_ROWS on partition + 3) Index file name on partition + 4) Data file name on partition + */ + partition_element *part_elem= find_partition_element(part_id); + if (!part_elem) + return; // Fatal error + table->s->max_rows= part_elem->part_max_rows; + table->s->min_rows= part_elem->part_min_rows; + info->index_file_name= part_elem->index_file_name; + info->data_file_name= part_elem->data_file_name; +} + + +/* + Routine used to add two names with '_' in between then. Service routine + to create_handler_file + Include the NULL in the count of characters since it is needed as separator + between the partition names. +*/ + +static uint name_add(char *dest, const char *first_name, const char *sec_name) +{ + return (uint) (strxmov(dest, first_name, "_", sec_name, NullS) -dest) + 1; +} + + +/* + Method used to create handler file with names of partitions, their + engine types and the number of partitions. 
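+
+  For illustration, a table with two partitions p0 and p1 and no
+  subpartitioning yields tot_name_len= 6 ("p0\0p1\0") and a 28-byte file:
+    bytes  0- 3  length in words (7)
+    bytes  4- 7  checksum
+    bytes  8-11  number of partitions (2)
+    bytes 12-15  engine array (2 engine bytes + 2 padding bytes)
+    bytes 16-19  length of name part (6)
+    bytes 20-27  "p0\0p1\0" + 2 padding bytes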
+*/ + +bool ha_partition::create_handler_file(const char *name) +{ + partition_element *part_elem, *subpart_elem; + uint i, j, part_name_len, subpart_name_len; + uint tot_partition_words, tot_name_len; + uint tot_len_words, tot_len_byte, chksum, tot_name_words; + char *name_buffer_ptr; + uchar *file_buffer, *engine_array; + bool result= TRUE; + char file_name[FN_REFLEN]; + File file; + List_iterator_fast < partition_element > part_it(m_part_info->partitions); + DBUG_ENTER("create_handler_file"); + + DBUG_PRINT("info", ("table name = %s", name)); + tot_name_len= 0; + for (i= 0; i < m_part_info->no_parts; i++) + { + part_elem= part_it++; + part_name_len= strlen(part_elem->partition_name); + if (!m_is_sub_partitioned) + tot_name_len+= part_name_len + 1; + else + { + List_iterator_fast<partition_element> sub_it(part_elem->subpartitions); + for (j= 0; j < m_part_info->no_subparts; j++) + { + subpart_elem= sub_it++; + subpart_name_len= strlen(subpart_elem->partition_name); + tot_name_len+= part_name_len + subpart_name_len + 2; + } + } + } + /* + File format: + Length in words 4 byte + Checksum 4 byte + Total number of partitions 4 byte + Array of engine types n * 4 bytes where + n = (m_tot_parts + 3)/4 + Length of name part in bytes 4 bytes + Name part m * 4 bytes where + m = ((length_name_part + 3)/4)*4 + + All padding bytes are zeroed + */ + tot_partition_words= (m_tot_parts + 3) / 4; + tot_name_words= (tot_name_len + 3) / 4; + tot_len_words= 4 + tot_partition_words + tot_name_words; + tot_len_byte= 4 * tot_len_words; + if (!(file_buffer= (uchar *) my_malloc(tot_len_byte, MYF(MY_ZEROFILL)))) + DBUG_RETURN(TRUE); + engine_array= (file_buffer + 12); + name_buffer_ptr= (char*) (file_buffer + ((4 + tot_partition_words) * 4)); + part_it.rewind(); + for (i= 0; i < m_part_info->no_parts; i++) + { + part_elem= part_it++; + if (!m_is_sub_partitioned) + { + name_buffer_ptr= strmov(name_buffer_ptr, part_elem->partition_name)+1; + *engine_array= (uchar) ha_legacy_type(part_elem->engine_type); + DBUG_PRINT("info", ("engine: %u", *engine_array)); + engine_array++; + } + else + { + List_iterator_fast<partition_element> sub_it(part_elem->subpartitions); + for (j= 0; j < m_part_info->no_subparts; j++) + { + subpart_elem= sub_it++; + name_buffer_ptr+= name_add(name_buffer_ptr, + part_elem->partition_name, + subpart_elem->partition_name); + *engine_array= (uchar) ha_legacy_type(part_elem->engine_type); + engine_array++; + } + } + } + chksum= 0; + int4store(file_buffer, tot_len_words); + int4store(file_buffer + 8, m_tot_parts); + int4store(file_buffer + 12 + (tot_partition_words * 4), tot_name_len); + for (i= 0; i < tot_len_words; i++) + chksum^= uint4korr(file_buffer + 4 * i); + int4store(file_buffer + 4, chksum); + /* + Remove .frm extension and replace with .par + Create and write and close file + to be used at open, delete_table and rename_table + */ + fn_format(file_name, name, "", ".par", MY_APPEND_EXT); + if ((file= my_create(file_name, CREATE_MODE, O_RDWR | O_TRUNC, + MYF(MY_WME))) >= 0) + { + result= my_write(file, (byte *) file_buffer, tot_len_byte, + MYF(MY_WME | MY_NABP)); + VOID(my_close(file, MYF(0))); + } + else + result= TRUE; + my_free((char*) file_buffer, MYF(0)); + DBUG_RETURN(result); +} + + +void ha_partition::clear_handler_file() +{ + my_free((char*) m_file_buffer, MYF(MY_ALLOW_ZERO_PTR)); + my_free((char*) m_engine_array, MYF(MY_ALLOW_ZERO_PTR)); + m_file_buffer= NULL; + m_name_buffer_ptr= NULL; + m_engine_array= NULL; +} + + +bool ha_partition::create_handlers() +{ + uint i; + uint 
alloc_len= (m_tot_parts + 1) * sizeof(handler*);
+  DBUG_ENTER("create_handlers");
+
+  if (!(m_file= (handler **) sql_alloc(alloc_len)))
+    DBUG_RETURN(TRUE);
+  bzero(m_file, alloc_len);
+  for (i= 0; i < m_tot_parts; i++)
+  {
+    if (!(m_file[i]= get_new_handler(table_share, current_thd->mem_root,
+                                     m_engine_array[i])))
+      DBUG_RETURN(TRUE);
+    DBUG_PRINT("info", ("engine_type: %u",
+                        (uint) ha_legacy_type(m_engine_array[i])));
+  }
+  m_file[m_tot_parts]= 0;
+  /* For the moment we only support partitions over the same table engine */
+  if (m_engine_array[0] == &myisam_hton)
+  {
+    DBUG_PRINT("info", ("MyISAM"));
+    m_myisam= TRUE;
+  }
+  /* InnoDB may not be compiled in... */
+  else if (ha_legacy_type(m_engine_array[0]) == DB_TYPE_INNODB)
+  {
+    DBUG_PRINT("info", ("InnoDB"));
+    m_innodb= TRUE;
+  }
+  DBUG_RETURN(FALSE);
+}
+
+
+bool ha_partition::new_handlers_from_part_info()
+{
+  uint i, j, part_count;
+  partition_element *part_elem;
+  uint alloc_len= (m_tot_parts + 1) * sizeof(handler*);
+  List_iterator_fast <partition_element> part_it(m_part_info->partitions);
+  THD *thd= current_thd;
+  DBUG_ENTER("ha_partition::new_handlers_from_part_info");
+
+  if (!(m_file= (handler **) sql_alloc(alloc_len)))
+    goto error;
+  bzero(m_file, alloc_len);
+  DBUG_ASSERT(m_part_info->no_parts > 0);
+
+  i= 0;
+  part_count= 0;
+  /*
+    We do not know the size of the failed allocation, so we report an
+    invented number of bytes in the error message if allocation fails.
+  */
+  alloc_len= 128;
+  do
+  {
+    part_elem= part_it++;
+    if (m_is_sub_partitioned)
+    {
+      /* One handler per subpartition, stored in consecutive slots */
+      for (j= 0; j < m_part_info->no_subparts; j++)
+      {
+        if (!(m_file[part_count++]= get_new_handler(table_share,
+                                                    thd->mem_root,
+                                                    part_elem->engine_type)))
+          goto error;
+        DBUG_PRINT("info", ("engine_type: %u",
+                            (uint) ha_legacy_type(part_elem->engine_type)));
+      }
+    }
+    else
+    {
+      if (!(m_file[part_count++]= get_new_handler(table_share, thd->mem_root,
+                                                  part_elem->engine_type)))
+        goto error;
+      DBUG_PRINT("info", ("engine_type: %u",
+                          (uint) ha_legacy_type(part_elem->engine_type)));
+    }
+  } while (++i < m_part_info->no_parts);
+  if (part_elem->engine_type == &myisam_hton)
+  {
+    DBUG_PRINT("info", ("MyISAM"));
+    m_myisam= TRUE;
+  }
+  DBUG_RETURN(FALSE);
+error:
+  my_error(ER_OUTOFMEMORY, MYF(0), alloc_len);
+  DBUG_RETURN(TRUE);
+}
+
+
+/*
+  Open handler file to get partition names, engine types and number of
+  partitions.
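+  The file is validated with the stored checksum: create_handler_file
+  stores the XOR of all other 4-byte words in the checksum word, so
+  XOR:ing every word of the file (checksum included) must yield 0,
+  which is exactly what the code below verifies.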
+*/ + +bool ha_partition::get_from_handler_file(const char *name) +{ + char buff[FN_REFLEN], *address_tot_name_len; + File file; + char *file_buffer, *name_buffer_ptr; + handlerton **engine_array; + uint i, len_bytes, len_words, tot_partition_words, tot_name_words, chksum; + DBUG_ENTER("ha_partition::get_from_handler_file"); + DBUG_PRINT("enter", ("table name: '%s'", name)); + + if (m_file_buffer) + DBUG_RETURN(FALSE); + fn_format(buff, name, "", ha_par_ext, MY_APPEND_EXT); + + /* Following could be done with my_stat to read in whole file */ + if ((file= my_open(buff, O_RDONLY | O_SHARE, MYF(0))) < 0) + DBUG_RETURN(TRUE); + if (my_read(file, (byte *) & buff[0], 8, MYF(MY_NABP))) + goto err1; + len_words= uint4korr(buff); + len_bytes= 4 * len_words; + if (!(file_buffer= my_malloc(len_bytes, MYF(0)))) + goto err1; + VOID(my_seek(file, 0, MY_SEEK_SET, MYF(0))); + if (my_read(file, (byte *) file_buffer, len_bytes, MYF(MY_NABP))) + goto err2; + + chksum= 0; + for (i= 0; i < len_words; i++) + chksum ^= uint4korr((file_buffer) + 4 * i); + if (chksum) + goto err2; + m_tot_parts= uint4korr((file_buffer) + 8); + tot_partition_words= (m_tot_parts + 3) / 4; + if (!(engine_array= (handlerton **) my_malloc(m_tot_parts * sizeof(handlerton*),MYF(0)))) + goto err2; + for (i= 0; i < m_tot_parts; i++) + engine_array[i]= ha_resolve_by_legacy_type(current_thd, + (enum legacy_db_type) *(uchar *) ((file_buffer) + 12 + i)); + address_tot_name_len= file_buffer + 12 + 4 * tot_partition_words; + tot_name_words= (uint4korr(address_tot_name_len) + 3) / 4; + if (len_words != (tot_partition_words + tot_name_words + 4)) + goto err2; + name_buffer_ptr= file_buffer + 16 + 4 * tot_partition_words; + VOID(my_close(file, MYF(0))); + m_file_buffer= file_buffer; // Will be freed in clear_handler_file() + m_name_buffer_ptr= name_buffer_ptr; + m_engine_array= engine_array; + if (!m_file && create_handlers()) + { + clear_handler_file(); + DBUG_RETURN(TRUE); + } + DBUG_RETURN(FALSE); + +err2: + my_free(file_buffer, MYF(0)); +err1: + VOID(my_close(file, MYF(0))); + DBUG_RETURN(TRUE); +} + +/**************************************************************************** + MODULE open/close object +****************************************************************************/ +/* + Used for opening tables. The name will be the name of the file. + A table is opened when it needs to be opened. For instance + when a request comes in for a select on the table (tables are not + open and closed for each request, they are cached). + + Called from handler.cc by handler::ha_open(). The server opens all tables + by calling ha_open() which then calls the handler specific open(). +*/ + +int ha_partition::open(const char *name, int mode, uint test_if_locked) +{ + int error; + char name_buff[FN_REFLEN]; + char *name_buffer_ptr= m_name_buffer_ptr; + handler **file; + uint alloc_len; + DBUG_ENTER("ha_partition::open"); + + ref_length= 0; + m_part_field_array= m_part_info->full_part_field_array; + if (get_from_handler_file(name)) + DBUG_RETURN(1); + m_start_key.length= 0; + m_rec0= table->record[0]; + m_rec_length= table->s->reclength; + alloc_len= m_tot_parts * (m_rec_length + PARTITION_BYTES_IN_POS); + alloc_len+= table->s->max_key_length; + if (!m_ordered_rec_buffer) + { + if (!(m_ordered_rec_buffer= (byte*)my_malloc(alloc_len, MYF(MY_WME)))) + { + DBUG_RETURN(1); + } + { + /* + We set-up one record per partition and each record has 2 bytes in + front where the partition id is written. This is used by ordered + index_read. 
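+      For example, with m_rec_length == 100 the per-partition entries
+      start at offsets 0, 102, 204, ... and the first two bytes of each
+      entry hold the partition id.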
+      We also set up a reference to the first record for temporary use in
+      setting up the scan.
+    */
+      char *ptr= (char*) m_ordered_rec_buffer;
+      uint i= 0;
+      do
+      {
+        int2store(ptr, i);
+        ptr+= m_rec_length + PARTITION_BYTES_IN_POS;
+      } while (++i < m_tot_parts);
+      m_start_key.key= (const byte*) ptr;
+    }
+  }
+  file= m_file;
+  do
+  {
+    create_partition_name(name_buff, name, name_buffer_ptr);
+    if ((error= (*file)->ha_open(table, (const char*) name_buff, mode,
+                                 test_if_locked)))
+      goto err_handler;
+    m_no_locks+= (*file)->lock_count();
+    name_buffer_ptr+= strlen(name_buffer_ptr) + 1;
+    set_if_bigger(ref_length, ((*file)->ref_length));
+  } while (*(++file));
+  /*
+    Add 2 bytes for partition id in position ref length.
+    ref_length= max_in_all_partitions(ref_length) + PARTITION_BYTES_IN_POS
+  */
+  ref_length+= PARTITION_BYTES_IN_POS;
+  m_ref_length= ref_length;
+  /*
+    Release the buffer read from the .par file. It will not be reused
+    again after the table has been opened once.
+  */
+  clear_handler_file();
+  /*
+    Initialise the priority queue, set up to read in forward order.
+  */
+  if ((error= init_queue(&queue, m_tot_parts, (uint) PARTITION_BYTES_IN_POS,
+                         0, key_rec_cmp, (void*)this)))
+    goto err_handler;
+  /*
+    Some handlers update statistics as part of the open call. This will in
+    some cases corrupt the statistics of the partition handler and thus,
+    to ensure we have correct statistics, we call info from open after
+    calling open on all individual handlers.
+  */
+  info(HA_STATUS_VARIABLE | HA_STATUS_CONST);
+  DBUG_RETURN(0);
+
+err_handler:
+  while (file-- != m_file)
+    (*file)->close();
+  DBUG_RETURN(error);
+}
+
+/*
+  Closes a table. We call the free_share() function to free any resources
+  that we have allocated in the "shared" structure.
+
+  Called from sql_base.cc, sql_select.cc, and table.cc.
+  In sql_select.cc it is only used to close up temporary tables or during
+  the process where a temporary table is converted over to being a
+  MyISAM table.
+  For sql_base.cc look at close_data_tables().
+*/
+
+int ha_partition::close(void)
+{
+  handler **file;
+  DBUG_ENTER("ha_partition::close");
+
+  delete_queue(&queue);
+  file= m_file;
+  do
+  {
+    (*file)->close();
+  } while (*(++file));
+  DBUG_RETURN(0);
+}
+
+
+/****************************************************************************
+                MODULE start/end statement
+****************************************************************************/
+/*
+  A number of methods to define various constants for the handler. In
+  the case of the partition handler we need to use some max and min
+  of the underlying handlers in most cases.
+*/
+
+/*
+  First you should go read the section "locking functions for mysql" in
+  lock.cc to understand this.
+  This creates a lock on the table. If you are implementing a storage
+  engine that can handle transactions, look at ha_berkeley.cc to see how
+  you will want to go about doing this. Otherwise you should consider
+  calling flock() here.
+  Originally this method was used to set locks on file level to enable
+  several MySQL Servers to work on the same data. For transactional
+  engines it has been "abused" to also mean start and end of statements
+  to enable proper rollback of statements and transactions. When LOCK
+  TABLES has been issued the start_stmt method takes over the role of
+  indicating start of statement but in this case there is no end of
+  statement indicator(?).
+
+  Called from lock.cc by lock_external() and unlock_external(). Also called
+  from sql_table.cc by copy_data_between_tables().
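+
+  In the partition handler the call is simply forwarded to every
+  underlying handler; if any of them fails while taking a lock, the
+  locks already taken are released again (see err_handler below) before
+  the error is returned.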
+*/
+
+int ha_partition::external_lock(THD *thd, int lock_type)
+{
+  int error;
+  handler **file;
+  DBUG_ENTER("ha_partition::external_lock");
+  file= m_file;
+  do
+  {
+    if ((error= (*file)->external_lock(thd, lock_type)))
+    {
+      if (lock_type != F_UNLCK)
+        goto err_handler;
+    }
+  } while (*(++file));
+  m_lock_type= lock_type;                       // For the future (2009?)
+  DBUG_RETURN(0);
+
+err_handler:
+  while (file-- != m_file)
+    (*file)->external_lock(thd, F_UNLCK);
+  DBUG_RETURN(error);
+}
+
+
+/*
+  The idea with handler::store_lock() is the following:
+
+  The statement decides which locks we should need for the table;
+  for updates/deletes/inserts we get WRITE locks, for SELECT... we get
+  read locks.
+
+  Before adding the lock into the table lock handler (see thr_lock.c)
+  mysqld calls store_lock with the requested locks. store_lock can then
+  modify a write lock to a read lock (or some other lock), ignore the
+  lock (if we don't want to use MySQL table locks at all) or add locks
+  for many tables (like we do when we are using a MERGE handler).
+
+  Berkeley DB, for example, changes all WRITE locks to TL_WRITE_ALLOW_WRITE
+  (which signals that we are doing WRITES, but we are still allowing other
+  readers and writers).
+
+  When releasing locks, store_lock() is also called. In this case one
+  usually doesn't have to do anything.
+
+  store_lock is called when holding a global mutex to ensure that only
+  one thread at a time changes the locking information of tables.
+
+  In some exceptional cases MySQL may send a request for a TL_IGNORE;
+  this means that we are requesting the same lock as last time and this
+  should also be ignored. (This may happen when someone does a flush
+  table when we have opened a part of the tables, in which case mysqld
+  closes and reopens the tables and tries to get the same locks as last
+  time.) In the future we will probably try to remove this.
+
+  Called from lock.cc by get_lock_data().
+*/
+
+THR_LOCK_DATA **ha_partition::store_lock(THD *thd,
+                                         THR_LOCK_DATA **to,
+                                         enum thr_lock_type lock_type)
+{
+  handler **file;
+  DBUG_ENTER("ha_partition::store_lock");
+  file= m_file;
+  do
+  {
+    to= (*file)->store_lock(thd, to, lock_type);
+  } while (*(++file));
+  DBUG_RETURN(to);
+}
+
+
+int ha_partition::start_stmt(THD *thd, thr_lock_type lock_type)
+{
+  int error= 0;
+  handler **file;
+  DBUG_ENTER("ha_partition::start_stmt");
+  file= m_file;
+  do
+  {
+    if ((error= (*file)->start_stmt(thd, lock_type)))
+      break;
+  } while (*(++file));
+  DBUG_RETURN(error);
+}
+
+
+/*
+  Returns the number of locks needed in a call to store_lock.
+  We return the number of partitions since we call store_lock on each
+  underlying handler. This assists the above functions in allocating
+  sufficient space for lock structures.
+*/
+
+uint ha_partition::lock_count() const
+{
+  DBUG_ENTER("ha_partition::lock_count");
+  DBUG_RETURN(m_no_locks);
+}
+
+
+/*
+  The record currently being processed was not in the result set of the
+  statement and is thus unlocked. Used for UPDATE and DELETE queries.
+*/
+
+void ha_partition::unlock_row()
+{
+  m_file[m_last_part]->unlock_row();
+  return;
+}
+
+
+/****************************************************************************
+                MODULE change record
+****************************************************************************/
+
+/*
+  write_row() inserts a row. buf is a byte array of data, normally
+  record[0].
+
+  You can use the field information to extract the data from the native byte
+  array type.
+
+  Example of this would be:
+  for (Field **field=table->field ; *field ; field++)
+  {
+    ...
+  }
+
+  See ha_tina.cc for an example of extracting all of the data as strings.
+  ha_berkeley.cc has an example of how to store it intact by "packing" it
+  for ha_berkeley's own native storage type.
+
+  See the note for update_row() on auto_increments and timestamps. This
+  case also applies to write_row().
+
+  Called from item_sum.cc, item_sum.cc, sql_acl.cc, sql_insert.cc,
+  sql_insert.cc, sql_select.cc, sql_table.cc, sql_udf.cc, and sql_update.cc.
+
+  ADDITIONAL INFO:
+
+  Most handlers set the timestamp when calling write_row if any such
+  fields exist. Since we are calling an underlying handler we assume the
+  underlying handler will take on this responsibility.
+
+  Underlying handlers will also call update_auto_increment to calculate
+  the new auto increment value. We will catch the call to
+  get_auto_increment and ensure this increment value is maintained by
+  only one of the underlying handlers.
+*/
+
+int ha_partition::write_row(byte * buf)
+{
+  uint32 part_id;
+  int error;
+#ifdef NOT_NEEDED
+  byte *rec0= m_rec0;
+#endif
+  DBUG_ENTER("ha_partition::write_row");
+  DBUG_ASSERT(buf == m_rec0);
+
+#ifdef NOT_NEEDED
+  if (likely(buf == rec0))
+#endif
+    error= m_part_info->get_partition_id(m_part_info, &part_id);
+#ifdef NOT_NEEDED
+  else
+  {
+    set_field_ptr(m_part_field_array, buf, rec0);
+    error= m_part_info->get_partition_id(m_part_info, &part_id);
+    set_field_ptr(m_part_field_array, rec0, buf);
+  }
+#endif
+  if (unlikely(error))
+    DBUG_RETURN(HA_ERR_NO_PARTITION_FOUND);
+  m_last_part= part_id;
+  DBUG_PRINT("info", ("Insert in partition %d", part_id));
+  DBUG_RETURN(m_file[part_id]->write_row(buf));
+}
+
+
+/*
+  Yes, update_row() does what you expect, it updates a row. old_data will
+  have the previous row record in it, while new_data will have the newest
+  data in it.
+  Keep in mind that the server can do updates based on ordering if an
+  ORDER BY clause was used. Consecutive ordering is not guaranteed.
+
+  Currently new_data will not have an updated auto_increment field, or
+  an updated timestamp field. You can handle these, for example, by doing:
+  if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_UPDATE)
+    table->timestamp_field->set_time();
+  if (table->next_number_field && record == table->record[0])
+    update_auto_increment();
+
+  Called from sql_select.cc, sql_acl.cc, sql_update.cc, and sql_insert.cc.
+ new_data is always record[0] + old_data is normally record[1] but may be anything + +*/ + +int ha_partition::update_row(const byte *old_data, byte *new_data) +{ + uint32 new_part_id, old_part_id; + int error; + DBUG_ENTER("ha_partition::update_row"); + + if ((error= get_parts_for_update(old_data, new_data, table->record[0], + m_part_info, &old_part_id, &new_part_id))) + { + DBUG_RETURN(error); + } + + /* + TODO: + set_internal_auto_increment= + max(set_internal_auto_increment, new_data->auto_increment) + */ + m_last_part= new_part_id; + if (new_part_id == old_part_id) + { + DBUG_PRINT("info", ("Update in partition %d", new_part_id)); + DBUG_RETURN(m_file[new_part_id]->update_row(old_data, new_data)); + } + else + { + DBUG_PRINT("info", ("Update from partition %d to partition %d", + old_part_id, new_part_id)); + if ((error= m_file[new_part_id]->write_row(new_data))) + DBUG_RETURN(error); + if ((error= m_file[old_part_id]->delete_row(old_data))) + { +#ifdef IN_THE_FUTURE + (void) m_file[new_part_id]->delete_last_inserted_row(new_data); +#endif + DBUG_RETURN(error); + } + } + DBUG_RETURN(0); +} + + +/* + This will delete a row. buf will contain a copy of the row to be deleted. + The server will call this right after the current row has been read + (from either a previous rnd_xxx() or index_xxx() call). + If you keep a pointer to the last row or can access a primary key it will + make doing the deletion quite a bit easier. + Keep in mind that the server does no guarentee consecutive deletions. + ORDER BY clauses can be used. + + Called in sql_acl.cc and sql_udf.cc to manage internal table information. + Called in sql_delete.cc, sql_insert.cc, and sql_select.cc. In sql_select + it is used for removing duplicates while in insert it is used for REPLACE + calls. + + buf is either record[0] or record[1] + +*/ + +int ha_partition::delete_row(const byte *buf) +{ + uint32 part_id; + int error; + DBUG_ENTER("ha_partition::delete_row"); + + if ((error= get_part_for_delete(buf, m_rec0, m_part_info, &part_id))) + { + DBUG_RETURN(error); + } + m_last_part= part_id; + DBUG_RETURN(m_file[part_id]->delete_row(buf)); +} + + +/* + Used to delete all rows in a table. Both for cases of truncate and + for cases where the optimizer realizes that all rows will be + removed as a result of a SQL statement. + + Called from item_sum.cc by Item_func_group_concat::clear(), + Item_sum_count_distinct::clear(), and Item_func_group_concat::clear(). + Called from sql_delete.cc by mysql_delete(). + Called from sql_select.cc by JOIN::reinit(). + Called from sql_union.cc by st_select_lex_unit::exec(). 
+*/
+
+int ha_partition::delete_all_rows()
+{
+  int error;
+  handler **file;
+  DBUG_ENTER("ha_partition::delete_all_rows");
+  file= m_file;
+  do
+  {
+    if ((error= (*file)->delete_all_rows()))
+      DBUG_RETURN(error);
+  } while (*(++file));
+  DBUG_RETURN(0);
+}
+
+/*
+  rows == 0 means we will probably insert many rows
+*/
+
+void ha_partition::start_bulk_insert(ha_rows rows)
+{
+  handler **file;
+  DBUG_ENTER("ha_partition::start_bulk_insert");
+  if (!rows)
+  {
+    /* Avoid allocating big caches in all underlying handlers */
+    DBUG_VOID_RETURN;
+  }
+  rows= rows/m_tot_parts + 1;
+  file= m_file;
+  do
+  {
+    (*file)->start_bulk_insert(rows);
+  } while (*(++file));
+  DBUG_VOID_RETURN;
+}
+
+
+int ha_partition::end_bulk_insert()
+{
+  int error= 0;
+  handler **file;
+  DBUG_ENTER("ha_partition::end_bulk_insert");
+
+  file= m_file;
+  do
+  {
+    int tmp;
+    /* We want to execute end_bulk_insert() on all handlers */
+    if ((tmp= (*file)->end_bulk_insert()))
+      error= tmp;
+  } while (*(++file));
+  DBUG_RETURN(error);
+}
+
+/****************************************************************************
+                MODULE full table scan
+****************************************************************************/
+/*
+  Initialize engine for random reads
+
+  SYNOPSIS
+    ha_partition::rnd_init()
+    scan	0  Initialize for random reads through rnd_pos()
+		1  Initialize for random scan through rnd_next()
+
+  NOTES
+    rnd_init() is called when the server wants the storage engine to do a
+    table scan or when the server wants to access data through rnd_pos.
+
+    When scan is used we will scan one handler partition at a time.
+    When preparing for rnd_pos we will initialise all handler partitions.
+    No extra cache handling is needed when scanning is not performed.
+
+    Before initialising we will call rnd_end to ensure that we clean up from
+    any previous incarnation of a table scan.
+    Called from filesort.cc, records.cc, sql_handler.cc, sql_select.cc,
+    sql_table.cc, and sql_update.cc.
+*/
+
+int ha_partition::rnd_init(bool scan)
+{
+  int error;
+  handler **file;
+  DBUG_ENTER("ha_partition::rnd_init");
+
+  include_partition_fields_in_used_fields();
+  if (scan)
+  {
+    /*
+      rnd_end() is needed for partitioning to reset internal data if a
+      scan is already in use
+    */
+    rnd_end();
+    if (partition_scan_set_up(rec_buf(0), FALSE))
+    {
+      /*
+        The set of partitions to scan is empty. We return success and
+        return end of file on the first rnd_next.
+      */
+      DBUG_RETURN(0);
+    }
+    /*
+      We will use the partition set in our scan, using the start and stop
+      partition and checking each partition against the partition-set
+      bitfields before starting its scan.
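+      For example, when all fields of the partition function are bound to
+      constants, partition_scan_set_up may collapse the set to a single
+      partition (start_part == end_part), so only that one handler is
+      actually scanned.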
+ */ + late_extra_cache(m_part_spec.start_part); + DBUG_PRINT("info", ("rnd_init on partition %d",m_part_spec.start_part)); + error= m_file[m_part_spec.start_part]->ha_rnd_init(1); + m_scan_value= 1; // Scan active + if (error) + m_scan_value= 2; // No scan active + DBUG_RETURN(error); + } + file= m_file; + do + { + if ((error= (*file)->ha_rnd_init(0))) + goto err; + } while (*(++file)); + m_scan_value= 0; + DBUG_RETURN(0); + +err: + while (file--) + (*file)->ha_rnd_end(); + DBUG_RETURN(error); +} + + +int ha_partition::rnd_end() +{ + handler **file; + DBUG_ENTER("ha_partition::rnd_end"); + switch (m_scan_value) { + case 2: // Error + break; + case 1: // Table scan + if (m_part_spec.start_part != NO_CURRENT_PART_ID) + { + late_extra_no_cache(m_part_spec.start_part); + m_file[m_part_spec.start_part]->ha_rnd_end(); + } + break; + case 0: + file= m_file; + do + { + (*file)->ha_rnd_end(); + } while (*(++file)); + break; + } + m_part_spec.start_part= NO_CURRENT_PART_ID; + m_scan_value= 2; + DBUG_RETURN(0); +} + + +/* + read next row during full table scan (scan in random row order) + + SYNOPSIS + rnd_next() + buf buffer that should be filled with data + + This is called for each row of the table scan. When you run out of records + you should return HA_ERR_END_OF_FILE. + The Field structure for the table is the key to getting data into buf + in a manner that will allow the server to understand it. + + Called from filesort.cc, records.cc, sql_handler.cc, sql_select.cc, + sql_table.cc, and sql_update.cc. +*/ + +int ha_partition::rnd_next(byte *buf) +{ + DBUG_ASSERT(m_scan_value); + uint part_id= m_part_spec.start_part; // Cache of this variable + handler *file= m_file[part_id]; + int result= HA_ERR_END_OF_FILE; + DBUG_ENTER("ha_partition::rnd_next"); + + DBUG_ASSERT(m_scan_value == 1); + + if (part_id > m_part_spec.end_part) + { + /* + The original set of partitions to scan was empty and thus we report + the result here. + */ + goto end; + } + while (TRUE) + { + if ((result= file->rnd_next(buf))) + { + if (result == HA_ERR_RECORD_DELETED) + continue; // Probably MyISAM + + if (result != HA_ERR_END_OF_FILE) + break; // Return error + + /* End current partition */ + late_extra_no_cache(part_id); + DBUG_PRINT("info", ("rnd_end on partition %d", part_id)); + if ((result= file->ha_rnd_end())) + break; + /* Shift to next partition */ + if (++part_id > m_part_spec.end_part) + { + result= HA_ERR_END_OF_FILE; + break; + } + file= m_file[part_id]; + DBUG_PRINT("info", ("rnd_init on partition %d", part_id)); + if ((result= file->ha_rnd_init(1))) + break; + late_extra_cache(part_id); + } + else + { + m_part_spec.start_part= part_id; + m_last_part= part_id; + table->status= 0; + DBUG_RETURN(0); + } + } + +end: + m_part_spec.start_part= NO_CURRENT_PART_ID; + table->status= STATUS_NOT_FOUND; + DBUG_RETURN(result); +} + + +inline void store_part_id_in_pos(byte *pos, uint part_id) +{ + int2store(pos, part_id); +} + +inline uint get_part_id_from_pos(const byte *pos) +{ + return uint2korr(pos); +} + +/* + position() is called after each call to rnd_next() if the data needs + to be ordered. You can do something like the following to store + the position: + ha_store_ptr(ref, ref_length, current_position); + + The server uses ref to store data. ref_length in the above case is + the size needed to store current_position. ref is just a byte array + that the server will maintain. If you are using offsets to mark rows, then + current_position should be the offset. 
If it is a primary key like in + BDB, then it needs to be a primary key. + + Called from filesort.cc, sql_select.cc, sql_delete.cc and sql_update.cc. +*/ + +void ha_partition::position(const byte *record) +{ + handler *file= m_file[m_last_part]; + DBUG_ENTER("ha_partition::position"); + file->position(record); + store_part_id_in_pos(ref, m_last_part); + memcpy((ref + PARTITION_BYTES_IN_POS), file->ref, + (ref_length - PARTITION_BYTES_IN_POS)); + +#ifdef SUPPORTING_PARTITION_OVER_DIFFERENT_ENGINES +#ifdef HAVE_purify + bzero(ref + PARTITION_BYTES_IN_POS + ref_length, max_ref_length-ref_length); +#endif /* HAVE_purify */ +#endif + DBUG_VOID_RETURN; +} + +/* + This is like rnd_next, but you are given a position to use + to determine the row. The position will be of the type that you stored in + ref. You can use ha_get_ptr(pos,ref_length) to retrieve whatever key + or position you saved when position() was called. + Called from filesort.cc records.cc sql_insert.cc sql_select.cc + sql_update.cc. +*/ + +int ha_partition::rnd_pos(byte * buf, byte *pos) +{ + uint part_id; + handler *file; + DBUG_ENTER("ha_partition::rnd_pos"); + + part_id= get_part_id_from_pos((const byte *) pos); + DBUG_ASSERT(part_id < m_tot_parts); + file= m_file[part_id]; + m_last_part= part_id; + DBUG_RETURN(file->rnd_pos(buf, (pos + PARTITION_BYTES_IN_POS))); +} + + +/**************************************************************************** + MODULE index scan +****************************************************************************/ +/* + Positions an index cursor to the index specified in the handle. Fetches the + row if available. If the key value is null, begin at the first key of the + index. + + There are loads of optimisations possible here for the partition handler. + The same optimisations can also be checked for full table scan although + only through conditions and not from index ranges. + Phase one optimisations: + Check if the fields of the partition function are bound. If so only use + the single partition it becomes bound to. + Phase two optimisations: + If it can be deducted through range or list partitioning that only a + subset of the partitions are used, then only use those partitions. +*/ + +/* + index_init is always called before starting index scans (except when + starting through index_read_idx and using read_range variants). +*/ + +int ha_partition::index_init(uint inx, bool sorted) +{ + int error= 0; + handler **file; + DBUG_ENTER("ha_partition::index_init"); + + active_index= inx; + m_part_spec.start_part= NO_CURRENT_PART_ID; + m_start_key.length= 0; + m_ordered= sorted; + m_curr_key_info= table->key_info+inx; + include_partition_fields_in_used_fields(); + + file= m_file; + do + { + /* TODO RONM: Change to index_init() when code is stable */ + if ((error= (*file)->ha_index_init(inx, sorted))) + { + DBUG_ASSERT(0); // Should never happen + break; + } + } while (*(++file)); + DBUG_RETURN(error); +} + + +/* + index_end is called at the end of an index scan to clean up any + things needed to clean up. +*/ + +int ha_partition::index_end() +{ + int error= 0; + handler **file; + DBUG_ENTER("ha_partition::index_end"); + + active_index= MAX_KEY; + m_part_spec.start_part= NO_CURRENT_PART_ID; + file= m_file; + do + { + int tmp; + /* We want to execute index_end() on all handlers */ + /* TODO RONM: Change to index_end() when code is stable */ + if ((tmp= (*file)->ha_index_end())) + error= tmp; + } while (*(++file)); + DBUG_RETURN(error); +} + + +/* + index_read starts a new index scan using a start key. 
The MySQL Server
+  will check the end key on its own. Thus, to function properly, the
+  partitioned handler needs to ensure that it delivers records in the sort
+  order of the MySQL Server.
+  index_read can be restarted without calling index_end on the previous
+  index scan and without calling index_init. In this case the index_read
+  is on the same index as the previous index scan. This is particularly
+  used in conjunction with multi read ranges.
+*/
+
+int ha_partition::index_read(byte * buf, const byte * key,
+                             uint key_len, enum ha_rkey_function find_flag)
+{
+  DBUG_ENTER("ha_partition::index_read");
+  end_range= 0;
+  DBUG_RETURN(common_index_read(buf, key, key_len, find_flag));
+}
+
+
+int ha_partition::common_index_read(byte *buf, const byte *key, uint key_len,
+                                    enum ha_rkey_function find_flag)
+{
+  int error;
+  DBUG_ENTER("ha_partition::common_index_read");
+
+  memcpy((void*)m_start_key.key, key, key_len);
+  m_start_key.length= key_len;
+  m_start_key.flag= find_flag;
+  m_index_scan_type= partition_index_read;
+
+  if ((error= partition_scan_set_up(buf, TRUE)))
+  {
+    DBUG_RETURN(error);
+  }
+
+  if (!m_ordered_scan_ongoing ||
+      (find_flag == HA_READ_KEY_EXACT &&
+       (key_len >= m_curr_key_info->key_length ||
+        key_len == 0)))
+  {
+    /*
+      We use an unordered index scan either when read_range is used and
+      the flag is set to not use ordered scans, or when an exact key is
+      used; in the latter case all matching records sort as equal and thus
+      the sort order of the resulting records doesn't matter.
+      We also use an unordered index scan when the number of partitions to
+      scan is only one.
+      The unordered index scan will use the partition set created.
+      We need to set unordered scan ongoing since we can come here even
+      when it isn't set.
+    */
+    m_ordered_scan_ongoing= FALSE;
+    error= handle_unordered_scan_next_partition(buf);
+  }
+  else
+  {
+    /*
+      In all other cases we will use the ordered index scan. This will use
+      the partition set created by the get_partition_set method.
+    */
+    error= handle_ordered_index_scan(buf);
+  }
+  DBUG_RETURN(error);
+}
+
+
+/*
+  index_first() asks for the first key in the index.
+  This is similar to index_read except that there is no start key since
+  the scan starts from the leftmost entry and proceeds forward with
+  index_next.
+
+  Called from opt_range.cc, opt_sum.cc, sql_handler.cc,
+  and sql_select.cc.
+*/
+
+int ha_partition::index_first(byte * buf)
+{
+  DBUG_ENTER("ha_partition::index_first");
+  end_range= 0;
+  m_index_scan_type= partition_index_first;
+  DBUG_RETURN(common_first_last(buf));
+}
+
+
+/*
+  index_last() asks for the last key in the index.
+  This is similar to index_read except that there is no start key since
+  the scan starts from the rightmost entry and proceeds backward with
+  index_prev.
+
+  Called from opt_range.cc, opt_sum.cc, sql_handler.cc,
+  and sql_select.cc.
+*/
+
+int ha_partition::index_last(byte * buf)
+{
+  DBUG_ENTER("ha_partition::index_last");
+  m_index_scan_type= partition_index_last;
+  DBUG_RETURN(common_first_last(buf));
+}
+
+int ha_partition::common_first_last(byte *buf)
+{
+  int error;
+  if ((error= partition_scan_set_up(buf, FALSE)))
+    return error;
+  if (!m_ordered_scan_ongoing)
+    return handle_unordered_scan_next_partition(buf);
+  return handle_ordered_index_scan(buf);
+}
+
+/*
+  Positions an index cursor to the index specified in key. Fetches the
+  row if any. This is only used to read whole keys.
+ TODO: Optimise this code to avoid index_init and index_end +*/ + +int ha_partition::index_read_idx(byte * buf, uint index, const byte * key, + uint key_len, + enum ha_rkey_function find_flag) +{ + int res; + DBUG_ENTER("ha_partition::index_read_idx"); + index_init(index, 0); + res= index_read(buf, key, key_len, find_flag); + index_end(); + DBUG_RETURN(res); +} + +/* + This is used in join_read_last_key to optimise away an ORDER BY. + Can only be used on indexes supporting HA_READ_ORDER +*/ + +int ha_partition::index_read_last(byte *buf, const byte *key, uint keylen) +{ + DBUG_ENTER("ha_partition::index_read_last"); + m_ordered= TRUE; // Safety measure + DBUG_RETURN(index_read(buf, key, keylen, HA_READ_PREFIX_LAST)); +} + + +/* + Used to read forward through the index. +*/ + +int ha_partition::index_next(byte * buf) +{ + DBUG_ENTER("ha_partition::index_next"); + /* + TODO(low priority): + If we want partition to work with the HANDLER commands, we + must be able to do index_last() -> index_prev() -> index_next() + */ + DBUG_ASSERT(m_index_scan_type != partition_index_last); + if (!m_ordered_scan_ongoing) + { + DBUG_RETURN(handle_unordered_next(buf, FALSE)); + } + DBUG_RETURN(handle_ordered_next(buf, FALSE)); +} + + +/* + This routine is used to read the next but only if the key is the same + as supplied in the call. +*/ + +int ha_partition::index_next_same(byte *buf, const byte *key, uint keylen) +{ + DBUG_ENTER("ha_partition::index_next_same"); + DBUG_ASSERT(keylen == m_start_key.length); + DBUG_ASSERT(m_index_scan_type != partition_index_last); + if (!m_ordered_scan_ongoing) + DBUG_RETURN(handle_unordered_next(buf, TRUE)); + DBUG_RETURN(handle_ordered_next(buf, TRUE)); +} + +/* + Used to read backwards through the index. +*/ + +int ha_partition::index_prev(byte * buf) +{ + DBUG_ENTER("ha_partition::index_prev"); + /* TODO: read comment in index_next */ + DBUG_ASSERT(m_index_scan_type != partition_index_first); + DBUG_RETURN(handle_ordered_prev(buf)); +} + + +/* + We reimplement read_range_first since we don't want the compare_key + check at the end. This is already performed in the partition handler. + read_range_next is very much different due to that we need to scan + all underlying handlers. +*/ + +int ha_partition::read_range_first(const key_range *start_key, + const key_range *end_key, + bool eq_range_arg, bool sorted) +{ + int error; + DBUG_ENTER("ha_partition::read_range_first"); + m_ordered= sorted; + eq_range= eq_range_arg; + end_range= 0; + if (end_key) + { + end_range= &save_end_range; + save_end_range= *end_key; + key_compare_result_on_equal= + ((end_key->flag == HA_READ_BEFORE_KEY) ? 1 : + (end_key->flag == HA_READ_AFTER_KEY) ? 
-1 : 0);
+  }
+  range_key_part= m_curr_key_info->key_part;
+
+  if (!start_key)				// Read first record
+  {
+    m_index_scan_type= partition_index_first;
+    error= common_first_last(m_rec0);
+  }
+  else
+  {
+    error= common_index_read(m_rec0,
+                             start_key->key,
+                             start_key->length, start_key->flag);
+  }
+  DBUG_RETURN(error);
+}
+
+
+int ha_partition::read_range_next()
+{
+  DBUG_ENTER("ha_partition::read_range_next");
+  if (m_ordered)
+  {
+    DBUG_RETURN(handler::read_range_next());
+  }
+  DBUG_RETURN(handle_unordered_next(m_rec0, eq_range));
+}
+
+
+int ha_partition::partition_scan_set_up(byte * buf, bool idx_read_flag)
+{
+  DBUG_ENTER("ha_partition::partition_scan_set_up");
+
+  if (idx_read_flag)
+    get_partition_set(table, buf, active_index, &m_start_key, &m_part_spec);
+  else
+    get_partition_set(table, buf, MAX_KEY, 0, &m_part_spec);
+  if (m_part_spec.start_part > m_part_spec.end_part)
+  {
+    /*
+      We discovered a partition set but the set was empty so we report
+      key not found.
+    */
+    DBUG_PRINT("info", ("scan with no partition to scan"));
+    DBUG_RETURN(HA_ERR_END_OF_FILE);
+  }
+  if (m_part_spec.start_part == m_part_spec.end_part)
+  {
+    /*
+      We discovered a single partition to scan; this never needs to be
+      performed using the ordered index scan.
+    */
+    DBUG_PRINT("info", ("index scan using the single partition %d",
+			m_part_spec.start_part));
+    m_ordered_scan_ongoing= FALSE;
+  }
+  else
+  {
+    /*
+      Set m_ordered_scan_ongoing according to how the scan should be done
+    */
+    m_ordered_scan_ongoing= m_ordered;
+  }
+  DBUG_ASSERT(m_part_spec.start_part < m_tot_parts &&
+              m_part_spec.end_part < m_tot_parts);
+  DBUG_RETURN(0);
+}
+
+
+/****************************************************************************
+  Unordered Index Scan Routines
+****************************************************************************/
+/*
+  These routines are used to scan partitions without considering order.
+  This is performed in two situations.
+  1) In read_multi_range this is the normal case
+  2) When performing any type of index_read, index_first or index_last
+     where all fields in the partition function are bound. In this case
+     the index scan is performed on only one partition and thus it isn't
+     necessary to perform any sort.
+*/
+
+int ha_partition::handle_unordered_next(byte *buf, bool next_same)
+{
+  handler *file= m_file[m_part_spec.start_part];
+  int error;
+  DBUG_ENTER("ha_partition::handle_unordered_next");
+
+  /*
+    We should consider if this should be split into two functions as
+    next_same is always a local constant
+  */
+  if (next_same)
+  {
+    if (!(error= file->index_next_same(buf, m_start_key.key,
+                                       m_start_key.length)))
+    {
+      m_last_part= m_part_spec.start_part;
+      DBUG_RETURN(0);
+    }
+  }
+  else if (!(error= file->index_next(buf)))
+  {
+    if (compare_key(end_range) <= 0)
+    {
+      m_last_part= m_part_spec.start_part;
+      DBUG_RETURN(0);                           // Row was in range
+    }
+    error= HA_ERR_END_OF_FILE;
+  }
+
+  if (error == HA_ERR_END_OF_FILE)
+  {
+    m_part_spec.start_part++;                    // Start using next part
+    error= handle_unordered_scan_next_partition(buf);
+  }
+  DBUG_RETURN(error);
+}
+
+
+/*
+  This routine is used to start the index scan on the next partition.
+  Used both for the initial start and after completing the scan on one
+  partition.
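+  For example, when partition p0 returns HA_ERR_END_OF_FILE the saved
+  start key in m_start_key is simply replayed with index_read on p1, and
+  so on until a row within the range is found or all partitions in the
+  set are exhausted.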
+*/ + +int ha_partition::handle_unordered_scan_next_partition(byte * buf) +{ + uint i; + DBUG_ENTER("ha_partition::handle_unordered_scan_next_partition"); + + for (i= m_part_spec.start_part; i <= m_part_spec.end_part; i++) + { + int error; + handler *file= m_file[i]; + + m_part_spec.start_part= i; + switch (m_index_scan_type) { + case partition_index_read: + DBUG_PRINT("info", ("index_read on partition %d", i)); + error= file->index_read(buf, m_start_key.key, + m_start_key.length, + m_start_key.flag); + break; + case partition_index_first: + DBUG_PRINT("info", ("index_first on partition %d", i)); + error= file->index_first(buf); + break; + default: + DBUG_ASSERT(FALSE); + DBUG_RETURN(1); + } + if (!error) + { + if (compare_key(end_range) <= 0) + { + m_last_part= i; + DBUG_RETURN(0); + } + error= HA_ERR_END_OF_FILE; + } + if ((error != HA_ERR_END_OF_FILE) && (error != HA_ERR_KEY_NOT_FOUND)) + DBUG_RETURN(error); + DBUG_PRINT("info", ("HA_ERR_END_OF_FILE on partition %d", i)); + } + m_part_spec.start_part= NO_CURRENT_PART_ID; + DBUG_RETURN(HA_ERR_END_OF_FILE); +} + + +/* + This part contains the logic to handle index scans that require ordered + output. This includes all except those started by read_range_first with + the flag ordered set to FALSE. Thus most direct index_read and all + index_first and index_last. + + We implement ordering by keeping one record plus a key buffer for each + partition. Every time a new entry is requested we will fetch a new + entry from the partition that is currently not filled with an entry. + Then the entry is put into its proper sort position. + + Returning a record is done by getting the top record, copying the + record to the request buffer and setting the partition as empty on + entries. +*/ + +int ha_partition::handle_ordered_index_scan(byte *buf) +{ + uint i, j= 0; + bool found= FALSE; + bool reverse_order= FALSE; + DBUG_ENTER("ha_partition::handle_ordered_index_scan"); + + m_top_entry= NO_CURRENT_PART_ID; + queue_remove_all(&queue); + for (i= m_part_spec.start_part; i <= m_part_spec.end_part; i++) + { + int error; + byte *rec_buf_ptr= rec_buf(i); + handler *file= m_file[i]; + + switch (m_index_scan_type) { + case partition_index_read: + error= file->index_read(rec_buf_ptr, + m_start_key.key, + m_start_key.length, + m_start_key.flag); + reverse_order= FALSE; + break; + case partition_index_first: + error= file->index_first(rec_buf_ptr); + reverse_order= FALSE; + break; + case partition_index_last: + error= file->index_last(rec_buf_ptr); + reverse_order= TRUE; + break; + default: + DBUG_ASSERT(FALSE); + DBUG_RETURN(HA_ERR_END_OF_FILE); + } + if (!error) + { + found= TRUE; + /* + Initialise queue without order first, simply insert + */ + queue_element(&queue, j++)= (byte*)queue_buf(i); + } + else if (error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE) + { + DBUG_RETURN(error); + } + } + if (found) + { + /* + We found at least one partition with data, now sort all entries and + after that read the first entry and copy it to the buffer to return in. 
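+
+  This is in effect an m-way merge sort: every partition contributes at
+  most one candidate row at a time, the priority queue orders the
+  candidates by key, and returning a row pops the top entry and refills
+  it from the same partition.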
+ */ + queue_set_max_at_top(&queue, reverse_order); + queue_set_cmp_arg(&queue, (void*)m_curr_key_info); + queue.elements= j; + queue_fix(&queue); + return_top_record(buf); + DBUG_PRINT("info", ("Record returned from partition %d", m_top_entry)); + DBUG_RETURN(0); + } + DBUG_RETURN(HA_ERR_END_OF_FILE); +} + + +void ha_partition::return_top_record(byte *buf) +{ + uint part_id; + byte *key_buffer= queue_top(&queue); + byte *rec_buffer= key_buffer + PARTITION_BYTES_IN_POS; + part_id= uint2korr(key_buffer); + memcpy(buf, rec_buffer, m_rec_length); + m_last_part= part_id; + m_top_entry= part_id; +} + + +int ha_partition::handle_ordered_next(byte *buf, bool next_same) +{ + int error; + uint part_id= m_top_entry; + handler *file= m_file[part_id]; + DBUG_ENTER("ha_partition::handle_ordered_next"); + + if (!next_same) + error= file->index_next(rec_buf(part_id)); + else + error= file->index_next_same(rec_buf(part_id), m_start_key.key, + m_start_key.length); + if (error) + { + if (error == HA_ERR_END_OF_FILE) + { + /* Return next buffered row */ + queue_remove(&queue, (uint) 0); + if (queue.elements) + { + DBUG_PRINT("info", ("Record returned from partition %u (2)", + m_top_entry)); + return_top_record(buf); + error= 0; + } + } + DBUG_RETURN(error); + } + queue_replaced(&queue); + return_top_record(buf); + DBUG_PRINT("info", ("Record returned from partition %u", m_top_entry)); + DBUG_RETURN(0); +} + + +int ha_partition::handle_ordered_prev(byte *buf) +{ + int error; + uint part_id= m_top_entry; + handler *file= m_file[part_id]; + DBUG_ENTER("ha_partition::handle_ordered_prev"); + if ((error= file->index_prev(rec_buf(part_id)))) + { + if (error == HA_ERR_END_OF_FILE) + { + queue_remove(&queue, (uint) 0); + if (queue.elements) + { + return_top_record(buf); + DBUG_PRINT("info", ("Record returned from partition %d (2)", + m_top_entry)); + error= 0; + } + } + DBUG_RETURN(error); + } + queue_replaced(&queue); + return_top_record(buf); + DBUG_PRINT("info", ("Record returned from partition %d", m_top_entry)); + DBUG_RETURN(0); +} + + +void ha_partition::include_partition_fields_in_used_fields() +{ + DBUG_ENTER("ha_partition::include_partition_fields_in_used_fields"); + Field **ptr= m_part_field_array; + do + { + ha_set_bit_in_read_set((*ptr)->fieldnr); + } while (*(++ptr)); + DBUG_VOID_RETURN; +} + + +/**************************************************************************** + MODULE information calls +****************************************************************************/ + +/* + These are all first approximations of the extra, info, scan_time + and read_time calls +*/ + +/* + ::info() is used to return information to the optimizer. + Currently this table handler doesn't implement most of the fields + really needed. SHOW also makes use of this data + Another note, if your handler doesn't proved exact record count, + you will probably want to have the following in your code: + if (records < 2) + records = 2; + The reason is that the server will optimize for cases of only a single + record. If in a table scan you don't know the number of records + it will probably be better to set records to two so you can return + as many records as you need. + + Along with records a few more variables you may wish to set are: + records + deleted + data_file_length + index_file_length + delete_length + check_time + Take a look at the public variables in handler.h for more information. 
+ + Called in: + filesort.cc + ha_heap.cc + item_sum.cc + opt_sum.cc + sql_delete.cc + sql_delete.cc + sql_derived.cc + sql_select.cc + sql_select.cc + sql_select.cc + sql_select.cc + sql_select.cc + sql_show.cc + sql_show.cc + sql_show.cc + sql_show.cc + sql_table.cc + sql_union.cc + sql_update.cc + + Some flags that are not implemented + HA_STATUS_POS: + This parameter is never used from the MySQL Server. It is checked in a + place in MyISAM so could potentially be used by MyISAM specific programs. + HA_STATUS_NO_LOCK: + This is declared and often used. It's only used by MyISAM. + It means that MySQL doesn't need the absolute latest statistics + information. This may save the handler from doing internal locks while + retrieving statistics data. +*/ + +void ha_partition::info(uint flag) +{ + handler *file, **file_array; + DBUG_ENTER("ha_partition:info"); + + if (flag & HA_STATUS_AUTO) + { + DBUG_PRINT("info", ("HA_STATUS_AUTO")); + /* + The auto increment value is only maintained by the first handler + so we will only call this. + */ + m_file[0]->info(HA_STATUS_AUTO); + } + if (flag & HA_STATUS_VARIABLE) + { + DBUG_PRINT("info", ("HA_STATUS_VARIABLE")); + /* + Calculates statistical variables + records: Estimate of number records in table + We report sum (always at least 2) + deleted: Estimate of number holes in the table due to + deletes + We report sum + data_file_length: Length of data file, in principle bytes in table + We report sum + index_file_length: Length of index file, in principle bytes in + indexes in the table + We report sum + mean_record_length:Mean record length in the table + We calculate this + check_time: Time of last check (only applicable to MyISAM) + We report last time of all underlying handlers + */ + records= 0; + deleted= 0; + data_file_length= 0; + index_file_length= 0; + check_time= 0; + file_array= m_file; + do + { + file= *file_array; + file->info(HA_STATUS_VARIABLE); + records+= file->records; + deleted+= file->deleted; + data_file_length+= file->data_file_length; + index_file_length+= file->index_file_length; + if (file->check_time > check_time) + check_time= file->check_time; + } while (*(++file_array)); + if (records < 2 && + m_table_flags & HA_NOT_EXACT_COUNT) + records= 2; + if (records > 0) + mean_rec_length= (ulong) (data_file_length / records); + else + mean_rec_length= 1; //? What should we set here + } + if (flag & HA_STATUS_CONST) + { + DBUG_PRINT("info", ("HA_STATUS_CONST")); + /* + Recalculate loads of constant variables. MyISAM also sets things + directly on the table share object. + + Check whether this should be fixed since handlers should not + change things directly on the table object. + + Monty comment: This should NOT be changed! It's the handlers + responsibility to correct table->s->keys_xxxx information if keys + have been disabled. + + The most important parameters set here is records per key on + all indexes. block_size and primar key ref_length. + + For each index there is an array of rec_per_key. + As an example if we have an index with three attributes a,b and c + we will have an array of 3 rec_per_key. + rec_per_key[0] is an estimate of number of records divided by + number of unique values of the field a. + rec_per_key[1] is an estimate of the number of records divided + by the number of unique combinations of the fields a and b. + rec_per_key[2] is an estimate of the number of records divided + by the number of unique combinations of the fields a,b and c. 
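+      For instance, in a table with 1000 rows, 10 distinct values of a and
+      100 distinct combinations of (a,b), rec_per_key[0] == 100 and
+      rec_per_key[1] == 10.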
+ + Many handlers only set the value of rec_per_key when all fields + are bound (rec_per_key[2] in the example above). + + If the handler doesn't support statistics, it should set all of the + above to 0. + + We will allow the first handler to set the rec_per_key and use + this as an estimate on the total table. + + max_data_file_length: Maximum data file length + We ignore it, is only used in + SHOW TABLE STATUS + max_index_file_length: Maximum index file length + We ignore it since it is never used + block_size: Block size used + We set it to the value of the first handler + sortkey: Never used at any place so ignored + ref_length: We set this to the value calculated + and stored in local object + raid_type: Set by first handler (MyISAM) + raid_chunks: Set by first handler (MyISAM) + raid_chunksize: Set by first handler (MyISAM) + create_time: Creation time of table + Set by first handler + + So we calculate these constants by using the variables on the first + handler. + */ + + file= m_file[0]; + file->info(HA_STATUS_CONST); + create_time= file->create_time; + raid_type= file->raid_type; + raid_chunks= file->raid_chunks; + raid_chunksize= file->raid_chunksize; + ref_length= m_ref_length; + } + if (flag & HA_STATUS_ERRKEY) + { + handler *file= m_file[m_last_part]; + DBUG_PRINT("info", ("info: HA_STATUS_ERRKEY")); + /* + This flag is used to get index number of the unique index that + reported duplicate key + We will report the errkey on the last handler used and ignore the rest + */ + file->info(HA_STATUS_ERRKEY); + if (file->errkey != (uint) -1) + errkey= file->errkey; + } + if (flag & HA_STATUS_TIME) + { + DBUG_PRINT("info", ("info: HA_STATUS_TIME")); + /* + This flag is used to set the latest update time of the table. + Used by SHOW commands + We will report the maximum of these times + */ + update_time= 0; + file_array= m_file; + do + { + file= *file_array; + file->info(HA_STATUS_TIME); + if (file->update_time > update_time) + update_time= file->update_time; + } while (*(++file_array)); + } + DBUG_VOID_RETURN; +} + + +/* + extra() is called whenever the server wishes to send a hint to + the storage engine. The MyISAM engine implements the most hints. + + We divide the parameters into the following categories: + 1) Parameters used by most handlers + 2) Parameters used by some non-MyISAM handlers + 3) Parameters used only by MyISAM + 4) Parameters only used by temporary tables for query processing + 5) Parameters only used by MyISAM internally + 6) Parameters not used at all + + The partition handler need to handle category 1), 2) and 3). + + 1) Parameters used by most handlers + ----------------------------------- + HA_EXTRA_RESET: + This option is used by most handlers and it resets the handler state + to the same state as after an open call. This includes releasing + any READ CACHE or WRITE CACHE or other internal buffer used. + + It is called from the reset method in the handler interface. There are + three instances where this is called. + 1) After completing a INSERT ... SELECT ... query the handler for the + table inserted into is reset + 2) It is called from close_thread_table which in turn is called from + close_thread_tables except in the case where the tables are locked + in which case ha_commit_stmt is called instead. + It is only called from here if flush_version hasn't changed and the + table is not an old table when calling close_thread_table. + close_thread_tables is called from many places as a general clean up + function after completing a query. 
+    3) It is called when deleting the QUICK_RANGE_SELECT object if the
+       QUICK_RANGE_SELECT object had its own handler object. It is called
+       immediately before close of this local handler object.
+  HA_EXTRA_KEYREAD:
+  HA_EXTRA_NO_KEYREAD:
+    These parameters are used to provide an optimisation hint to the handler.
+    If HA_EXTRA_KEYREAD is set it is enough to read the index fields; for
+    many handlers this means that index-only scans can be used and it
+    is not necessary to use the real records to satisfy this part of the
+    query. Index-only scans are a very important optimisation for disk-based
+    indexes. For main-memory indexes most indexes contain a reference to the
+    record and thus KEYREAD only says that it is enough to read key fields.
+    HA_EXTRA_NO_KEYREAD disables this for the handler; HA_EXTRA_RESET will
+    also disable this option.
+    The handler will set HA_KEYREAD_ONLY in its table flags to indicate that
+    this feature is supported.
+  HA_EXTRA_FLUSH:
+    Indication to flush tables to disk, called at close_thread_table to
+    ensure disk based tables are flushed at end of query execution.
+
+  2) Parameters used by some non-MyISAM handlers
+  ----------------------------------------------
+  HA_EXTRA_RETRIEVE_ALL_COLS:
+    Many handlers have implemented optimisations to avoid fetching all
+    fields when retrieving data. In certain situations all fields need
+    to be retrieved even though the query_id is not set on all field
+    objects.
+
+    It is called from copy_data_between_tables where all fields are
+    copied without setting query_id before calling the handlers.
+    It is called from UPDATE statements when the fields of the index
+    used are updated or ORDER BY is used with UPDATE.
+    And finally when calculating the checksum of a table using the CHECKSUM
+    command.
+  HA_EXTRA_RETRIEVE_PRIMARY_KEY:
+    In some situations it is mandatory to retrieve primary key fields
+    independent of the query ids. This extra flag specifies that fetching
+    the primary key fields is mandatory.
+  HA_EXTRA_KEYREAD_PRESERVE_FIELDS:
+    This is a strictly InnoDB feature that is more or less undocumented.
+    When it is activated InnoDB copies field by field from its fetch
+    cache instead of all fields in one memcpy. The purpose of this is
+    unclear.
+    Cut from include/my_base.h:
+    When using HA_EXTRA_KEYREAD, overwrite only key member fields and keep
+    other fields intact. When this is off (by default) InnoDB will use memcpy
+    to overwrite the entire row.
+  HA_EXTRA_IGNORE_DUP_KEY:
+  HA_EXTRA_NO_IGNORE_DUP_KEY:
+    Informs the handler that we will not stop the transaction if we get
+    duplicate key errors during insert/update.
+    Always called in pairs, triggered by INSERT IGNORE and other similar
+    SQL constructs.
+    Not used by MyISAM.
+
+  3) Parameters used only by MyISAM
+  ---------------------------------
+  HA_EXTRA_NORMAL:
+    Only used in MyISAM to reset quick mode, not implemented by any other
+    handler. Quick mode is also reset in MyISAM by HA_EXTRA_RESET.
+
+    It is called after completing a successful DELETE query if the QUICK
+    option is set.
+
+  HA_EXTRA_QUICK:
+    When the user does DELETE QUICK FROM table where-clause; this extra
+    option is called before the delete query is performed and
+    HA_EXTRA_NORMAL is called after the delete query is completed.
+    Temporary tables used internally in MySQL always set this option.
+
+    The meaning of quick mode is that when deleting in a B-tree no merging
+    of leaves is performed.
+    This is a common method and many large DBMSs
+    actually only support this quick mode since it is very difficult to
+    merge leaves in a tree used by many threads concurrently.
+
+  HA_EXTRA_CACHE:
+    This flag is usually set with extra_opt along with a cache size.
+    The size of this buffer is set by the user variable
+    record_buffer_size. The value of this cache size is the amount of
+    data read from disk in each fetch when performing a table scan.
+    This means that before scanning a table it is normal to call
+    extra with HA_EXTRA_CACHE and when the scan is completed to call
+    HA_EXTRA_NO_CACHE to release the cache memory.
+
+    Some special care is taken when using this extra parameter since there
+    could be a write ongoing on the table in the same statement. In this
+    case one has to take special care since there might be a WRITE CACHE as
+    well. HA_EXTRA_CACHE specifies using a READ CACHE, and using a
+    READ CACHE and a WRITE CACHE at the same time is not possible.
+
+    Only MyISAM currently uses this option.
+
+    It is set when doing full table scans using rr_sequential and
+    reset when completing such a scan with end_read_record
+    (resetting means calling extra with HA_EXTRA_NO_CACHE).
+
+    It is set in filesort.cc for MyISAM internal tables and it is set in
+    a multi-update where HA_EXTRA_CACHE is called on a temporary result
+    table and after that ha_rnd_init(0) on the table to be updated
+    and immediately after that HA_EXTRA_NO_CACHE on the table to be updated.
+
+    Apart from that it is always used from init_read_record but not when
+    used from UPDATE statements. It is not used from DELETE statements
+    with ORDER BY and LIMIT but it is used in the normal scan loop in DELETE
+    statements. The reason here is that DELETEs in MyISAM don't move
+    existing data rows.
+
+    It is also set in copy_data_between_tables when scanning the old table
+    to copy over to the new table.
+    And it is set in join_init_read_record where quick objects are used
+    to perform a scan on the table. In this case the full table scan can
+    even be performed multiple times as part of the nested loop join.
+
+    For purposes of the partition handler it is obviously necessary to have
+    special treatment of this extra call. If we would simply pass this
+    extra call down to each handler we would allocate
+    cache size * number of partitions amount of memory and this is not
+    necessary since we will only scan one partition at a time when doing
+    full table scans.
+
+    Thus we treat it by first checking whether we have MyISAM handlers in
+    the table; if not we simply ignore the call and if we have we will
+    record the call but will not call any underlying handler yet. Then
+    when performing the sequential scan we will check this recorded value
+    and call extra_opt whenever we start scanning a new partition.
+
+    monty: Needs to be fixed so that it's passed to all handlers when we
+    move to another partition during table scan.
+
+  HA_EXTRA_NO_CACHE:
+    When performing a UNION SELECT HA_EXTRA_NO_CACHE is called from the
+    flush method in the select_union class.
+    It is used to some extent by delayed inserts.
+    See HA_EXTRA_RESET_STATE for use in conjunction with delete_all_rows().
+
+    It should be ok to call HA_EXTRA_NO_CACHE on all underlying handlers
+    if they are MyISAM handlers. For other handlers we can ignore the
+    call. If no cache is in use they will quickly return after finding
+    this out. And we also ensure that all caches are disabled and none
+    is left by mistake.
+    In the future this call will probably be deleted and we will instead
+    call ::reset();
+
+  HA_EXTRA_WRITE_CACHE:
+    See above, called from various places. It is mostly used when we
+    do INSERT ... SELECT.
+    No special handling to save cache space is developed currently.
+
+  HA_EXTRA_PREPARE_FOR_UPDATE:
+    This is called as part of a multi-table update. When the table to be
+    updated is also scanned then this informs the MyISAM handler to drop
+    any caches if dynamic records are used (fixed size records do not care
+    about this call). We pass this along to all underlying MyISAM handlers
+    and ignore it for the rest.
+
+  HA_EXTRA_PREPARE_FOR_DELETE:
+    Only used by MyISAM, called in preparation for a DROP TABLE.
+    It's used mostly for Windows, which cannot handle dropping an open file.
+    On other platforms it has the same effect as HA_EXTRA_FORCE_REOPEN.
+
+  HA_EXTRA_READCHECK:
+  HA_EXTRA_NO_READCHECK:
+    Only one call to HA_EXTRA_NO_READCHECK from ha_open where it says that
+    this is not needed in SQL. The reason for this call is that MyISAM sets
+    the READ_CHECK_USED in the open call so the call is needed for MyISAM
+    to reset this feature.
+    The idea with this parameter was to inform of doing/not doing a read
+    check before applying an update. Since SQL always performs a read
+    before applying the update, no read check is needed in MyISAM either.
+
+    This is a cut from Docs/myisam.txt:
+    Sometimes you might want to force an update without checking whether
+    another user has changed the record since you last read it. This is
+    somewhat dangerous, so it should ideally not be used. That can be
+    accomplished by wrapping the mi_update() call in two calls to mi_extra(),
+    using these functions:
+    HA_EXTRA_NO_READCHECK=5    No readcheck on update
+    HA_EXTRA_READCHECK=6       Use readcheck (def)
+
+  HA_EXTRA_FORCE_REOPEN:
+    Only used by MyISAM, called when altering a table and when closing
+    tables to enforce a reopen of the table files.
+
+  4) Parameters only used by temporary tables for query processing
+  ----------------------------------------------------------------
+  HA_EXTRA_RESET_STATE:
+    Same as HA_EXTRA_RESET except that buffers are not released. If there is
+    a READ CACHE it is reinit'ed. A cache is reinit'ed to restart reading
+    or to change the type of cache between READ CACHE and WRITE CACHE.
+
+    This extra function is always called immediately before calling
+    delete_all_rows on the handler for temporary tables.
+    There is, however, a similar case for a temporary table in sql_union.cc
+    where HA_EXTRA_RESET_STATE isn't called; in two other cases
+    HA_EXTRA_NO_CACHE is called before and HA_EXTRA_WRITE_CACHE is called
+    afterwards.
+    The case with HA_EXTRA_NO_CACHE and HA_EXTRA_WRITE_CACHE means
+    disable caching, delete all rows and enable WRITE CACHE (see the
+    sketch at the end of this category). This is used for temporary
+    tables containing distinct sums and a functional group.
+
+    The only case in which delete_all_rows is called on non-temporary
+    tables is in sql_delete.cc when DELETE FROM table; is called by a user.
+    In this case no special extra calls are performed before or after this
+    call.
+
+    The partition handler should not need to bother about this one. It
+    should never be called.
+
+  HA_EXTRA_NO_ROWS:
+    Don't insert rows indication to HEAP and MyISAM, only used by temporary
+    tables used in query processing.
+    Not handled by partition handler.
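+
+  To summarise category 4), the typical call sequence for emptying and
+  refilling a materialised temporary table looks roughly like this (a
+  sketch of the pattern described above, not a verbatim trace of the
+  server code):
+
+    file->extra(HA_EXTRA_NO_CACHE);    // disable caching
+    file->delete_all_rows();           // empty the temporary table
+    file->extra(HA_EXTRA_WRITE_CACHE); // enable WRITE CACHE for refill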
+
+  5) Parameters only used by MyISAM internally
+  --------------------------------------------
+  HA_EXTRA_REINIT_CACHE:
+    This call reinitialises the READ CACHE described above if there is one
+    and otherwise the call is ignored.
+
+    We can thus safely call it on all underlying handlers if they are
+    MyISAM handlers. It is however never called so we don't handle it at all.
+  HA_EXTRA_FLUSH_CACHE:
+    Flush WRITE CACHE in MyISAM. It is only called from one place in the
+    code. This is in sql_insert.cc where it is called if the table_flags
+    doesn't contain HA_DUPP_POS. The only handler having the HA_DUPP_POS
+    set is the MyISAM handler and so the only handler not receiving this
+    call is MyISAM.
+    Thus in effect this call is made but never used. It could be removed
+    from sql_insert.cc.
+  HA_EXTRA_NO_USER_CHANGE:
+    Only used by MyISAM, never called.
+    Simulates lock_type as locked.
+  HA_EXTRA_WAIT_LOCK:
+  HA_EXTRA_WAIT_NOLOCK:
+    Only used by MyISAM, called from the MyISAM handler but never from server
+    code on top of the handler.
+    Sets lock_wait on/off.
+  HA_EXTRA_NO_KEYS:
+    Only used by MyISAM, only used internally in the MyISAM handler, never
+    called from server level.
+  HA_EXTRA_KEYREAD_CHANGE_POS:
+  HA_EXTRA_REMEMBER_POS:
+  HA_EXTRA_RESTORE_POS:
+  HA_EXTRA_PRELOAD_BUFFER_SIZE:
+  HA_EXTRA_CHANGE_KEY_TO_DUP:
+  HA_EXTRA_CHANGE_KEY_TO_UNIQUE:
+    Only used by MyISAM, never called.
+
+  6) Parameters not used at all
+  -----------------------------
+  HA_EXTRA_KEY_CACHE:
+  HA_EXTRA_NO_KEY_CACHE:
+    These parameters are no longer used and could be removed.
+*/
+
+int ha_partition::extra(enum ha_extra_function operation)
+{
+  DBUG_ENTER("ha_partition::extra");
+  DBUG_PRINT("info", ("operation: %d", (int) operation));
+
+  switch (operation) {
+    /* Category 1), used by most handlers */
+  case HA_EXTRA_KEYREAD:
+  case HA_EXTRA_NO_KEYREAD:
+  case HA_EXTRA_FLUSH:
+    DBUG_RETURN(loop_extra(operation));
+
+    /* Category 2), used by non-MyISAM handlers */
+  case HA_EXTRA_IGNORE_DUP_KEY:
+  case HA_EXTRA_NO_IGNORE_DUP_KEY:
+  case HA_EXTRA_RETRIEVE_ALL_COLS:
+  case HA_EXTRA_RETRIEVE_PRIMARY_KEY:
+  case HA_EXTRA_KEYREAD_PRESERVE_FIELDS:
+  {
+    if (!m_myisam)
+      DBUG_RETURN(loop_extra(operation));
+    break;
+  }
+
+    /* Category 3), used by MyISAM handlers */
+  case HA_EXTRA_NORMAL:
+  case HA_EXTRA_QUICK:
+  case HA_EXTRA_NO_READCHECK:
+  case HA_EXTRA_PREPARE_FOR_UPDATE:
+  case HA_EXTRA_PREPARE_FOR_DELETE:
+  case HA_EXTRA_FORCE_REOPEN:
+  {
+    if (m_myisam)
+      DBUG_RETURN(loop_extra(operation));
+    break;
+  }
+  case HA_EXTRA_CACHE:
+  {
+    prepare_extra_cache(0);
+    break;
+  }
+  case HA_EXTRA_NO_CACHE:
+  {
+    m_extra_cache= FALSE;
+    m_extra_cache_size= 0;
+    DBUG_RETURN(loop_extra(operation));
+  }
+  default:
+  {
+    /* Temporary crash to discover what is wrong */
+    DBUG_ASSERT(0);
+    break;
+  }
+  }
+  DBUG_RETURN(0);
+}
+
+
+/*
+  This will in the future be called instead of extra(HA_EXTRA_RESET) as this
+  is such a common call.
+*/
+
+int ha_partition::reset(void)
+{
+  int result= 0, tmp;
+  handler **file;
+  DBUG_ENTER("ha_partition::reset");
+  file= m_file;
+  if (m_part_info)
+    bitmap_clear_all(&m_part_info->used_partitions);
+  do
+  {
+    if ((tmp= (*file)->reset()))
+      result= tmp;
+  } while (*(++file));
+  DBUG_RETURN(result);
+}
+
+
+int ha_partition::extra_opt(enum ha_extra_function operation, ulong cachesize)
+{
+  DBUG_ENTER("ha_partition::extra_opt()");
+  DBUG_ASSERT(HA_EXTRA_CACHE == operation);
+  prepare_extra_cache(cachesize);
+  DBUG_RETURN(0);
+}
+
+
+void ha_partition::prepare_extra_cache(uint cachesize)
+{
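+  /*
+    Remember the requested cache size. If a scan is already positioned on
+    the first partition we activate the cache on it at once; otherwise
+    late_extra_cache() will activate it when the scan reaches each new
+    partition.
+  */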
+  DBUG_ENTER("ha_partition::prepare_extra_cache()");
+
+  m_extra_cache= TRUE;
+  m_extra_cache_size= cachesize;
+  if (m_part_spec.start_part != NO_CURRENT_PART_ID)
+  {
+    DBUG_ASSERT(m_part_spec.start_part == 0);
+    late_extra_cache(0);
+  }
+  DBUG_VOID_RETURN;
+}
+
+
+int ha_partition::loop_extra(enum ha_extra_function operation)
+{
+  int result= 0, tmp;
+  handler **file;
+  DBUG_ENTER("ha_partition::loop_extra()");
+  for (file= m_file; *file; file++)
+  {
+    if ((tmp= (*file)->extra(operation)))
+      result= tmp;
+  }
+  DBUG_RETURN(result);
+}
+
+
+void ha_partition::late_extra_cache(uint partition_id)
+{
+  handler *file;
+  DBUG_ENTER("ha_partition::late_extra_cache");
+  if (!m_extra_cache)
+    DBUG_VOID_RETURN;
+  file= m_file[partition_id];
+  if (m_extra_cache_size == 0)
+    VOID(file->extra(HA_EXTRA_CACHE));
+  else
+    VOID(file->extra_opt(HA_EXTRA_CACHE, m_extra_cache_size));
+  DBUG_VOID_RETURN;
+}
+
+
+void ha_partition::late_extra_no_cache(uint partition_id)
+{
+  handler *file;
+  DBUG_ENTER("ha_partition::late_extra_no_cache");
+  if (!m_extra_cache)
+    DBUG_VOID_RETURN;
+  file= m_file[partition_id];
+  VOID(file->extra(HA_EXTRA_NO_CACHE));
+  DBUG_VOID_RETURN;
+}
+
+
+/****************************************************************************
+                MODULE optimiser support
+****************************************************************************/
+
+const key_map *ha_partition::keys_to_use_for_scanning()
+{
+  DBUG_ENTER("ha_partition::keys_to_use_for_scanning");
+  DBUG_RETURN(m_file[0]->keys_to_use_for_scanning());
+}
+
+
+double ha_partition::scan_time()
+{
+  double scan_time= 0;
+  handler **file;
+  DBUG_ENTER("ha_partition::scan_time");
+
+  for (file= m_file; *file; file++)
+    scan_time+= (*file)->scan_time();
+  DBUG_RETURN(scan_time);
+}
+
+
+/*
+  This will be optimised later to include whether or not the index can
+  be used with partitioning. To achieve this we need to add another
+  parameter that specifies how many of the index fields are bound in the
+  ranges. Possibly added as a new call to handlers.
+*/
+
+double ha_partition::read_time(uint index, uint ranges, ha_rows rows)
+{
+  DBUG_ENTER("ha_partition::read_time");
+  DBUG_RETURN(m_file[0]->read_time(index, ranges, rows));
+}
+
+/*
+  Given a starting key and an ending key, estimate the number of rows that
+  will exist between the two. end_key may be empty, in which case we
+  determine whether start_key matches any rows.
+
+  Called from opt_range.cc by check_quick_keys().
+
+  monty: MUST be called for each range and added.
+         Note that MySQL will assume that if this returns 0 there are no
+         matching rows for the range!
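+
+  A sketch of the intended accumulation on the optimiser side (the names
+  range_count, ranges and keynr are hypothetical here; this is not the
+  actual opt_range.cc code):
+
+    ha_rows total= 0;
+    for (uint i= 0; i < range_count; i++)
+      total+= file->records_in_range(keynr, &ranges[i].min_key,
+                                     &ranges[i].max_key);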
+*/
+
+ha_rows ha_partition::records_in_range(uint inx, key_range *min_key,
+                                       key_range *max_key)
+{
+  ha_rows in_range= 0;
+  handler **file;
+  DBUG_ENTER("ha_partition::records_in_range");
+
+  file= m_file;
+  do
+  {
+    in_range+= (*file)->records_in_range(inx, min_key, max_key);
+  } while (*(++file));
+  DBUG_RETURN(in_range);
+}
+
+
+ha_rows ha_partition::estimate_rows_upper_bound()
+{
+  ha_rows rows, tot_rows= 0;
+  handler **file;
+  DBUG_ENTER("ha_partition::estimate_rows_upper_bound");
+
+  file= m_file;
+  do
+  {
+    rows= (*file)->estimate_rows_upper_bound();
+    if (rows == HA_POS_ERROR)
+      DBUG_RETURN(HA_POS_ERROR);
+    tot_rows+= rows;
+  } while (*(++file));
+  DBUG_RETURN(tot_rows);
+}
+
+
+uint8 ha_partition::table_cache_type()
+{
+  DBUG_ENTER("ha_partition::table_cache_type");
+  DBUG_RETURN(m_file[0]->table_cache_type());
+}
+
+
+/****************************************************************************
+                MODULE print messages
+****************************************************************************/
+
+const char *ha_partition::index_type(uint inx)
+{
+  DBUG_ENTER("ha_partition::index_type");
+  DBUG_RETURN(m_file[0]->index_type(inx));
+}
+
+
+void ha_partition::print_error(int error, myf errflag)
+{
+  DBUG_ENTER("ha_partition::print_error");
+  /* Should probably look for my own errors first */
+  /* monty: needs to be called for the last used partition ! */
+  if (error == HA_ERR_NO_PARTITION_FOUND)
+    my_error(ER_NO_PARTITION_FOR_GIVEN_VALUE, MYF(0),
+             m_part_info->part_expr->val_int());
+  else
+    m_file[0]->print_error(error, errflag);
+  DBUG_VOID_RETURN;
+}
+
+
+bool ha_partition::get_error_message(int error, String *buf)
+{
+  DBUG_ENTER("ha_partition::get_error_message");
+  /* Should probably look for my own errors first */
+  /* monty: needs to be called for the last used partition ! */
+  DBUG_RETURN(m_file[0]->get_error_message(error, buf));
+}
+
+
+/****************************************************************************
+                MODULE handler characteristics
+****************************************************************************/
+/*
+  If frm_error() is called then we will use this to find out what file
+  extensions exist for the storage engine. This is also used by the default
+  rename_table and delete_table methods in handler.cc.
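+
+  As an illustration, the default delete_table in handler.cc loops over
+  these extensions roughly like this (a simplified sketch, with error
+  handling omitted; buff and name as in handler.cc):
+
+    for (const char **ext= bas_ext(); *ext; ext++)
+    {
+      fn_format(buff, name, "", *ext, 2 | 4);
+      (void) my_delete_with_symlink(buff, MYF(0));
+    }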
+*/ + +static const char *ha_partition_ext[]= +{ + ha_par_ext, NullS +}; + +const char **ha_partition::bas_ext() const +{ return ha_partition_ext; } + + +uint ha_partition::min_of_the_max_uint(uint (handler::*operator_func)(void) const) const +{ + handler **file; + uint min_of_the_max= ((*m_file)->*operator_func)(); + + for (file= m_file+1; *file; file++) + { + uint tmp= ((*file)->*operator_func)(); + set_if_smaller(min_of_the_max, tmp); + } + return min_of_the_max; +} + + +uint ha_partition::max_supported_key_parts() const +{ + return min_of_the_max_uint(&handler::max_supported_key_parts); +} + + +uint ha_partition::max_supported_key_length() const +{ + return min_of_the_max_uint(&handler::max_supported_key_length); +} + + +uint ha_partition::max_supported_key_part_length() const +{ + return min_of_the_max_uint(&handler::max_supported_key_part_length); +} + + +uint ha_partition::max_supported_record_length() const +{ + return min_of_the_max_uint(&handler::max_supported_record_length); +} + + +uint ha_partition::max_supported_keys() const +{ + return min_of_the_max_uint(&handler::max_supported_keys); +} + + +uint ha_partition::extra_rec_buf_length() const +{ + handler **file; + uint max= (*m_file)->extra_rec_buf_length(); + for (file= m_file, file++; *file; file++) + if (max < (*file)->extra_rec_buf_length()) + max= (*file)->extra_rec_buf_length(); + return max; +} + + +uint ha_partition::min_record_length(uint options) const +{ + handler **file; + uint max= (*m_file)->min_record_length(options); + for (file= m_file, file++; *file; file++) + if (max < (*file)->min_record_length(options)) + max= (*file)->min_record_length(options); + return max; +} + + +/**************************************************************************** + MODULE compare records +****************************************************************************/ +/* + We get two references and need to check if those records are the same. + If they belong to different partitions we decide that they are not + the same record. Otherwise we use the particular handler to decide if + they are the same. Sort in partition id order if not equal. +*/ + +int ha_partition::cmp_ref(const byte *ref1, const byte *ref2) +{ + uint part_id; + my_ptrdiff_t diff1, diff2; + handler *file; + DBUG_ENTER("ha_partition::cmp_ref"); + if ((ref1[0] == ref2[0]) && (ref1[1] == ref2[1])) + { + part_id= get_part_id_from_pos(ref1); + file= m_file[part_id]; + DBUG_ASSERT(part_id < m_tot_parts); + DBUG_RETURN(file->cmp_ref((ref1 + PARTITION_BYTES_IN_POS), + (ref2 + PARTITION_BYTES_IN_POS))); + } + diff1= ref2[1] - ref1[1]; + diff2= ref2[0] - ref1[0]; + if (diff1 > 0) + { + DBUG_RETURN(-1); + } + if (diff1 < 0) + { + DBUG_RETURN(+1); + } + if (diff2 > 0) + { + DBUG_RETURN(-1); + } + DBUG_RETURN(+1); +} + + +/**************************************************************************** + MODULE auto increment +****************************************************************************/ + +void ha_partition::restore_auto_increment() +{ + DBUG_ENTER("ha_partition::restore_auto_increment"); + DBUG_VOID_RETURN; +} + + +/* + This method is called by update_auto_increment which in turn is called + by the individual handlers as part of write_row. We will always let + the first handler keep track of the auto increment value for all + partitions. 
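+
+  The effect (illustrative): an INSERT routed to any partition still asks
+  m_file[0] for the next value, so auto increment values stay unique
+  across the whole partitioned table.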
+*/
+
+ulonglong ha_partition::get_auto_increment()
+{
+  DBUG_ENTER("ha_partition::get_auto_increment");
+  DBUG_RETURN(m_file[0]->get_auto_increment());
+}
+
+
+/****************************************************************************
+                MODULE initialise handler for HANDLER call
+****************************************************************************/
+
+void ha_partition::init_table_handle_for_HANDLER()
+{
+  return;
+}
+
+
+/****************************************************************************
+                MODULE Partition Share
+****************************************************************************/
+/*
+  Service routines for ... methods.
+-------------------------------------------------------------------------
+  Variables for partition share methods. A hash used to track open tables.
+  A mutex for the hash table and an init variable to check if the hash table
+  is initialised.
+  There is also a constant ending of the partition handler file name.
+*/
+
+#ifdef NOT_USED
+static HASH partition_open_tables;
+static pthread_mutex_t partition_mutex;
+static int partition_init= 0;
+
+
+/*
+  Function we use in the creation of our hash to get the key.
+*/
+static byte *partition_get_key(PARTITION_SHARE *share, uint *length,
+                               my_bool not_used __attribute__ ((unused)))
+{
+  *length= share->table_name_length;
+  return (byte *) share->table_name;
+}
+
+/*
+  Example of simple lock controls. The "share" it creates is a structure we
+  will pass to each partition handler. Do you have to have one of these?
+  Well, you have pieces that are used for locking, and they are needed to
+  function.
+*/
+
+
+static PARTITION_SHARE *get_share(const char *table_name, TABLE *table)
+{
+  PARTITION_SHARE *share;
+  uint length;
+  char *tmp_name;
+
+  /*
+    So why does this exist? There is no way currently to init a storage
+    engine.
+    InnoDB and BDB both have modifications to the server to allow them to
+    do this. Since you will not want to do this, this is probably the next
+    best method.
+  */
+  if (!partition_init)
+  {
+    /* Hijack a mutex for init'ing the storage engine */
+    pthread_mutex_lock(&LOCK_mysql_create_db);
+    if (!partition_init)
+    {
+      partition_init++;
+      VOID(pthread_mutex_init(&partition_mutex, MY_MUTEX_INIT_FAST));
+      (void) hash_init(&partition_open_tables, system_charset_info, 32, 0, 0,
+                       (hash_get_key) partition_get_key, 0, 0);
+    }
+    pthread_mutex_unlock(&LOCK_mysql_create_db);
+  }
+  pthread_mutex_lock(&partition_mutex);
+  length= (uint) strlen(table_name);
+
+  if (!(share= (PARTITION_SHARE *) hash_search(&partition_open_tables,
+                                               (byte *) table_name, length)))
+  {
+    if (!(share= (PARTITION_SHARE *)
+          my_multi_malloc(MYF(MY_WME | MY_ZEROFILL),
+                          &share, sizeof(*share),
+                          &tmp_name, length + 1, NullS)))
+    {
+      pthread_mutex_unlock(&partition_mutex);
+      return NULL;
+    }
+
+    share->use_count= 0;
+    share->table_name_length= length;
+    share->table_name= tmp_name;
+    strmov(share->table_name, table_name);
+    if (my_hash_insert(&partition_open_tables, (byte *) share))
+      goto error;
+    thr_lock_init(&share->lock);
+    pthread_mutex_init(&share->mutex, MY_MUTEX_INIT_FAST);
+  }
+  share->use_count++;
+  pthread_mutex_unlock(&partition_mutex);
+
+  return share;
+
+error:
+  pthread_mutex_unlock(&partition_mutex);
+  my_free((gptr) share, MYF(0));
+
+  return NULL;
+}
+
+
+/*
+  Free lock controls. We call this whenever we close a table. If the table
+  had the last reference to the share then we free the memory associated
+  with it.
+*/
+
+static int free_share(PARTITION_SHARE *share)
+{
+  pthread_mutex_lock(&partition_mutex);
+  if (!--share->use_count)
+  {
+    hash_delete(&partition_open_tables, (byte *) share);
+    thr_lock_delete(&share->lock);
+    pthread_mutex_destroy(&share->mutex);
+    my_free((gptr) share, MYF(0));
+  }
+  pthread_mutex_unlock(&partition_mutex);
+
+  return 0;
+}
+#endif /* NOT_USED */
diff --git a/sql/ha_partition.h b/sql/ha_partition.h
new file mode 100644
index 00000000000..760f99ad8aa
--- /dev/null
+++ b/sql/ha_partition.h
@@ -0,0 +1,928 @@
+/* Copyright (C) 2005 MySQL AB
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#ifdef __GNUC__
+#pragma interface /* gcc class implementation */
+#endif
+
+/*
+  PARTITION_SHARE is a structure that will be shared among all open handlers.
+  The partition handler implements the minimum of what you will probably
+  need.
+*/
+
+typedef struct st_partition_share
+{
+  char *table_name;
+  uint table_name_length, use_count;
+  pthread_mutex_t mutex;
+  THR_LOCK lock;
+} PARTITION_SHARE;
+
+
+#define PARTITION_BYTES_IN_POS 2
+class ha_partition :public handler
+{
+private:
+  enum partition_index_scan_type
+  {
+    partition_index_read= 0,
+    partition_index_first= 1,
+    partition_index_last= 2,
+    partition_no_index_scan= 3
+  };
+  /* Data for the partition handler */
+  char *m_file_buffer;            // Buffer with names
+  char *m_name_buffer_ptr;        // Pointer to first partition name
+  handlerton **m_engine_array;    // Array of types of the handlers
+  handler **m_file;               // Array of references to handler inst.
+  partition_info *m_part_info;    // local reference to partition
+  byte *m_start_key_ref;          // Reference of start key in current
+                                  // index scan info
+  Field **m_part_field_array;     // Part field array locally to save acc
+  byte *m_ordered_rec_buffer;     // Row and key buffer for ord. idx scan
+  KEY *m_curr_key_info;           // Current index
+  byte *m_rec0;                   // table->record[0]
+  QUEUE queue;                    // Prio queue used by sorted read
+  /*
+    Since the partition handler is a handler on top of other handlers, it
+    is necessary to keep information about what the underlying handler
+    characteristics are. It is not possible to keep any handler instances
+    for this since the MySQL Server sometimes allocates handler objects
+    without freeing them.
+  */
+  u_long m_table_flags;
+  u_long m_low_byte_first;
+
+  uint m_tot_parts;               // Total number of partitions
+  uint m_no_locks;                // For engines like ha_blackhole, which
+                                  // need no locks
+  uint m_last_part;               // Last file that we update/write
+  int m_lock_type;                // Remembers type of last
+                                  // external_lock
+  part_id_range m_part_spec;      // Which parts to scan
+  uint m_scan_value;              // Value passed in rnd_init
+                                  // call
+  uint m_ref_length;              // Length of position in this
+                                  // handler object
+  key_range m_start_key;          // index read key range
+  enum partition_index_scan_type m_index_scan_type;  // What type of index
+                                                     // scan
+  uint m_top_entry;               // Which partition is to
+                                  // deliver next result
+  uint m_rec_length;              // Local copy of record length
+
+  bool m_ordered;                 // Ordered/Unordered index scan
+  bool m_has_transactions;        // Can we support transactions
+  bool m_pkey_is_clustered;       // Is primary key clustered
+  bool m_create_handler;          // Handler used to create table
+  bool m_is_sub_partitioned;      // Is subpartitioned
+  bool m_ordered_scan_ongoing;
+  bool m_use_bit_array;
+
+  /*
+    We keep track of whether all underlying handlers are MyISAM since
+    MyISAM has a great number of extra flags not needed by other handlers.
+  */
+  bool m_myisam;                  // Are all underlying handlers
+                                  // MyISAM
+  /*
+    We keep track of InnoDB handlers below since it requires proper setting
+    of query_id in fields at index_init and index_read calls.
+  */
+  bool m_innodb;                  // Are all underlying handlers
+                                  // InnoDB
+  /*
+    When calling extra(HA_EXTRA_CACHE) we do not pass this to the underlying
+    handlers immediately. Instead we cache it and call the underlying
+    handlers immediately before starting the scan on the partition. This is
+    to prevent allocating a READ CACHE for each partition in parallel when
+    performing a full table scan on a MyISAM partitioned table.
+    This state is cleared by extra(HA_EXTRA_NO_CACHE).
+  */
+  bool m_extra_cache;
+  uint m_extra_cache_size;
+
+  void init_handler_variables();
+  /*
+    Variables for lock structures.
+  */
+  THR_LOCK_DATA lock;             /* MySQL lock */
+  PARTITION_SHARE *share;         /* Shared lock info */
+
+public:
+  virtual void set_part_info(partition_info *part_info)
+  {
+    m_part_info= part_info;
+    m_is_sub_partitioned= is_sub_partitioned(part_info);
+  }
+  /*
+    -------------------------------------------------------------------------
+    MODULE create/delete handler object
+    -------------------------------------------------------------------------
+    Object create/delete methods. The normal one is called when a table
+    object exists. There is also a method to create the handler object with
+    only partition information. This is used from mysql_create_table when
+    the table is to be created and the engine type is deduced to be the
+    partition handler.
+    -------------------------------------------------------------------------
+  */
+  ha_partition(TABLE_SHARE * table);
+  ha_partition(partition_info * part_info);
+  ~ha_partition();
+  /*
+    A partition handler has no characteristics in itself. It only inherits
+    those from the underlying handlers. Here we set up those constants to
+    enable later calls of the methods to retrieve constants from the under-
+    lying handlers. Returns false if not successful.
+  */
+  int ha_initialise();
+
+  /*
+    -------------------------------------------------------------------------
+    MODULE meta data changes
+    -------------------------------------------------------------------------
+    Meta data routines to CREATE, DROP, RENAME table, often also used at
+    ALTER TABLE (update_create_info used from ALTER TABLE and SHOW ..).
+
+    update_table_comment is used in SHOW TABLE commands to provide a
+    chance for the handler to add any interesting comments to the table
+    comments not provided by the user's comment.
+
+    create_handler_files is called before opening a new handler object
+    with openfrm to call create. It is used to create any local handler
+    object needed in opening the object in openfrm.
+    -------------------------------------------------------------------------
+  */
+  virtual int delete_table(const char *from);
+  virtual int rename_table(const char *from, const char *to);
+  virtual int create(const char *name, TABLE * form,
+                     HA_CREATE_INFO * create_info);
+  virtual int create_handler_files(const char *name);
+  virtual void update_create_info(HA_CREATE_INFO * create_info);
+  virtual char *update_table_comment(const char *comment);
+  virtual int drop_partitions(const char *path);
+private:
+  /*
+    delete_table, rename_table and create use very similar logic which
+    is packed into this routine.
+  */
+  uint del_ren_cre_table(const char *from,
+                         const char *to= NULL,
+                         TABLE * table_arg= NULL,
+                         HA_CREATE_INFO * create_info= NULL);
+  /*
+    One method to create the table_name.par file containing the names of the
+    underlying partitions, their engine and the number of partitions.
+    And one method to read it in.
+  */
+  bool create_handler_file(const char *name);
+  bool get_from_handler_file(const char *name);
+  bool new_handlers_from_part_info();
+  bool create_handlers();
+  void clear_handler_file();
+  void set_up_table_before_create(TABLE * table_arg, HA_CREATE_INFO * info,
+                                  uint part_id);
+  partition_element *find_partition_element(uint part_id);
+public:
+
+  /*
+    -------------------------------------------------------------------------
+    MODULE open/close object
+    -------------------------------------------------------------------------
+    Open and close handler object to ensure all underlying files and
+    objects allocated and deallocated for query handling are handled
+    properly.
+    -------------------------------------------------------------------------
+
+    A handler object is opened as part of its initialisation and before
+    being used for normal queries (though not always before meta-data
+    changes). If the object was opened it will also be closed before being
+    deleted.
+  */
+  virtual int open(const char *name, int mode, uint test_if_locked);
+  virtual int close(void);
+
+  /*
+    -------------------------------------------------------------------------
+    MODULE start/end statement
+    -------------------------------------------------------------------------
+    This module contains methods that are used to understand start/end of
+    statements, transaction boundaries, and aid for proper concurrency
+    control.
+    The partition handler need not implement abort and commit since this
+    will be handled by any underlying handlers implementing transactions.
+    There is only one call to each handler type involved per transaction
+    and these go directly to the handlers supporting transactions
+    (currently InnoDB, BDB and NDB).
+    -------------------------------------------------------------------------
+  */
+  virtual THR_LOCK_DATA **store_lock(THD * thd, THR_LOCK_DATA ** to,
+                                     enum thr_lock_type lock_type);
+  virtual int external_lock(THD * thd, int lock_type);
+  /*
+    When the table is locked a statement is started by calling start_stmt
+    instead of external_lock.
+  */
+  virtual int start_stmt(THD * thd, thr_lock_type lock_type);
+  /*
+    Lock count is the number of locked underlying handlers (I assume).
+  */
+  virtual uint lock_count(void) const;
+  /*
+    Call to unlock rows not to be updated in the transaction.
+  */
+  virtual void unlock_row();
+
+  /*
+    -------------------------------------------------------------------------
+    MODULE change record
+    -------------------------------------------------------------------------
+    This part of the handler interface is used to change the records
+    after INSERT, DELETE, UPDATE, REPLACE method calls but also for other
+    special meta-data operations such as ALTER TABLE, LOAD DATA, TRUNCATE.
+    -------------------------------------------------------------------------
+
+    These methods are used for insert (write_row), update (update_row)
+    and delete (delete_row). All methods to change data always work on
+    one row at a time. update_row and delete_row also contain the old
+    row.
+    delete_all_rows will delete all rows in the table in one call as a
+    special optimisation for DELETE FROM table;
+
+    Bulk inserts are supported if all underlying handlers support them.
+    start_bulk_insert and end_bulk_insert are called before and after a
+    number of calls to write_row.
+    Not yet though.
+  */
+  virtual int write_row(byte * buf);
+  virtual int update_row(const byte * old_data, byte * new_data);
+  virtual int delete_row(const byte * buf);
+  virtual int delete_all_rows(void);
+  virtual void start_bulk_insert(ha_rows rows);
+  virtual int end_bulk_insert();
+
+  /*
+    -------------------------------------------------------------------------
+    MODULE full table scan
+    -------------------------------------------------------------------------
+    This module is used for the most basic access method for any table
+    handler. This is to fetch all data through a full table scan. No
+    indexes are needed to implement this part.
+    It contains one method to start the scan (rnd_init) that can also be
+    called multiple times (typical in a nested loop join). Then proceeding
+    to the next record (rnd_next) and closing the scan (rnd_end).
+    To remember a record for later access there is a method (position)
+    and there is a method used to retrieve the record based on the stored
+    position.
+    The position can be a file position, a primary key, a ROWID dependent
+    on the handler below.
+    -------------------------------------------------------------------------
+  */
+  /*
+    unlike index_init(), rnd_init() can be called two times
+    without rnd_end() in between (it only makes sense if scan=1).
+    Then the second call should prepare for the new table scan
+    (e.g. if rnd_init allocates the cursor, the second call should
+    position it to the start of the table; no need to deallocate
+    and allocate it again).
+  */
+  virtual int rnd_init(bool scan);
+  virtual int rnd_end();
+  virtual int rnd_next(byte * buf);
+  virtual int rnd_pos(byte * buf, byte * pos);
+  virtual void position(const byte * record);
+
+  /*
+    -------------------------------------------------------------------------
+    MODULE index scan
+    -------------------------------------------------------------------------
+    This part of the handler interface is used to perform access through
+    indexes. The interface is defined as a scan interface but the handler
+    can also use key lookup if the index is a unique index or a primary
+    key index.
+    Index scans are mostly useful for SELECT queries but are an important
+    part also of UPDATE, DELETE, REPLACE and CREATE TABLE table AS SELECT
+    and so forth.
+    Naturally an index is needed for an index scan and indexes can either
+    be ordered or hash based. Some ordered indexes can return data in order
+    but not necessarily all of them.
+    There are many flags that define the behavior of indexes in the
+    various handlers. These methods are found in the optimizer module.
+    -------------------------------------------------------------------------
+
+    index_read is called to start a scan of an index. The find_flag defines
+    the semantics of the scan. These flags are defined in
+    include/my_base.h
+    index_read_idx is the same but also initializes the index before doing
+    the same thing as index_read. Thus it is similar to index_init followed
+    by index_read. This is also how we implement it.
+
+    index_read/index_read_idx also returns the first row. Thus for
+    key lookups, the index_read will be the only call to the handler in
+    the index scan.
+
+    index_init initializes an index before using it and index_end does
+    any end processing needed.
+  */
+  virtual int index_read(byte * buf, const byte * key,
+                         uint key_len, enum ha_rkey_function find_flag);
+  virtual int index_read_idx(byte * buf, uint idx, const byte * key,
+                             uint key_len, enum ha_rkey_function find_flag);
+  virtual int index_init(uint idx, bool sorted);
+  virtual int index_end();
+
+  /*
+    These methods are used to jump to the next or previous entry in the
+    index scan. There are also methods to jump to the first and last entry.
+  */
+  virtual int index_next(byte * buf);
+  virtual int index_prev(byte * buf);
+  virtual int index_first(byte * buf);
+  virtual int index_last(byte * buf);
+  virtual int index_next_same(byte * buf, const byte * key, uint keylen);
+  virtual int index_read_last(byte * buf, const byte * key, uint keylen);
+
+  /*
+    read_first_row is a virtual method but is only implemented by
+    handler.cc; no storage engine has implemented it so neither
+    will the partition handler.
+
+    virtual int read_first_row(byte *buf, uint primary_key);
+  */
+
+  /*
+    We don't implement multi read range yet, will do later.
+    virtual int read_multi_range_first(KEY_MULTI_RANGE **found_range_p,
+                                       KEY_MULTI_RANGE *ranges,
+                                       uint range_count,
+                                       bool sorted, HANDLER_BUFFER *buffer);
+    virtual int read_multi_range_next(KEY_MULTI_RANGE **found_range_p);
+  */
+
+
+  virtual int read_range_first(const key_range * start_key,
+                               const key_range * end_key,
+                               bool eq_range, bool sorted);
+  virtual int read_range_next();
+
+private:
+  int common_index_read(byte * buf, const byte * key,
+                        uint key_len, enum ha_rkey_function find_flag);
+  int common_first_last(byte * buf);
+  int partition_scan_set_up(byte * buf, bool idx_read_flag);
+  int handle_unordered_next(byte * buf, bool next_same);
+  int handle_unordered_scan_next_partition(byte * buf);
+  byte *queue_buf(uint part_id)
+  {
+    return (m_ordered_rec_buffer +
+            (part_id * (m_rec_length + PARTITION_BYTES_IN_POS)));
+  }
+  byte *rec_buf(uint part_id)
+  {
+    return (queue_buf(part_id) +
+            PARTITION_BYTES_IN_POS);
+  }
+  int handle_ordered_index_scan(byte * buf);
+  int handle_ordered_next(byte * buf, bool next_same);
+  int handle_ordered_prev(byte * buf);
+  void return_top_record(byte * buf);
+  void include_partition_fields_in_used_fields();
+public:
+  /*
+    -------------------------------------------------------------------------
+    MODULE information calls
+    -------------------------------------------------------------------------
+    These calls are used to inform the handler of specifics of the ongoing
+    scans and other actions. Most of these are used for optimisation
+    purposes.
+    -------------------------------------------------------------------------
+  */
+  virtual void info(uint);
+  virtual int extra(enum ha_extra_function operation);
+  virtual int extra_opt(enum ha_extra_function operation, ulong cachesize);
+  virtual int reset(void);
+
+private:
+  static const uint NO_CURRENT_PART_ID= 0xFFFFFFFF;
+  int loop_extra(enum ha_extra_function operation);
+  void late_extra_cache(uint partition_id);
+  void late_extra_no_cache(uint partition_id);
+  void prepare_extra_cache(uint cachesize);
+public:
+
+  /*
+    -------------------------------------------------------------------------
+    MODULE optimiser support
+    -------------------------------------------------------------------------
+    -------------------------------------------------------------------------
+  */
+
+  /*
+    NOTE !!!!!!
+    -------------------------------------------------------------------------
+    -------------------------------------------------------------------------
+    One important part of the public handler interface that is not depicted
+    in the methods is the attribute records,
+
+    which is defined in the base class. This is looked upon directly and is
+    set by calling info(HA_STATUS_INFO) ?
+    -------------------------------------------------------------------------
+  */
+
+  /*
+    keys_to_use_for_scanning can probably be implemented as the
+    intersection of all underlying handlers if mixed handlers are used.
+    This method is used to derive whether an index can be used for
+    index-only scanning when performing an ORDER BY query.
+    Only called from one place in sql_select.cc.
+  */
+  virtual const key_map *keys_to_use_for_scanning();
+
+  /*
+    Called in test_quick_select to determine if indexes should be used.
+  */
+  virtual double scan_time();
+
+  /*
+    The next method will never be called if you do not implement indexes.
+  */
+  virtual double read_time(uint index, uint ranges, ha_rows rows);
+  /*
+    For the given range, how many records are estimated to be in this range.
+    Used by optimiser to calculate cost of using a particular index.
+  */
+  virtual ha_rows records_in_range(uint inx, key_range * min_key,
+                                   key_range * max_key);
+
+  /*
+    The upper bound of the number of records returned in a scan is the sum
+    over all underlying handlers.
+  */
+  virtual ha_rows estimate_rows_upper_bound();
+
+  /*
+    table_cache_type is implemented by the underlying handler but all
+    underlying handlers must have the same implementation for it to work.
+  */
+  virtual uint8 table_cache_type();
+
+  /*
+    -------------------------------------------------------------------------
+    MODULE print messages
+    -------------------------------------------------------------------------
+    This module contains various methods that return text messages for
+    table types, index type and error messages.
+    -------------------------------------------------------------------------
+  */
+  /*
+    The name of the index type that will be used for display.
+    Here we must ensure that all handlers use the same index type
+    for each index created.
+  */
+  virtual const char *index_type(uint inx);
+
+  /* The name of the table type that will be used for display purposes */
+  virtual const char *table_type() const
+  { return "PARTITION"; }
+
+  /*
+    Handler specific error messages.
+  */
+  virtual void print_error(int error, myf errflag);
+  virtual bool get_error_message(int error, String * buf);
+  /*
+    -------------------------------------------------------------------------
+    MODULE handler characteristics
+    -------------------------------------------------------------------------
+    This module contains a number of methods defining limitations and
+    characteristics of the handler. The partition handler will calculate
+    these characteristics based on underlying handler characteristics.
+    -------------------------------------------------------------------------
+
+    This is a list of flags that says what the storage engine
+    implements. The current table flags are documented in handler.h.
+    The partition handler will support whatever the underlying handlers
+    support except when specifically mentioned below about exceptions
+    to this rule.
+
+    HA_READ_RND_SAME:
+    Not currently used. (Means that the handler supports the rnd_same() call)
+    (MyISAM, HEAP)
+
+    HA_TABLE_SCAN_ON_INDEX:
+    Used to avoid scanning full tables on an index. If this flag is set then
+    the handler always has a primary key (hidden if not defined) and this
+    index is used for scanning rather than a full table scan in all
+    situations.
+    (InnoDB, BDB, Federated)
+
+    HA_REC_NOT_IN_SEQ:
+    This flag is set for handlers that cannot guarantee that the rows are
+    returned according to incremental positions (0, 1, 2, 3...).
+    This also means that rnd_next() should return HA_ERR_RECORD_DELETED
+    if it finds a deleted row.
+    (MyISAM (not fixed length row), BDB, HEAP, NDB, InnoDB)
+
+    HA_CAN_GEOMETRY:
+    Can the storage engine handle spatial data.
+    Used to check that no spatial attributes are declared unless
+    the storage engine is capable of handling them.
+    (MyISAM)
+
+    HA_FAST_KEY_READ:
+    Setting this flag indicates that the handler is equally fast in
+    finding a row by key as by position.
+    This flag is used in a very special situation in conjunction with
+    filesort. For further explanation see intro to init_read_record.
+    (BDB, HEAP, InnoDB)
+
+    HA_NULL_IN_KEY:
+    Are NULL values allowed in indexes.
+    If this is not allowed then it is not possible to use an index on a
+    NULLable field.
+    (BDB, HEAP, MyISAM, NDB, InnoDB)
+
+    HA_DUPP_POS:
+    Tells that the position of the conflicting duplicate key
+    record is stored in table->file->dupp_ref. (insert uses rnd_pos() on
+    this to find the duplicated row)
+    (MyISAM)
+
+    HA_CAN_INDEX_BLOBS:
+    Is the storage engine capable of defining an index on a prefix of
+    a BLOB attribute.
+    (BDB, Federated, MyISAM, InnoDB)
+
+    HA_AUTO_PART_KEY:
+    Auto increment fields can be part of a multi-part key. For second part
+    auto-increment keys, the auto_incrementing is done in handler.cc.
+    (BDB, Federated, MyISAM, NDB)
+
+    HA_REQUIRE_PRIMARY_KEY:
+    Can't define a table without primary key (and cannot handle a table
+    with hidden primary key).
+    (No handler has this limitation currently)
+
+    HA_NOT_EXACT_COUNT:
+    Does the counter of records after the info call specify an exact
+    value or not. If it doesn't, this flag is set.
+    Only MyISAM and HEAP use an exact count.
+    (MyISAM, HEAP, BDB, InnoDB, NDB, Federated)
+
+    HA_CAN_INSERT_DELAYED:
+    Can the storage engine support delayed inserts.
+    To start with the partition handler will not support delayed inserts.
+    Further investigation needed.
+    (HEAP, MyISAM)
+
+    HA_PRIMARY_KEY_IN_READ_INDEX:
+    This parameter is set when the handler will also return the primary key
+    when doing read-only-key on another index.
+
+    HA_NOT_DELETE_WITH_CACHE:
+    Seems to be an old MyISAM feature that is no longer used. No handler
+    has it defined but it is checked in init_read_record.
+    Further investigation needed.
+    (No handler defines it)
+
+    HA_NO_PREFIX_CHAR_KEYS:
+    Indexes on prefixes of character fields are not allowed.
+    (NDB)
+
+    HA_CAN_FULLTEXT:
+    Does the storage engine support fulltext indexes.
+    The partition handler will start by not supporting fulltext indexes.
+    (MyISAM)
+
+    HA_CAN_SQL_HANDLER:
+    Can the HANDLER interface in the MySQL API be used towards this
+    storage engine.
+    (MyISAM, InnoDB)
+
+    HA_NO_AUTO_INCREMENT:
+    Set if the storage engine does not support auto increment fields.
+    (Currently not set by any handler)
+
+    HA_HAS_CHECKSUM:
+    Special MyISAM feature. Has special SQL support in CREATE TABLE.
+    No special handling needed by partition handler.
+    (MyISAM)
+
+    HA_FILE_BASED:
+    Should file names always be in lower case (used by engines
+    that map table names to file names).
+    Since the partition handler has a local file this flag is set.
+    (BDB, Federated, MyISAM)
+
+    HA_CAN_BIT_FIELD:
+    Is the storage engine capable of handling bit fields?
+    (MyISAM, NDB)
+
+    HA_NEED_READ_RANGE_BUFFER:
+    Is Read Multi-Range supported => need multi read range buffer.
+    This parameter specifies whether a buffer for read multi range
+    is needed by the handler. Whether the handler supports this
+    feature or not is dependent on whether the handler implements
+    read_multi_range* calls or not. The only handler currently
+    supporting this feature is NDB so the partition handler need
+    not handle this call. There are methods in handler.cc that will
+    transfer those calls into index_read and other calls in the
+    index scan module.
+    (NDB)
+  */
+  virtual ulong alter_table_flags(void) const
+  {
+    //return HA_ONLINE_ADD_EMPTY_PARTITION + HA_ONLINE_DROP_PARTITION;
+    return HA_ONLINE_DROP_PARTITION;
+  }
+  virtual ulong table_flags() const
+  { return m_table_flags; }
+  /*
+    HA_CAN_PARTITION:
+    Used by storage engines that can handle partitioning without this
+    partition handler.
+    (Partition, NDB)
+
+    HA_CAN_UPDATE_PARTITION_KEY:
+    Set if the handler can update fields that are part of the partition
+    function.
+
+    HA_CAN_PARTITION_UNIQUE:
+    Set if the handler can handle unique indexes where the fields of the
+    unique key are not part of the fields of the partition function. Thus
+    a unique key can be set on all fields.
+  */
+  virtual ulong partition_flags() const
+  { return HA_CAN_PARTITION; }
+
+  /*
+    This is a bitmap of flags that says how the storage engine
+    implements indexes. The current index flags are documented in
+    handler.h. If you do not implement indexes, just return zero
+    here.
+
+    part is the key part to check. First key part is 0.
+    If all_parts is set, MySQL wants to know the flags for the combined
+    index up to and including 'part'.
+
+    HA_READ_NEXT:
+    Does the index support read next? This is assumed in the server
+    code and never checked so all indexes must support this.
+    Note that the handler can be used even if it doesn't have any index.
+    (BDB, HEAP, MyISAM, Federated, NDB, InnoDB)
+
+    HA_READ_PREV:
+    Can the index be used to scan backwards.
+    (BDB, HEAP, MyISAM, NDB, InnoDB)
+
+    HA_READ_ORDER:
+    Can the index deliver its records in index order? Typically true for
+    all ordered indexes and not true for hash indexes.
+    In a first step this is not true for the partition handler until a merge
+    sort has been implemented in the partition handler.
+    Used to set keymap part_of_sortkey.
+    This keymap is only used to find indexes usable for resolving an ORDER BY
+    in the query. Thus in most cases index_read will work just fine without
+    order in result production. When this flag is set it is however safe to
+    order all output started by index_read since most engines do this. With
+    read_multi_range calls there is a specific flag setting order or not
+    order so in those cases ordering of index output can be avoided.
+    (BDB, InnoDB, HEAP, MyISAM, NDB)
+
+    HA_READ_RANGE:
+    Specifies whether the index can handle ranges; typically true for all
+    ordered indexes and not true for hash indexes.
+    Used by optimiser to check if ranges (such as key >= 5) can be
+    optimised by index.
+    (BDB, InnoDB, NDB, MyISAM, HEAP)
+
+    HA_ONLY_WHOLE_INDEX:
+    Can't use part key searches. This is typically true for hash indexes
+    and typically not true for ordered indexes.
+    (Federated, NDB, HEAP)
+
+    HA_KEYREAD_ONLY:
+    Does the storage engine support index-only scans on this index.
+    Enables use of HA_EXTRA_KEYREAD and HA_EXTRA_NO_KEYREAD.
+    Used to set key_map keys_for_keyread and to check in optimiser for
+    index-only scans. When doing a read under HA_EXTRA_KEYREAD the handler
+    only has to fill in the columns the key covers. If
+    HA_PRIMARY_KEY_IN_READ_INDEX is set then also the PRIMARY KEY columns
+    must be updated in the row.
+    (BDB, InnoDB, MyISAM)
+  */
+  virtual ulong index_flags(uint inx, uint part, bool all_parts) const
+  {
+    return m_file[0]->index_flags(inx, part, all_parts);
+  }
+
+  /*
+    Extensions of table handler files.
+  */
+  virtual const char **bas_ext() const;
+  /*
+    unireg.cc will call the following to make sure that the storage engine
+    can handle the data it is about to send.
+
+    The maximum supported values are the minimum of all handlers in the
+    table.
+  */
+  uint min_of_the_max_uint(uint (handler::*operator_func)(void) const) const;
+  virtual uint max_supported_record_length() const;
+  virtual uint max_supported_keys() const;
+  virtual uint max_supported_key_parts() const;
+  virtual uint max_supported_key_length() const;
+  virtual uint max_supported_key_part_length() const;
+
+  /*
+    All handlers in a partitioned table must have the same low_byte_first.
+  */
+  virtual bool low_byte_first() const
+  { return m_low_byte_first; }
+
+  /*
+    The extra record buffer length is the maximum needed by all handlers.
+    The minimum record length is the maximum of all involved handlers.
+  */
+  virtual uint extra_rec_buf_length() const;
+  virtual uint min_record_length(uint options) const;
+
+  /*
+    Transactions on the table are supported if all handlers below support
+    transactions.
+  */
+  virtual bool has_transactions()
+  { return m_has_transactions; }
+
+  /*
+    That the primary key is clustered can only be true if all underlying
+    handlers have this feature.
+  */
+  virtual bool primary_key_is_clustered()
+  { return m_pkey_is_clustered; }
+
+  /*
+    -------------------------------------------------------------------------
+    MODULE compare records
+    -------------------------------------------------------------------------
+    cmp_ref checks if two references are the same. For most handlers this is
+    a simple memcmp of the reference. However some handlers use the primary
+    key as the reference and this can be the same even if memcmp says they
+    are different. This is due to character sets and end spaces and so forth.
+    For the partition handler the reference is first two bytes providing the
+    partition identity of the referred record and then the reference of the
+    underlying handler.
+    Thus cmp_ref for the partition handler always returns FALSE for records
+    not in the same partition and uses cmp_ref on the underlying handler
+    to check whether the rest of the reference part is also the same.
+    -------------------------------------------------------------------------
+  */
+  virtual int cmp_ref(const byte * ref1, const byte * ref2);
+  /*
+    -------------------------------------------------------------------------
+    MODULE auto increment
+    -------------------------------------------------------------------------
+    This module is used to handle the support of auto increments.
+
+    This variable in the handler is used as part of the handler interface.
+    It is maintained by the parent handler object and should not be
+    touched by child handler objects (see handler.cc for its use).
+
+    auto_increment_column_changed
+    -------------------------------------------------------------------------
+  */
+  virtual void restore_auto_increment();
+  virtual ulonglong get_auto_increment();
+
+  /*
+    -------------------------------------------------------------------------
+    MODULE initialise handler for HANDLER call
+    -------------------------------------------------------------------------
+    This method is a special InnoDB method called before a HANDLER query.
+ ------------------------------------------------------------------------- + */ + virtual void init_table_handle_for_HANDLER(); + + /* + The remainder of this file defines the handler methods not implemented + by the partition handler + */ + + /* + ------------------------------------------------------------------------- + MODULE foreign key support + ------------------------------------------------------------------------- + The following methods are used to implement foreign keys as supported by + InnoDB. Implement this ?? + get_foreign_key_create_info is used by SHOW CREATE TABLE to get a textual + description of how the CREATE TABLE part to define FOREIGN KEY's is done. + free_foreign_key_create_info is used to free the memory area that provided + this description. + ------------------------------------------------------------------------- + + virtual char* get_foreign_key_create_info() + virtual void free_foreign_key_create_info(char* str) + + virtual int get_foreign_key_list(THD *thd, + List<FOREIGN_KEY_INFO> *f_key_list) + virtual uint referenced_by_foreign_key() + */ + + /* + ------------------------------------------------------------------------- + MODULE fulltext index + ------------------------------------------------------------------------- + Fulltext stuff not yet. + ------------------------------------------------------------------------- + virtual int ft_init() { return HA_ERR_WRONG_COMMAND; } + virtual FT_INFO *ft_init_ext(uint flags,uint inx,const byte *key, + uint keylen) + { return NULL; } + virtual int ft_read(byte *buf) { return HA_ERR_WRONG_COMMAND; } + */ + + /* + ------------------------------------------------------------------------- + MODULE restart full table scan at position (MyISAM) + ------------------------------------------------------------------------- + The following method is only used by MyISAM when used as + temporary tables in a join. 
+    virtual int restart_rnd_next(byte *buf, byte *pos);
+  */
+
+  /*
+    -------------------------------------------------------------------------
+    MODULE on-line ALTER TABLE
+    -------------------------------------------------------------------------
+    These methods are in the handler interface but never used (yet).
+    They are to be used by on-line alter table add/drop index:
+    -------------------------------------------------------------------------
+    virtual ulong index_ddl_flags(KEY *wanted_index) const
+    virtual int add_index(TABLE *table_arg,KEY *key_info,uint num_of_keys);
+    virtual int drop_index(TABLE *table_arg,uint *key_num,uint num_of_keys);
+  */
+
+  /*
+    -------------------------------------------------------------------------
+    MODULE tablespace support
+    -------------------------------------------------------------------------
+    Admin of table spaces is not applicable to the partition handler (InnoDB).
+    This means that the following method is not implemented:
+    -------------------------------------------------------------------------
+    virtual int discard_or_import_tablespace(my_bool discard)
+  */
+
+  /*
+    -------------------------------------------------------------------------
+    MODULE admin MyISAM
+    -------------------------------------------------------------------------
+    Admin commands are not supported currently (almost purely MyISAM
+    routines). This means that the following methods are not implemented:
+    -------------------------------------------------------------------------
+
+    virtual int check(THD* thd, HA_CHECK_OPT *check_opt);
+    virtual int backup(THD* thd, HA_CHECK_OPT *check_opt);
+    virtual int restore(THD* thd, HA_CHECK_OPT *check_opt);
+    virtual int repair(THD* thd, HA_CHECK_OPT *check_opt);
+    virtual int optimize(THD* thd, HA_CHECK_OPT *check_opt);
+    virtual int analyze(THD* thd, HA_CHECK_OPT *check_opt);
+    virtual int assign_to_keycache(THD* thd, HA_CHECK_OPT *check_opt);
+    virtual int preload_keys(THD *thd, HA_CHECK_OPT *check_opt);
+    virtual bool check_and_repair(THD *thd);
+    virtual int dump(THD* thd, int fd = -1);
+    virtual int net_read_dump(NET* net);
+    virtual uint checksum() const;
+    virtual bool is_crashed() const;
+    virtual bool auto_repair() const;
+
+    -------------------------------------------------------------------------
+    MODULE enable/disable indexes
+    -------------------------------------------------------------------------
+    Enable/Disable Indexes are not supported currently (Heap, MyISAM).
+    This means that the following methods are not implemented:
+    -------------------------------------------------------------------------
+    virtual int disable_indexes(uint mode);
+    virtual int enable_indexes(uint mode);
+    virtual int indexes_are_disabled(void);
+  */
+
+  /*
+    -------------------------------------------------------------------------
+    MODULE append_create_info
+    -------------------------------------------------------------------------
+    append_create_info is only used by MyISAM MERGE tables and the partition
+    handler will not support MyISAM MERGE as an underlying handler.
+    Implement this??
+ ------------------------------------------------------------------------- + virtual void append_create_info(String *packet) + */ +}; diff --git a/sql/handler.cc b/sql/handler.cc index 4e128eb5938..6f3cdc5a5cd 100644 --- a/sql/handler.cc +++ b/sql/handler.cc @@ -22,109 +22,51 @@ #endif #include "mysql_priv.h" +#include "rpl_filter.h" #include "ha_heap.h" #include "ha_myisam.h" #include "ha_myisammrg.h" -/* - We have dummy hanldertons in case the handler has not been compiled - in. This will be removed in 5.1. -*/ -#ifdef HAVE_BERKELEY_DB -#include "ha_berkeley.h" -extern handlerton berkeley_hton; -#else -handlerton berkeley_hton = { "BerkeleyDB", SHOW_OPTION_NO, - "Supports transactions and page-level locking", DB_TYPE_BERKELEY_DB, NULL, - 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, HTON_NO_FLAGS }; -#endif -#ifdef HAVE_BLACKHOLE_DB -#include "ha_blackhole.h" -extern handlerton blackhole_hton; -#else -handlerton blackhole_hton = { "BLACKHOLE", SHOW_OPTION_NO, - "/dev/null storage engine (anything you write to it disappears)", - DB_TYPE_BLACKHOLE_DB, NULL, 0, 0, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - HTON_NO_FLAGS }; -#endif -#ifdef HAVE_EXAMPLE_DB -#include "examples/ha_example.h" -extern handlerton example_hton; -#else -handlerton example_hton = { "EXAMPLE", SHOW_OPTION_NO, - "Example storage engine", - DB_TYPE_EXAMPLE_DB, NULL, 0, 0, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - HTON_NO_FLAGS }; -#endif -#if defined(HAVE_ARCHIVE_DB) -#include "ha_archive.h" -extern handlerton archive_hton; -#else -handlerton archive_hton = { "ARCHIVE", SHOW_OPTION_NO, - "Archive storage engine", DB_TYPE_ARCHIVE_DB, NULL, 0, 0, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - HTON_NO_FLAGS }; -#endif -#ifdef HAVE_CSV_DB -#include "examples/ha_tina.h" -extern handlerton tina_hton; -#else -handlerton tina_hton = { "CSV", SHOW_OPTION_NO, "CSV storage engine", - DB_TYPE_CSV_DB, NULL, 0, 0, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - HTON_NO_FLAGS }; -#endif -#ifdef HAVE_INNOBASE_DB -#include "ha_innodb.h" -extern handlerton innobase_hton; -#else -handlerton innobase_hton = { "InnoDB", SHOW_OPTION_NO, - "Supports transactions, row-level locking, and foreign keys", - DB_TYPE_INNODB, NULL, 0, 0, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - HTON_NO_FLAGS }; -#endif -#ifdef HAVE_NDBCLUSTER_DB +#include <myisampack.h> +#include <errno.h> + +#ifdef WITH_NDBCLUSTER_STORAGE_ENGINE +#define NDB_MAX_ATTRIBUTES_IN_TABLE 128 #include "ha_ndbcluster.h" -extern handlerton ndbcluster_hton; -#else -handlerton ndbcluster_hton = { "ndbcluster", SHOW_OPTION_NO, - "Clustered, fault-tolerant, memory-based tables", - DB_TYPE_NDBCLUSTER, NULL, 0, 0, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - HTON_NO_FLAGS }; #endif -#ifdef HAVE_FEDERATED_DB -#include "ha_federated.h" -extern handlerton federated_hton; -#else -handlerton federated_hton = { "FEDERATED", SHOW_OPTION_NO, - "Federated MySQL storage engine", DB_TYPE_FEDERATED_DB, NULL, 0, 0, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - HTON_NO_FLAGS }; +#ifdef WITH_PARTITION_STORAGE_ENGINE +#include "ha_partition.h" #endif -#include <myisampack.h> -#include <errno.h> -extern handlerton myisam_hton; -extern handlerton myisammrg_hton; -extern handlerton heap_hton; -extern handlerton 
binlog_hton; +#ifdef WITH_INNOBASE_STORAGE_ENGINE +#include "ha_innodb.h" +#endif -/* - Obsolete -*/ -handlerton isam_hton = { "ISAM", SHOW_OPTION_NO, "Obsolete storage engine", - DB_TYPE_ISAM, NULL, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, HTON_NO_FLAGS }; +extern handlerton *sys_table_types[]; +#define BITMAP_STACKBUF_SIZE (128/8) /* static functions defined in this file */ +static handler *create_default(TABLE_SHARE *table); + +const handlerton default_hton = +{ + MYSQL_HANDLERTON_INTERFACE_VERSION, + "DEFAULT", + SHOW_OPTION_YES, + NULL, + DB_TYPE_DEFAULT, + NULL, + 0, 0, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, + create_default, + NULL, NULL, NULL, NULL, NULL, + HTON_NO_FLAGS +}; + static SHOW_COMP_OPTION have_yes= SHOW_OPTION_YES; /* number of entries in handlertons[] */ @@ -134,35 +76,14 @@ ulong total_ha_2pc; /* size of savepoint storage area (see ha_init) */ ulong savepoint_alloc_size; -/* - This array is used for processing compiled in engines. -*/ -handlerton *sys_table_types[]= -{ - &myisam_hton, - &heap_hton, - &innobase_hton, - &berkeley_hton, - &blackhole_hton, - &example_hton, - &archive_hton, - &tina_hton, - &ndbcluster_hton, - &federated_hton, - &myisammrg_hton, - &binlog_hton, - &isam_hton, - NULL -}; - struct show_table_alias_st sys_table_aliases[]= { - {"INNOBASE", "InnoDB"}, - {"NDB", "NDBCLUSTER"}, - {"BDB", "BERKELEYDB"}, - {"HEAP", "MEMORY"}, - {"MERGE", "MRG_MYISAM"}, - {NullS, NullS} + {"INNOBASE", DB_TYPE_INNODB}, + {"NDB", DB_TYPE_NDBCLUSTER}, + {"BDB", DB_TYPE_BERKELEY_DB}, + {"HEAP", DB_TYPE_HEAP}, + {"MERGE", DB_TYPE_MRG_MYISAM}, + {NullS, DB_TYPE_UNKNOWN} }; const char *ha_row_type[] = { @@ -178,24 +99,22 @@ TYPELIB tx_isolation_typelib= {array_elements(tx_isolation_names)-1,"", static TYPELIB known_extensions= {0,"known_exts", NULL, NULL}; uint known_extensions_id= 0; -enum db_type ha_resolve_by_name(const char *name, uint namelen) +handlerton *ha_resolve_by_name(THD *thd, LEX_STRING *name) { - THD *thd= current_thd; show_table_alias_st *table_alias; - handlerton **types; + st_plugin_int *plugin; if (thd && !my_strnncoll(&my_charset_latin1, - (const uchar *)name, namelen, + (const uchar *)name->str, name->length, (const uchar *)"DEFAULT", 7)) - return (enum db_type) thd->variables.table_type; + return ha_resolve_by_legacy_type(thd, DB_TYPE_DEFAULT); -retest: - for (types= sys_table_types; *types; types++) + if ((plugin= plugin_lock(name, MYSQL_STORAGE_ENGINE_PLUGIN))) { - if (!my_strnncoll(&my_charset_latin1, - (const uchar *)name, namelen, - (const uchar *)(*types)->name, strlen((*types)->name))) - return (enum db_type) (*types)->db_type; + handlerton *hton= (handlerton *) plugin->plugin->info; + if (!(hton->flags & HTON_NOT_USER_SELECTABLE)) + return hton; + plugin_unlock(plugin); } /* @@ -204,64 +123,98 @@ retest: for (table_alias= sys_table_aliases; table_alias->type; table_alias++) { if (!my_strnncoll(&my_charset_latin1, - (const uchar *)name, namelen, + (const uchar *)name->str, name->length, (const uchar *)table_alias->alias, strlen(table_alias->alias))) - { - name= table_alias->type; - namelen= strlen(name); - goto retest; - } + return ha_resolve_by_legacy_type(thd, table_alias->type); } - return DB_TYPE_UNKNOWN; + return NULL; } -const char *ha_get_storage_engine(enum db_type db_type) +struct plugin_find_dbtype_st { - handlerton **types; - for (types= sys_table_types; *types; types++) + enum legacy_db_type db_type; + handlerton *hton; +}; + + +static my_bool 
plugin_find_dbtype(THD *unused, st_plugin_int *plugin, + void *arg) +{ + handlerton *types= (handlerton *) plugin->plugin->info; + if (types->db_type == ((struct plugin_find_dbtype_st *)arg)->db_type) { - if (db_type == (*types)->db_type) - return (*types)->name; + ((struct plugin_find_dbtype_st *)arg)->hton= types; + return TRUE; } - return "*NONE*"; + return FALSE; } -bool ha_check_storage_engine_flag(enum db_type db_type, uint32 flag) +const char *ha_get_storage_engine(enum legacy_db_type db_type) { - handlerton **types; - for (types= sys_table_types; *types; types++) + struct plugin_find_dbtype_st info; + + switch (db_type) { - if (db_type == (*types)->db_type) - return test((*types)->flags & flag); + case DB_TYPE_DEFAULT: + return "DEFAULT"; + case DB_TYPE_UNKNOWN: + return "UNKNOWN"; + default: + info.db_type= db_type; + + if (!plugin_foreach(NULL, plugin_find_dbtype, + MYSQL_STORAGE_ENGINE_PLUGIN, &info)) + return "*NONE*"; + + return info.hton->name; } - return FALSE; // No matching engine } -my_bool ha_storage_engine_is_enabled(enum db_type database_type) +static handler *create_default(TABLE_SHARE *table) { - handlerton **types; - for (types= sys_table_types; *types; types++) + handlerton *hton=ha_resolve_by_legacy_type(current_thd, DB_TYPE_DEFAULT); + return (hton && hton != &default_hton && hton->create) ? + hton->create(table) : NULL; +} + + +handlerton *ha_resolve_by_legacy_type(THD *thd, enum legacy_db_type db_type) +{ + struct plugin_find_dbtype_st info; + + switch (db_type) { - if ((database_type == (*types)->db_type) && - ((*types)->state == SHOW_OPTION_YES)) - return TRUE; + case DB_TYPE_DEFAULT: + return (thd->variables.table_type != NULL) ? + thd->variables.table_type : + (global_system_variables.table_type != NULL ? + global_system_variables.table_type : &myisam_hton); + case DB_TYPE_UNKNOWN: + return NULL; + default: + info.db_type= db_type; + if (!plugin_foreach(NULL, plugin_find_dbtype, + MYSQL_STORAGE_ENGINE_PLUGIN, &info)) + return NULL; + + return info.hton; } - return FALSE; } /* Use other database handler if databasehandler is not compiled in */ -enum db_type ha_checktype(THD *thd, enum db_type database_type, +handlerton *ha_checktype(THD *thd, enum legacy_db_type database_type, bool no_substitute, bool report_error) { - if (ha_storage_engine_is_enabled(database_type)) - return database_type; + handlerton *hton= ha_resolve_by_legacy_type(thd, database_type); + if (ha_storage_engine_is_enabled(hton)) + return hton; if (no_substitute) { @@ -270,102 +223,82 @@ enum db_type ha_checktype(THD *thd, enum db_type database_type, const char *engine_name= ha_get_storage_engine(database_type); my_error(ER_FEATURE_DISABLED,MYF(0),engine_name,engine_name); } - return DB_TYPE_UNKNOWN; + return NULL; } switch (database_type) { #ifndef NO_HASH case DB_TYPE_HASH: - return (database_type); + return ha_resolve_by_legacy_type(thd, DB_TYPE_HASH); #endif case DB_TYPE_MRG_ISAM: - return (DB_TYPE_MRG_MYISAM); + return ha_resolve_by_legacy_type(thd, DB_TYPE_MRG_MYISAM); default: break; } - - return ((enum db_type) thd->variables.table_type != DB_TYPE_UNKNOWN ? - (enum db_type) thd->variables.table_type : - ((enum db_type) global_system_variables.table_type != - DB_TYPE_UNKNOWN ? 
- (enum db_type) global_system_variables.table_type : DB_TYPE_MYISAM) - ); + + return ha_resolve_by_legacy_type(thd, DB_TYPE_DEFAULT); } /* ha_checktype */ -handler *get_new_handler(TABLE *table, MEM_ROOT *alloc, enum db_type db_type) +handler *get_new_handler(TABLE_SHARE *share, MEM_ROOT *alloc, + handlerton *db_type) { - switch (db_type) { -#ifndef NO_HASH - case DB_TYPE_HASH: - return new (alloc) ha_hash(table); -#endif - case DB_TYPE_MRG_MYISAM: - case DB_TYPE_MRG_ISAM: - return new (alloc) ha_myisammrg(table); -#ifdef HAVE_BERKELEY_DB - case DB_TYPE_BERKELEY_DB: - if (have_berkeley_db == SHOW_OPTION_YES) - return new (alloc) ha_berkeley(table); - return NULL; -#endif -#ifdef HAVE_INNOBASE_DB - case DB_TYPE_INNODB: - if (have_innodb == SHOW_OPTION_YES) - return new (alloc) ha_innobase(table); - return NULL; -#endif -#ifdef HAVE_EXAMPLE_DB - case DB_TYPE_EXAMPLE_DB: - if (have_example_db == SHOW_OPTION_YES) - return new (alloc) ha_example(table); - return NULL; -#endif -#if defined(HAVE_ARCHIVE_DB) - case DB_TYPE_ARCHIVE_DB: - if (have_archive_db == SHOW_OPTION_YES) - return new (alloc) ha_archive(table); - return NULL; -#endif -#ifdef HAVE_BLACKHOLE_DB - case DB_TYPE_BLACKHOLE_DB: - if (have_blackhole_db == SHOW_OPTION_YES) - return new (alloc) ha_blackhole(table); - return NULL; -#endif -#ifdef HAVE_FEDERATED_DB - case DB_TYPE_FEDERATED_DB: - if (have_federated_db == SHOW_OPTION_YES) - return new (alloc) ha_federated(table); - return NULL; -#endif -#ifdef HAVE_CSV_DB - case DB_TYPE_CSV_DB: - if (have_csv_db == SHOW_OPTION_YES) - return new (alloc) ha_tina(table); - return NULL; -#endif -#ifdef HAVE_NDBCLUSTER_DB - case DB_TYPE_NDBCLUSTER: - if (have_ndbcluster == SHOW_OPTION_YES) - return new (alloc) ha_ndbcluster(table); - return NULL; -#endif - case DB_TYPE_HEAP: - return new (alloc) ha_heap(table); - default: // should never happen + handler *file= NULL; + /* + handlers are allocated with new in the handlerton create() function + we need to set the thd mem_root for these to be allocated correctly + */ + THD *thd= current_thd; + MEM_ROOT *thd_save_mem_root= thd->mem_root; + thd->mem_root= alloc; + + if (db_type != NULL && db_type->state == SHOW_OPTION_YES && db_type->create) + file= db_type->create(share); + + thd->mem_root= thd_save_mem_root; + + if (!file) { - enum db_type def=(enum db_type) current_thd->variables.table_type; + handlerton *def= current_thd->variables.table_type; /* Try first with 'default table type' */ if (db_type != def) - return get_new_handler(table, alloc, def); + return get_new_handler(share, alloc, def); } - /* Fall back to MyISAM */ - case DB_TYPE_MYISAM: - return new (alloc) ha_myisam(table); + if (file) + { + if (file->ha_initialise()) + { + delete file; + file=0; + } } + return file; } + +#ifdef WITH_PARTITION_STORAGE_ENGINE +handler *get_ha_partition(partition_info *part_info) +{ + ha_partition *partition; + DBUG_ENTER("get_ha_partition"); + if ((partition= new ha_partition(part_info))) + { + if (partition->ha_initialise()) + { + delete partition; + partition= 0; + } + } + else + { + my_error(ER_OUTOFMEMORY, MYF(0), sizeof(ha_partition)); + } + DBUG_RETURN(((handler*) partition)); +} +#endif + + /* Register handler error messages for use with my_error(). 
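This rewrite replaces the old sys_table_types[] scans with the callback idiom used throughout the rest of the file: wrap any state in a small struct, hand it to plugin_foreach() through the void* argument, and return TRUE from the callback to stop the iteration early. A hypothetical sketch of the pattern (count_enabled_engines is not part of the patch):

    struct count_st { uint count; };

    static my_bool count_handlerton(THD *thd, st_plugin_int *plugin,
                                    void *arg)
    {
      handlerton *hton= (handlerton *) plugin->plugin->info;
      if (hton->state == SHOW_OPTION_YES)
        ((struct count_st *) arg)->count++;
      return FALSE;                       /* FALSE: keep iterating */
    }

    uint count_enabled_engines(void)
    {
      struct count_st info= { 0 };
      plugin_foreach(NULL, count_handlerton,
                     MYSQL_STORAGE_ENGINE_PLUGIN, &info);
      return info.count;
    }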
@@ -454,16 +387,55 @@ static int ha_finish_errors(void) } -static inline void ha_was_inited_ok(handlerton **ht) +static void ha_was_inited_ok(handlerton *ht) { - uint tmp= (*ht)->savepoint_offset; - (*ht)->savepoint_offset= savepoint_alloc_size; + uint tmp= ht->savepoint_offset; + ht->savepoint_offset= savepoint_alloc_size; savepoint_alloc_size+= tmp; - (*ht)->slot= total_ha++; - if ((*ht)->prepare) + ht->slot= total_ha++; + if (ht->prepare) total_ha_2pc++; } + +int ha_initialize_handlerton(handlerton *hton) +{ + DBUG_ENTER("ha_initialize_handlerton"); + + if (hton == NULL) + DBUG_RETURN(1); + + switch (hton->state) + { + case SHOW_OPTION_NO: + break; + case SHOW_OPTION_YES: + if (!hton->init || !hton->init()) + { + ha_was_inited_ok(hton); + break; + } + /* fall through */ + default: + hton->state= SHOW_OPTION_DISABLED; + break; + } + DBUG_RETURN(0); +} + + +static my_bool init_handlerton(THD *unused1, st_plugin_int *plugin, + void *unused2) +{ + if (plugin->state == PLUGIN_IS_UNINITIALIZED) + { + ha_initialize_handlerton((handlerton *) plugin->plugin->info); + plugin->state= PLUGIN_IS_READY; + } + return FALSE; +} + + int ha_init() { int error= 0; @@ -474,16 +446,8 @@ int ha_init() if (ha_init_errors()) return 1; - /* - We now initialize everything here. - */ - for (types= sys_table_types; *types; types++) - { - if (!(*types)->init || !(*types)->init()) - ha_was_inited_ok(types); - else - (*types)->state= SHOW_OPTION_DISABLED; - } + if (plugin_foreach(NULL, init_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, 0)) + return 1; DBUG_ASSERT(total_ha < MAX_HA); /* @@ -496,70 +460,97 @@ int ha_init() return error; } + +int ha_register_builtin_plugins() +{ + handlerton **hton; + uint size= 0; + struct st_mysql_plugin *plugin; + DBUG_ENTER("ha_register_builtin_plugins"); + + for (hton= sys_table_types; *hton; hton++) + size+= sizeof(struct st_mysql_plugin); + + if (!(plugin= (struct st_mysql_plugin *) + my_once_alloc(size, MYF(MY_WME | MY_ZEROFILL)))) + DBUG_RETURN(1); + + for (hton= sys_table_types; *hton; hton++, plugin++) + { + plugin->type= MYSQL_STORAGE_ENGINE_PLUGIN; + plugin->info= *hton; + plugin->version= 0; + plugin->name= (*hton)->name; + plugin->author= NULL; + plugin->descr= (*hton)->comment; + + if (plugin_register_builtin(plugin)) + DBUG_RETURN(1); + } + DBUG_RETURN(0); +} + + + + /* close, flush or restart databases */ /* Ignore this for other databases than ours */ +static my_bool panic_handlerton(THD *unused1, st_plugin_int *plugin, + void *arg) +{ + handlerton *hton= (handlerton *) plugin->plugin->info; + if (hton->state == SHOW_OPTION_YES && hton->panic) + ((int*)arg)[0]|= hton->panic((enum ha_panic_function)((int*)arg)[1]); + return FALSE; +} + + int ha_panic(enum ha_panic_function flag) { - int error=0; -#ifndef NO_HASH - error|=h_panic(flag); /* fix hash */ -#endif -#ifdef HAVE_ISAM - error|=mrg_panic(flag); - error|=nisam_panic(flag); -#endif - error|=heap_panic(flag); - error|=mi_panic(flag); - error|=myrg_panic(flag); -#ifdef HAVE_BERKELEY_DB - if (have_berkeley_db == SHOW_OPTION_YES) - error|=berkeley_end(); -#endif -#ifdef HAVE_INNOBASE_DB - if (have_innodb == SHOW_OPTION_YES) - error|=innobase_end(); -#endif -#ifdef HAVE_NDBCLUSTER_DB - if (have_ndbcluster == SHOW_OPTION_YES) - error|=ndbcluster_end(); -#endif -#ifdef HAVE_FEDERATED_DB - if (have_federated_db == SHOW_OPTION_YES) - error|= federated_db_end(); -#endif -#if defined(HAVE_ARCHIVE_DB) - if (have_archive_db == SHOW_OPTION_YES) - error|= archive_db_end(); -#endif -#ifdef HAVE_CSV_DB - if (have_csv_db == 
SHOW_OPTION_YES) - error|= tina_end(); -#endif - if (ha_finish_errors()) - error= 1; - return error; + int error[2]; + + error[0]= 0; error[1]= (int)flag; + plugin_foreach(NULL, panic_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, error); + + if (flag == HA_PANIC_CLOSE && ha_finish_errors()) + error[0]= 1; + return error[0]; } /* ha_panic */ +static my_bool dropdb_handlerton(THD *unused1, st_plugin_int *plugin, + void *path) +{ + handlerton *hton= (handlerton *) plugin->plugin->info; + if (hton->state == SHOW_OPTION_YES && hton->drop_database) + hton->drop_database((char *)path); + return FALSE; +} + + void ha_drop_database(char* path) { -#ifdef HAVE_INNOBASE_DB - if (have_innodb == SHOW_OPTION_YES) - innobase_drop_database(path); -#endif -#ifdef HAVE_NDBCLUSTER_DB - if (have_ndbcluster == SHOW_OPTION_YES) - ndbcluster_drop_database(path); -#endif + plugin_foreach(NULL, dropdb_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, path); } + +static my_bool closecon_handlerton(THD *thd, st_plugin_int *plugin, + void *unused) +{ + handlerton *hton= (handlerton *) plugin->plugin->info; + /* there's no need to rollback here as all transactions must + be rolled back already */ + if (hton->state == SHOW_OPTION_YES && hton->close_connection && + thd->ha_data[hton->slot]) + hton->close_connection(thd); + return FALSE; +} + + /* don't bother to rollback here, it's done already */ void ha_close_connection(THD* thd) { - handlerton **types; - for (types= sys_table_types; *types; types++) - if (thd->ha_data[(*types)->slot]) - (*types)->close_connection(thd); + plugin_foreach(thd, closecon_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, 0); } /* ======================================================================== @@ -869,21 +860,46 @@ int ha_autocommit_or_rollback(THD *thd, int error) } -int ha_commit_or_rollback_by_xid(XID *xid, bool commit) +struct xahton_st { + XID *xid; + int result; +}; + +static my_bool xacommit_handlerton(THD *unused1, st_plugin_int *plugin, + void *arg) { - handlerton **types; - int res= 1; + handlerton *hton= (handlerton *) plugin->plugin->info; + if (hton->state == SHOW_OPTION_YES && hton->recover) + { + hton->commit_by_xid(((struct xahton_st *)arg)->xid); + ((struct xahton_st *)arg)->result= 0; + } + return FALSE; +} - for (types= sys_table_types; *types; types++) +static my_bool xarollback_handlerton(THD *unused1, st_plugin_int *plugin, + void *arg) +{ + handlerton *hton= (handlerton *) plugin->plugin->info; + if (hton->state == SHOW_OPTION_YES && hton->recover) { - if ((*types)->state == SHOW_OPTION_YES && (*types)->recover) - { - if ((*(commit ? (*types)->commit_by_xid : - (*types)->rollback_by_xid))(xid)); - res= 0; - } + hton->rollback_by_xid(((struct xahton_st *)arg)->xid); + ((struct xahton_st *)arg)->result= 0; } - return res; + return FALSE; +} + + +int ha_commit_or_rollback_by_xid(XID *xid, bool commit) +{ + struct xahton_st xaop; + xaop.xid= xid; + xaop.result= 1; + + plugin_foreach(NULL, commit ? xacommit_handlerton : xarollback_handlerton, + MYSQL_STORAGE_ENGINE_PLUGIN, &xaop); + + return xaop.result; } @@ -959,99 +975,123 @@ static char* xid_to_str(char *buf, XID *xid) in this case commit_list==0, tc_heuristic_recover == 0 there should be no prepared transactions in this case. 
*/ -int ha_recover(HASH *commit_list) -{ - int len, got, found_foreign_xids=0, found_my_xids=0; - handlerton **types; - XID *list=0; - bool dry_run=(commit_list==0 && tc_heuristic_recover==0); - DBUG_ENTER("ha_recover"); - - /* commit_list and tc_heuristic_recover cannot be set both */ - DBUG_ASSERT(commit_list==0 || tc_heuristic_recover==0); - /* if either is set, total_ha_2pc must be set too */ - DBUG_ASSERT(dry_run || total_ha_2pc>(ulong)opt_bin_log); - - if (total_ha_2pc <= (ulong)opt_bin_log) - DBUG_RETURN(0); - - if (commit_list) - sql_print_information("Starting crash recovery..."); - -#ifndef WILL_BE_DELETED_LATER - /* - for now, only InnoDB supports 2pc. It means we can always safely - rollback all pending transactions, without risking inconsistent data - */ - DBUG_ASSERT(total_ha_2pc == (ulong) opt_bin_log+1); // only InnoDB and binlog - tc_heuristic_recover= TC_HEURISTIC_RECOVER_ROLLBACK; // forcing ROLLBACK - dry_run=FALSE; -#endif - for (len= MAX_XID_LIST_SIZE ; list==0 && len > MIN_XID_LIST_SIZE; len/=2) - { - list=(XID *)my_malloc(len*sizeof(XID), MYF(0)); - } - if (!list) - { - sql_print_error(ER(ER_OUTOFMEMORY), len*sizeof(XID)); - DBUG_RETURN(1); - } +struct xarecover_st +{ + int len, found_foreign_xids, found_my_xids; + XID *list; + HASH *commit_list; + bool dry_run; +}; - for (types= sys_table_types; *types; types++) +static my_bool xarecover_handlerton(THD *unused, st_plugin_int *plugin, + void *arg) +{ + handlerton *hton= (handlerton *) plugin->plugin->info; + struct xarecover_st *info= (struct xarecover_st *) arg; + int got; + + if (hton->state == SHOW_OPTION_YES && hton->recover) { - if ((*types)->state != SHOW_OPTION_YES || !(*types)->recover) - continue; - while ((got=(*(*types)->recover)(list, len)) > 0 ) + while ((got= hton->recover(info->list, info->len)) > 0 ) { sql_print_information("Found %d prepared transaction(s) in %s", - got, (*types)->name); + got, hton->name); for (int i=0; i < got; i ++) { - my_xid x=list[i].get_my_xid(); + my_xid x=info->list[i].get_my_xid(); if (!x) // not "mine" - that is generated by external TM { #ifndef DBUG_OFF char buf[XIDDATASIZE*4+6]; // see xid_to_str - sql_print_information("ignore xid %s", xid_to_str(buf, list+i)); + sql_print_information("ignore xid %s", xid_to_str(buf, info->list+i)); #endif - xid_cache_insert(list+i, XA_PREPARED); - found_foreign_xids++; + xid_cache_insert(info->list+i, XA_PREPARED); + info->found_foreign_xids++; continue; } - if (dry_run) + if (info->dry_run) { - found_my_xids++; + info->found_my_xids++; continue; } // recovery mode - if (commit_list ? - hash_search(commit_list, (byte *)&x, sizeof(x)) != 0 : + if (info->commit_list ? 
+ hash_search(info->commit_list, (byte *)&x, sizeof(x)) != 0 : tc_heuristic_recover == TC_HEURISTIC_RECOVER_COMMIT) { #ifndef DBUG_OFF char buf[XIDDATASIZE*4+6]; // see xid_to_str - sql_print_information("commit xid %s", xid_to_str(buf, list+i)); + sql_print_information("commit xid %s", xid_to_str(buf, info->list+i)); #endif - (*(*types)->commit_by_xid)(list+i); + hton->commit_by_xid(info->list+i); } else { #ifndef DBUG_OFF char buf[XIDDATASIZE*4+6]; // see xid_to_str - sql_print_information("rollback xid %s", xid_to_str(buf, list+i)); + sql_print_information("rollback xid %s", + xid_to_str(buf, info->list+i)); #endif - (*(*types)->rollback_by_xid)(list+i); + hton->rollback_by_xid(info->list+i); } } - if (got < len) + if (got < info->len) break; } } - my_free((gptr)list, MYF(0)); - if (found_foreign_xids) - sql_print_warning("Found %d prepared XA transactions", found_foreign_xids); - if (dry_run && found_my_xids) + return FALSE; +} + +int ha_recover(HASH *commit_list) +{ + struct xarecover_st info; + DBUG_ENTER("ha_recover"); + info.found_foreign_xids= info.found_my_xids= 0; + info.commit_list= commit_list; + info.dry_run= (info.commit_list==0 && tc_heuristic_recover==0); + info.list= NULL; + + /* commit_list and tc_heuristic_recover cannot be set both */ + DBUG_ASSERT(info.commit_list==0 || tc_heuristic_recover==0); + /* if either is set, total_ha_2pc must be set too */ + DBUG_ASSERT(info.dry_run || total_ha_2pc>(ulong)opt_bin_log); + + if (total_ha_2pc <= (ulong)opt_bin_log) + DBUG_RETURN(0); + + if (info.commit_list) + sql_print_information("Starting crash recovery..."); + +#ifndef WILL_BE_DELETED_LATER + /* + for now, only InnoDB supports 2pc. It means we can always safely + rollback all pending transactions, without risking inconsistent data + */ + DBUG_ASSERT(total_ha_2pc == (ulong) opt_bin_log+1); // only InnoDB and binlog + tc_heuristic_recover= TC_HEURISTIC_RECOVER_ROLLBACK; // forcing ROLLBACK + info.dry_run=FALSE; +#endif + + for (info.len= MAX_XID_LIST_SIZE ; + info.list==0 && info.len > MIN_XID_LIST_SIZE; info.len/=2) + { + info.list=(XID *)my_malloc(info.len*sizeof(XID), MYF(0)); + } + if (!info.list) + { + sql_print_error(ER(ER_OUTOFMEMORY), info.len*sizeof(XID)); + DBUG_RETURN(1); + } + + plugin_foreach(NULL, xarecover_handlerton, + MYSQL_STORAGE_ENGINE_PLUGIN, &info); + + my_free((gptr)info.list, MYF(0)); + if (info.found_foreign_xids) + sql_print_warning("Found %d prepared XA transactions", + info.found_foreign_xids); + if (info.dry_run && info.found_my_xids) { sql_print_error("Found %d prepared transactions! It means that mysqld was " "not shut down properly last time and critical recovery " @@ -1059,10 +1099,10 @@ int ha_recover(HASH *commit_list) "after a crash. You have to start mysqld with " "--tc-heuristic-recover switch to commit or rollback " "pending transactions.", - found_my_xids, opt_tc_log_file); + info.found_my_xids, opt_tc_log_file); DBUG_RETURN(1); } - if (commit_list) + if (info.commit_list) sql_print_information("Crash recovery finished."); DBUG_RETURN(0); } @@ -1135,24 +1175,17 @@ bool mysql_xa_recover(THD *thd) int ha_release_temporary_latches(THD *thd) { -#ifdef HAVE_INNOBASE_DB - if (opt_innodb) - innobase_release_temporary_latches(thd); +#ifdef WITH_INNOBASE_STORAGE_ENGINE + innobase_release_temporary_latches(thd); #endif return 0; } -/* - Export statistics for different engines. Currently we use it only for - InnoDB. 
-*/ - int ha_update_statistics() { -#ifdef HAVE_INNOBASE_DB - if (opt_innodb) - innodb_export_status(); +#ifdef WITH_INNOBASE_STORAGE_ENGINE + innodb_export_status(); #endif return 0; } @@ -1181,7 +1214,8 @@ int ha_rollback_to_savepoint(THD *thd, SAVEPOINT *sv) my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), err); error=1; } - statistic_increment(thd->status_var.ha_savepoint_rollback_count,&LOCK_status); + statistic_increment(thd->status_var.ha_savepoint_rollback_count, + &LOCK_status); trans->no_2pc|=(*ht)->prepare == 0; } /* @@ -1261,38 +1295,62 @@ int ha_release_savepoint(THD *thd, SAVEPOINT *sv) } +static my_bool snapshot_handlerton(THD *thd, st_plugin_int *plugin, + void *arg) +{ + handlerton *hton= (handlerton *) plugin->plugin->info; + if (hton->state == SHOW_OPTION_YES && + hton->start_consistent_snapshot) + { + hton->start_consistent_snapshot(thd); + *((bool *)arg)= false; + } + return FALSE; +} + int ha_start_consistent_snapshot(THD *thd) { -#ifdef HAVE_INNOBASE_DB - if ((have_innodb == SHOW_OPTION_YES) && - !innobase_start_trx_and_assign_read_view(thd)) - return 0; -#endif + bool warn= true; + + plugin_foreach(thd, snapshot_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, &warn); + /* Same idea as when one wants to CREATE TABLE in one engine which does not exist: */ - push_warning(thd, MYSQL_ERROR::WARN_LEVEL_WARN, ER_UNKNOWN_ERROR, - "This MySQL server does not support any " - "consistent-read capable storage engine"); + if (warn) + push_warning(thd, MYSQL_ERROR::WARN_LEVEL_WARN, ER_UNKNOWN_ERROR, + "This MySQL server does not support any " + "consistent-read capable storage engine"); return 0; } -bool ha_flush_logs() +static my_bool flush_handlerton(THD *thd, st_plugin_int *plugin, + void *arg) { - bool result=0; -#ifdef HAVE_BERKELEY_DB - if ((have_berkeley_db == SHOW_OPTION_YES) && - berkeley_flush_logs()) - result=1; -#endif -#ifdef HAVE_INNOBASE_DB - if ((have_innodb == SHOW_OPTION_YES) && - innobase_flush_logs()) - result=1; -#endif - return result; + handlerton *hton= (handlerton *) plugin->plugin->info; + if (hton->state == SHOW_OPTION_YES && hton->flush_logs && hton->flush_logs()) + return TRUE; + return FALSE; +} + + +bool ha_flush_logs(handlerton *db_type) +{ + if (db_type == NULL) + { + if (plugin_foreach(NULL, flush_handlerton, + MYSQL_STORAGE_ENGINE_PLUGIN, 0)) + return TRUE; + } + else + { + if (db_type->state != SHOW_OPTION_YES || + (db_type->flush_logs && db_type->flush_logs())) + return TRUE; + } + return FALSE; } /* @@ -1300,8 +1358,8 @@ bool ha_flush_logs() The .frm file will be deleted only if we return 0 or ENOENT */ -int ha_delete_table(THD *thd, enum db_type table_type, const char *path, - const char *alias, bool generate_warning) +int ha_delete_table(THD *thd, handlerton *table_type, const char *path, + const char *db, const char *alias, bool generate_warning) { handler *file; char tmp_path[FN_REFLEN]; @@ -1315,8 +1373,8 @@ int ha_delete_table(THD *thd, enum db_type table_type, const char *path, dummy_table.s= &dummy_share; /* DB_TYPE_UNKNOWN is used in ALTER TABLE when renaming only .frm files */ - if (table_type == DB_TYPE_UNKNOWN || - ! (file=get_new_handler(&dummy_table, thd->mem_root, table_type))) + if (table_type == NULL || + ! 
(file=get_new_handler(&dummy_share, thd->mem_root, table_type)))
    DBUG_RETURN(ENOENT);

   if (lower_case_table_names == 2 && !(file->table_flags() & HA_FILE_BASED))
@@ -1349,7 +1407,12 @@ int ha_delete_table(THD *thd, enum db_type table_type, const char *path,
     thd->net.last_error[0]= 0;

     /* Fill up strucutures that print_error may need */
-    dummy_table.s->path= path;
+    dummy_share.path.str= (char*) path;
+    dummy_share.path.length= strlen(path);
+    dummy_share.db.str= (char*) db;
+    dummy_share.db.length= strlen(db);
+    dummy_share.table_name.str= (char*) alias;
+    dummy_share.table_name.length= strlen(alias);
     dummy_table.alias= alias;

     file->print_error(error, 0);
@@ -1371,16 +1434,26 @@
 ** General handler functions
 ****************************************************************************/

- /* Open database-handler. Try O_RDONLY if can't open as O_RDWR */
- /* Don't wait for locks if not HA_OPEN_WAIT_IF_LOCKED is set */
+/*
+  Open database-handler.

-int handler::ha_open(const char *name, int mode, int test_if_locked)
+  IMPLEMENTATION
+    Try O_RDONLY if cannot open as O_RDWR
+    Don't wait for locks if HA_OPEN_WAIT_IF_LOCKED is not set
+*/
+
+int handler::ha_open(TABLE *table_arg, const char *name, int mode,
+                     int test_if_locked)
 {
   int error;
   DBUG_ENTER("handler::ha_open");
-  DBUG_PRINT("enter",("name: %s db_type: %d db_stat: %d mode: %d lock_test: %d",
-                      name, table->s->db_type, table->db_stat, mode,
-                      test_if_locked));
+  DBUG_PRINT("enter",
+             ("name: %s db_type: %d db_stat: %d mode: %d lock_test: %d",
+              name, table_share->db_type, table_arg->db_stat, mode,
+              test_if_locked));
+
+  table= table_arg;
+  DBUG_ASSERT(table->s == table_share);

   if ((error=open(name,mode,test_if_locked)))
   {
@@ -1393,7 +1466,7 @@ int handler::ha_open(const char *name, int mode, int test_if_locked)
   }
   if (error)
   {
-    my_errno=error;                             /* Safeguard */
+    my_errno= error;                            /* Safeguard */
     DBUG_PRINT("error",("error: %d  errno: %d",error,errno));
   }
   else
@@ -1411,10 +1484,93 @@
     }
     else
       dupp_ref=ref+ALIGN_SIZE(ref_length);
+
+    if (ha_allocate_read_write_set(table->s->fields))
+      error= 1;
   }
   DBUG_RETURN(error);
 }

+
+int handler::ha_initialise()
+{
+  DBUG_ENTER("ha_initialise");
+  DBUG_RETURN(FALSE);
+}
+
+
+/*
+  Initialize bit maps for used fields
+
+  Called from open_table_from_share()
+*/
+
+int handler::ha_allocate_read_write_set(ulong no_fields)
+{
+  uint bitmap_size= bitmap_buffer_size(no_fields+1);
+  uint32 *read_buf, *write_buf;
+  DBUG_ENTER("ha_allocate_read_write_set");
+  DBUG_PRINT("enter", ("no_fields = %d", no_fields));
+
+  if (!multi_alloc_root(&table->mem_root,
+                        &read_set, sizeof(MY_BITMAP),
+                        &write_set, sizeof(MY_BITMAP),
+                        &read_buf, bitmap_size,
+                        &write_buf, bitmap_size,
+                        NullS))
+  {
+    DBUG_RETURN(TRUE);
+  }
+  bitmap_init(read_set, read_buf, no_fields+1, FALSE);
+  bitmap_init(write_set, write_buf, no_fields+1, FALSE);
+  table->read_set= read_set;
+  table->write_set= write_set;
+  ha_clear_all_set();
+  DBUG_RETURN(FALSE);
+}
+
+void handler::ha_clear_all_set()
+{
+  DBUG_ENTER("ha_clear_all_set");
+  bitmap_clear_all(read_set);
+  bitmap_clear_all(write_set);
+  bitmap_set_bit(read_set, 0);
+  bitmap_set_bit(write_set, 0);
+  DBUG_VOID_RETURN;
+}
+
+int handler::ha_retrieve_all_cols()
+{
+  DBUG_ENTER("handler::ha_retrieve_all_cols");
+  bitmap_set_all(read_set);
+  DBUG_RETURN(0);
+}
+
+int handler::ha_retrieve_all_pk()
+{
+  DBUG_ENTER("ha_retrieve_all_pk");
ha_set_primary_key_in_read_set(); + DBUG_RETURN(0); +} + +void handler::ha_set_primary_key_in_read_set() +{ + ulong prim_key= table->s->primary_key; + DBUG_ENTER("handler::ha_set_primary_key_in_read_set"); + DBUG_PRINT("info", ("Primary key = %d", prim_key)); + if (prim_key != MAX_KEY) + { + KEY_PART_INFO *key_part= table->key_info[prim_key].key_part; + KEY_PART_INFO *key_part_end= key_part + + table->key_info[prim_key].key_parts; + for (;key_part != key_part_end; ++key_part) + ha_set_bit_in_read_set(key_part->fieldnr); + } + DBUG_VOID_RETURN; +} + + + /* Read first row (only) from a table This is never called for InnoDB or BDB tables, as these table types @@ -1426,7 +1582,8 @@ int handler::read_first_row(byte * buf, uint primary_key) register int error; DBUG_ENTER("handler::read_first_row"); - statistic_increment(current_thd->status_var.ha_read_first_count,&LOCK_status); + statistic_increment(table->in_use->status_var.ha_read_first_count, + &LOCK_status); /* If there is very few deleted rows in the table, find the first row by @@ -1443,7 +1600,7 @@ int handler::read_first_row(byte * buf, uint primary_key) else { /* Find the first row through the primary key */ - (void) ha_index_init(primary_key); + (void) ha_index_init(primary_key, 0); error=index_first(buf); (void) ha_index_end(); } @@ -1627,7 +1784,7 @@ ulonglong handler::get_auto_increment() int error; (void) extra(HA_EXTRA_KEYREAD); - index_init(table->s->next_number_index); + index_init(table->s->next_number_index, 1); if (!table->s->next_number_key_offset) { // Autoincrement at key-start error=index_last(table->record[1]); @@ -1692,9 +1849,10 @@ void handler::print_error(int error, myf errflag) uint key_nr=get_dup_key(error); if ((int) key_nr >= 0) { - /* Write the dupplicated key in the error message */ + /* Write the duplicated key in the error message */ char key[MAX_KEY_LENGTH]; String str(key,sizeof(key),system_charset_info); + /* Table is opened and defined at this point */ key_unpack(&str,table,(uint) key_nr); uint max_length=MYSQL_ERRMSG_SIZE-(uint) strlen(ER(ER_DUP_ENTRY)); if (str.length() >= max_length) @@ -1781,20 +1939,12 @@ void handler::print_error(int error, myf errflag) textno=ER_TABLE_DEF_CHANGED; break; case HA_ERR_NO_SUCH_TABLE: - { - /* - We have to use path to find database name instead of using - table->table_cache_key because if the table didn't exist, then - table_cache_key was not set up - */ - char *db; - char buff[FN_REFLEN]; - uint length= dirname_part(buff,table->s->path); - buff[length-1]=0; - db=buff+dirname_length(buff); - my_error(ER_NO_SUCH_TABLE, MYF(0), db, table->alias); + my_error(ER_NO_SUCH_TABLE, MYF(0), table_share->db.str, + table_share->table_name.str); + break; + case HA_ERR_RBR_LOGGING_FAILED: + textno= ER_BINLOG_ROW_LOGGING_FAILED; break; - } default: { /* The error was "unknown" to this function. 
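Before the next hunk, a note on how the read/write sets introduced above are meant to be driven. This is a sketch, not code from this patch; the column number 1 is an arbitrary placeholder (field numbers appear to be 1-based here, with bit 0 reserved by ha_clear_all_set()):

    handler *file= table->file;
    file->ha_clear_all_set();                /* drop all column marks */
    file->ha_set_primary_key_in_read_set();  /* PK columns always needed */
    file->ha_set_bit_in_read_set(1);         /* also fetch column 1 */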
@@ -1815,7 +1965,7 @@ void handler::print_error(int error, myf errflag)
       DBUG_VOID_RETURN;
     }
   }
-  my_error(textno, errflag, table->alias, error);
+  my_error(textno, errflag, table_share->table_name.str, error);
   DBUG_VOID_RETURN;
 }
@@ -1874,7 +2024,7 @@ int handler::delete_table(const char *name)

   for (const char **ext=bas_ext(); *ext ; ext++)
   {
-    fn_format(buff, name, "", *ext, 2 | 4);
+    fn_format(buff, name, "", *ext, MY_UNPACK_FILENAME|MY_APPEND_EXT);
     if (my_delete_with_symlink(buff, MYF(0)))
     {
       if ((error= my_errno) != ENOENT)
@@ -1903,6 +2053,14 @@ int handler::rename_table(const char * from, const char * to)
   return error;
 }

+
+void handler::drop_table(const char *name)
+{
+  close();
+  delete_table(name);
+}
+
+
 /*
   Tell the storage engine that it is allowed to "disable transaction" in the
   handler. It is a hint that ACID is not required - it is used in NDB for
@@ -1953,23 +2111,37 @@ int handler::index_next_same(byte *buf, const byte *key, uint keylen)

 /*
   Initiates table-file and calls apropriate database-creator
-  Returns 1 if something got wrong
+
+  NOTES
+    We must have a write lock on LOCK_open to be sure no other thread
+    interferes with the table
+
+  RETURN
+    0  ok
+    1  error
 */

-int ha_create_table(const char *name, HA_CREATE_INFO *create_info,
+int ha_create_table(THD *thd, const char *path,
+                    const char *db, const char *table_name,
+                    HA_CREATE_INFO *create_info,
                     bool update_create_info)
 {
-  int error;
+  int error= 1;
   TABLE table;
   char name_buff[FN_REFLEN];
+  const char *name;
+  TABLE_SHARE share;
   DBUG_ENTER("ha_create_table");
+
+  init_tmp_table_share(&share, db, 0, table_name, path);
+  if (open_table_def(thd, &share, 0) ||
+      open_table_from_share(thd, &share, "", 0, (uint) READ_ALL, 0, &table))
+    goto err;

-  if (openfrm(current_thd, name,"",0,(uint) READ_ALL, 0, &table))
-    DBUG_RETURN(1);
   if (update_create_info)
-  {
     update_create_info_from_table(create_info, &table);
-  }
+
+  name= share.path.str;
   if (lower_case_table_names == 2 &&
       !(table.file->table_flags() & HA_FILE_BASED))
   {
@@ -1979,27 +2151,32 @@ int ha_create_table(const char *name, HA_CREATE_INFO *create_info,
     name= name_buff;
   }

-  error=table.file->create(name,&table,create_info);
-  VOID(closefrm(&table));
+  error= table.file->create(name, &table, create_info);
+  VOID(closefrm(&table, 0));
   if (error)
-    my_error(ER_CANT_CREATE_TABLE, MYF(ME_BELL+ME_WAITTANG), name,error);
+  {
+    strxmov(name_buff, db, ".", table_name, NullS);
+    my_error(ER_CANT_CREATE_TABLE, MYF(ME_BELL+ME_WAITTANG), name_buff, error);
+  }
+err:
+  free_table_share(&share);
   DBUG_RETURN(error != 0);
 }

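The NOTES section above states a locking contract rather than code; a hypothetical caller honouring it would look like this (sketch only, error handling elided):

    VOID(pthread_mutex_lock(&LOCK_open));
    error= ha_create_table(thd, path, db, table_name, &create_info, FALSE);
    VOID(pthread_mutex_unlock(&LOCK_open));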
 /*
-  Try to discover table from engine and
-  if found, write the frm file to disk.
+  Try to discover table from engine
+
+  NOTES
+    If found, write the frm file to disk.

   RETURN VALUES:
-  -1 : Table did not exists
-   0 : Table created ok
-  > 0 : Error, table existed but could not be created
+  -1    Table did not exist
+   0    Table created ok
+  > 0   Error, table existed but could not be created
 */

-int ha_create_table_from_engine(THD* thd,
-                                const char *db,
-                                const char *name)
+int ha_create_table_from_engine(THD* thd, const char *db, const char *name)
 {
   int error;
   const void *frmblob;
@@ -2007,6 +2184,7 @@
   char path[FN_REFLEN];
   HA_CREATE_INFO create_info;
   TABLE table;
+  TABLE_SHARE share;
   DBUG_ENTER("ha_create_table_from_engine");
   DBUG_PRINT("enter", ("name '%s'.'%s'", db, name));
@@ -2022,15 +2200,23 @@
     frmblob and frmlen are set, write the frm to disk
   */

-  (void)strxnmov(path,FN_REFLEN,mysql_data_home,"/",db,"/",name,NullS);
+  (void)strxnmov(path,FN_REFLEN-1,mysql_data_home,"/",db,"/",name,NullS);
   // Save the frm file
   error= writefrm(path, frmblob, frmlen);
   my_free((char*) frmblob, MYF(0));
   if (error)
     DBUG_RETURN(2);

-  if (openfrm(thd, path,"",0,(uint) READ_ALL, 0, &table))
+  init_tmp_table_share(&share, db, 0, name, path);
+  if (open_table_def(thd, &share, 0))
+  {
     DBUG_RETURN(3);
+  }
+  if (open_table_from_share(thd, &share, "" ,0, 0, 0, &table))
+  {
+    free_table_share(&share);
+    DBUG_RETURN(3);
+  }

   update_create_info_from_table(&create_info, &table);
   create_info.table_options|= HA_OPTION_CREATE_FROM_ENGINE;
@@ -2042,7 +2228,7 @@
     my_casedn_str(files_charset_info, path);
   }
   error=table.file->create(path,&table,&create_info);
-  VOID(closefrm(&table));
+  VOID(closefrm(&table, 1));

   DBUG_RETURN(error != 0);
 }
@@ -2160,7 +2346,7 @@ int ha_discover(THD *thd, const char *db, const char *name,
   DBUG_PRINT("enter", ("db: %s, name: %s", db, name));
   if (is_prefix(name,tmp_file_prefix)) /* skip temporary tables */
     DBUG_RETURN(error);
-#ifdef HAVE_NDBCLUSTER_DB
+#ifdef WITH_NDBCLUSTER_STORAGE_ENGINE
   if (have_ndbcluster == SHOW_OPTION_YES)
     error= ndbcluster_discover(thd, db, name, frmblob, frmlen);
 #endif
@@ -2184,7 +2370,7 @@ ha_find_files(THD *thd,const char *db,const char *path,
   DBUG_ENTER("ha_find_files");
   DBUG_PRINT("enter", ("db: %s, path: %s, wild: %s, dir: %d",
                        db, path, wild, dir));
-#ifdef HAVE_NDBCLUSTER_DB
+#ifdef WITH_NDBCLUSTER_STORAGE_ENGINE
   if (have_ndbcluster == SHOW_OPTION_YES)
     error= ndbcluster_find_files(thd, db, path, wild, dir, files);
 #endif
@@ -2206,7 +2392,7 @@ int ha_table_exists_in_engine(THD* thd, const char* db, const char* name)
   int error= 0;
   DBUG_ENTER("ha_table_exists_in_engine");
   DBUG_PRINT("enter", ("db: %s, name: %s", db, name));
-#ifdef HAVE_NDBCLUSTER_DB
+#ifdef WITH_NDBCLUSTER_STORAGE_ENGINE
   if (have_ndbcluster == SHOW_OPTION_YES)
     error= ndbcluster_table_exists_in_engine(thd, db, name);
 #endif
@@ -2460,7 +2646,7 @@ int handler::compare_key(key_range *range)
 int handler::index_read_idx(byte * buf, uint index, const byte * key,
                             uint key_len, enum ha_rkey_function find_flag)
 {
-  int error= ha_index_init(index);
+  int error= ha_index_init(index, 0);
   if (!error)
     error= index_read(buf, key, key_len, find_flag);
   if (!error)
@@ -2484,40 +2670,50 @@ int handler::index_read_idx(byte * buf, uint index, const byte * key,
     pointer             pointer to TYPELIB structure
 */

+static my_bool exts_handlerton(THD *unused, st_plugin_int *plugin,
+                               void *arg)
+{
+  List<char> *found_exts= (List<char> *) arg;
+  handlerton *hton= (handlerton *) plugin->plugin->info;
+  handler *file;
+  if (hton->state == SHOW_OPTION_YES && hton->create &&
+      (file= hton->create((TABLE_SHARE*) 0)))
+  {
+    List_iterator_fast<char> it(*found_exts);
+    const char **ext, *old_ext;
+
+    for (ext= file->bas_ext(); *ext; ext++)
+    {
+      while ((old_ext= it++))
+      {
+        if (!strcmp(old_ext, *ext))
+          break;
+      }
+      if (!old_ext)
+        found_exts->push_back((char *) *ext);
+
+      it.rewind();
+    }
+    delete file;
+  }
+  return FALSE;
+}
+
 TYPELIB *ha_known_exts(void)
 {
   MEM_ROOT *mem_root= current_thd->mem_root;
   if (!known_extensions.type_names || mysys_usage_id != known_extensions_id)
   {
-    handlerton **types;
     List<char> found_exts;
-    List_iterator_fast<char> it(found_exts);
     const char **ext, *old_ext;

     known_extensions_id= mysys_usage_id;
     found_exts.push_back((char*) triggers_file_ext);
     found_exts.push_back((char*) trigname_file_ext);
-    for (types= sys_table_types; *types; types++)
-    {
-      if ((*types)->state == SHOW_OPTION_YES)
-      {
-        handler *file= get_new_handler(0, mem_root,
-                                       (enum db_type) (*types)->db_type);
-        for (ext= file->bas_ext(); *ext; ext++)
-        {
-          while ((old_ext= it++))
-          {
-            if (!strcmp(old_ext, *ext))
-              break;
-          }
-          if (!old_ext)
-            found_exts.push_back((char *) *ext);
-
-          it.rewind();
-        }
-        delete file;
-      }
-    }
+
+    plugin_foreach(NULL, exts_handlerton,
+                   MYSQL_STORAGE_ENGINE_PLUGIN, &found_exts);
+
     ext= (const char **) my_once_alloc(sizeof(char *)*
                                        (found_exts.elements+1),
                                        MYF(MY_WME | MY_FAE));
@@ -2526,6 +2722,7 @@ TYPELIB *ha_known_exts(void)

     known_extensions.count= found_exts.elements;
     known_extensions.type_names= ext;

+    List_iterator_fast<char> it(found_exts);
     while ((old_ext= it++))
       *ext++= old_ext;
     *ext= 0;
@@ -2534,6 +2731,168 @@
 }


+static bool stat_print(THD *thd, const char *type, uint type_len,
+                       const char *file, uint file_len,
+                       const char *status, uint status_len)
+{
+  Protocol *protocol= thd->protocol;
+  protocol->prepare_for_resend();
+  protocol->store(type, type_len, system_charset_info);
+  protocol->store(file, file_len, system_charset_info);
+  protocol->store(status, status_len, system_charset_info);
+  if (protocol->write())
+    return TRUE;
+  return FALSE;
+}
+
+
+static my_bool showstat_handlerton(THD *thd, st_plugin_int *plugin,
+                                   void *arg)
+{
+  enum ha_stat_type stat= *(enum ha_stat_type *) arg;
+  handlerton *hton= (handlerton *) plugin->plugin->info;
+  if (hton->state == SHOW_OPTION_YES && hton->show_status &&
+      hton->show_status(thd, stat_print, stat))
+    return TRUE;
+  return FALSE;
+}
+
+bool ha_show_status(THD *thd, handlerton *db_type, enum ha_stat_type stat)
+{
+  List<Item> field_list;
+  Protocol *protocol= thd->protocol;
+  bool result;
+
+  field_list.push_back(new Item_empty_string("Type",10));
+  field_list.push_back(new Item_empty_string("Name",FN_REFLEN));
+  field_list.push_back(new Item_empty_string("Status",10));
+
+  if (protocol->send_fields(&field_list,
+                            Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF))
+    return TRUE;
+
+  if (db_type == NULL)
+  {
+    result= plugin_foreach(thd, showstat_handlerton,
+                           MYSQL_STORAGE_ENGINE_PLUGIN, &stat);
+  }
+  else
+  {
+    if (db_type->state != SHOW_OPTION_YES)
+      result= stat_print(thd, db_type->name, strlen(db_type->name),
+                         "", 0, "DISABLED", 8) ? 1 : 0;
+    else
+      result= db_type->show_status &&
+              db_type->show_status(thd, stat_print, stat) ? 1 : 0;
+  }
+
+  if (!result)
+    send_eof(thd);
+  return result;
+}
+
+/*
+  Function to check if the conditions for row-based binlogging are
+  correct for the table.
+ + A row in the given table should be replicated if: + - Row-based replication is on + - It is not a temporary table + - The binlog is enabled + - The table shall be binlogged (binlog_*_db rules) +*/ + +#ifdef HAVE_ROW_BASED_REPLICATION +static bool check_table_binlog_row_based(THD *thd, TABLE *table) +{ + return + binlog_row_based && + thd && (thd->options & OPTION_BIN_LOG) && + (table->s->tmp_table == NO_TMP_TABLE) && + binlog_filter->db_ok(table->s->db.str); +} + +template<class RowsEventT> int binlog_log_row(TABLE* table, + const byte *before_record, + const byte *after_record) +{ + bool error= 0; + THD *const thd= current_thd; + + if (check_table_binlog_row_based(thd, table)) + { + MY_BITMAP cols; + /* Potential buffer on the stack for the bitmap */ + uint32 bitbuf[BITMAP_STACKBUF_SIZE/sizeof(uint32)]; + uint n_fields= table->s->fields; + my_bool use_bitbuf= n_fields <= sizeof(bitbuf)*8; + if (likely(!(error= bitmap_init(&cols, + use_bitbuf ? bitbuf : NULL, + (n_fields + 7) & ~7UL, + false)))) + { + bitmap_set_all(&cols); + error= + RowsEventT::binlog_row_logging_function(thd, table, + table->file->has_transactions(), + &cols, table->s->fields, + before_record, after_record); + if (!use_bitbuf) + bitmap_free(&cols); + } + } + return error ? HA_ERR_RBR_LOGGING_FAILED : 0; +} + + +/* + Instantiate the versions we need for the above template function, because we + have -fno-implicit-template as compiling option. +*/ + +template int binlog_log_row<Write_rows_log_event>(TABLE *, const byte *, const byte *); +template int binlog_log_row<Delete_rows_log_event>(TABLE *, const byte *, const byte *); +template int binlog_log_row<Update_rows_log_event>(TABLE *, const byte *, const byte *); + +#endif /* HAVE_ROW_BASED_REPLICATION */ + +int handler::ha_write_row(byte *buf) +{ + int error; + if (likely(!(error= write_row(buf)))) + { +#ifdef HAVE_ROW_BASED_REPLICATION + error= binlog_log_row<Write_rows_log_event>(table, 0, buf); +#endif + } + return error; +} + +int handler::ha_update_row(const byte *old_data, byte *new_data) +{ + int error; + if (likely(!(error= update_row(old_data, new_data)))) + { +#ifdef HAVE_ROW_BASED_REPLICATION + error= binlog_log_row<Update_rows_log_event>(table, old_data, new_data); +#endif + } + return error; +} + +int handler::ha_delete_row(const byte *buf) +{ + int error; + if (likely(!(error= delete_row(buf)))) + { +#ifdef HAVE_ROW_BASED_REPLICATION + error= binlog_log_row<Delete_rows_log_event>(table, buf, 0); +#endif + } + return error; +} + + #ifdef HAVE_REPLICATION /* Reports to table handlers up to which position we have sent the binlog @@ -2556,11 +2915,10 @@ TYPELIB *ha_known_exts(void) int ha_repl_report_sent_binlog(THD *thd, char *log_file_name, my_off_t end_offset) { -#ifdef HAVE_INNOBASE_DB - return innobase_repl_report_sent_binlog(thd,log_file_name,end_offset); -#else - return 0; +#ifdef WITH_INNOBASE_STORAGE_ENGINE + innobase_repl_report_sent_binlog(thd, log_file_name, end_offset); #endif + return 0; } @@ -2585,3 +2943,4 @@ int ha_repl_report_replication_stop(THD *thd) return 0; } #endif /* HAVE_REPLICATION */ + diff --git a/sql/handler.h b/sql/handler.h index 91c5be9ba39..eff4ecdc4d2 100644 --- a/sql/handler.h +++ b/sql/handler.h @@ -28,10 +28,7 @@ #define NO_HASH /* Not yet implemented */ #endif -#if defined(HAVE_BERKELEY_DB) || defined(HAVE_INNOBASE_DB) || \ - defined(HAVE_NDBCLUSTER_DB) #define USING_TRANSACTIONS -#endif // the following is for checking tables @@ -77,6 +74,13 @@ */ #define HA_CAN_INSERT_DELAYED (1 << 14) #define 
HA_PRIMARY_KEY_IN_READ_INDEX (1 << 15) +/* + If HA_PRIMARY_KEY_ALLOW_RANDOM_ACCESS is set, it means that the engine can + do this: the position of an arbitrary record can be retrieved using + position() when the table has a primary key, effectively allowing random + access on the table based on a given record. +*/ +#define HA_PRIMARY_KEY_ALLOW_RANDOM_ACCESS (1 << 16) #define HA_NOT_DELETE_WITH_CACHE (1 << 18) #define HA_NO_PREFIX_CHAR_KEYS (1 << 20) #define HA_CAN_FULLTEXT (1 << 21) @@ -89,6 +93,12 @@ #define HA_CAN_BIT_FIELD (1 << 28) /* supports bit fields */ #define HA_NEED_READ_RANGE_BUFFER (1 << 29) /* for read_multi_range */ #define HA_ANY_INDEX_MAY_BE_UNIQUE (1 << 30) +#define HA_NO_COPY_ON_ALTER (1 << 31) + +/* Flags for partition handlers */ +#define HA_CAN_PARTITION (1 << 0) /* Partition support */ +#define HA_CAN_UPDATE_PARTITION_KEY (1 << 1) +#define HA_CAN_PARTITION_UNIQUE (1 << 2) /* bits in index_flags(index_number) for what you can do with index */ @@ -99,6 +109,10 @@ #define HA_ONLY_WHOLE_INDEX 16 /* Can't use part key searches */ #define HA_KEYREAD_ONLY 64 /* Support HA_EXTRA_KEYREAD */ +/* bits in alter_table_flags */ +#define HA_ONLINE_ADD_EMPTY_PARTITION 1 +#define HA_ONLINE_DROP_PARTITION 2 + /* Index scan will not return records in rowid order. Not guaranteed to be set for unordered (e.g. HASH) indexes. @@ -118,7 +132,7 @@ example + csv + heap + blackhole + federated + 0 (yes, the sum is deliberately inaccurate) */ -#define MAX_HA 14 +#define MAX_HA 15 /* Bits in index_ddl_flags(KEY *wanted_index) @@ -171,7 +185,7 @@ /* Options of START TRANSACTION statement (and later of SET TRANSACTION stmt) */ #define MYSQL_START_TRANS_OPT_WITH_CONS_SNAPSHOT 1 -enum db_type +enum legacy_db_type { DB_TYPE_UNKNOWN=0,DB_TYPE_DIAB_ISAM=1, DB_TYPE_HASH,DB_TYPE_MISAM,DB_TYPE_PISAM, @@ -182,7 +196,9 @@ enum db_type DB_TYPE_EXAMPLE_DB, DB_TYPE_ARCHIVE_DB, DB_TYPE_CSV_DB, DB_TYPE_FEDERATED_DB, DB_TYPE_BLACKHOLE_DB, - DB_TYPE_DEFAULT // Must be last + DB_TYPE_PARTITION_DB, + DB_TYPE_BINLOG, + DB_TYPE_DEFAULT=127 // Must be last }; enum row_type { ROW_TYPE_NOT_USED=-1, ROW_TYPE_DEFAULT, ROW_TYPE_FIXED, @@ -222,6 +238,9 @@ typedef ulonglong my_xid; // this line is the same as in log_event.h #define MAXGTRIDSIZE 64 #define MAXBQUALSIZE 64 +#define COMPATIBLE_DATA_YES 0 +#define COMPATIBLE_DATA_NO 1 + struct xid_t { long formatID; long gtrid_length; @@ -296,6 +315,18 @@ typedef struct xid_t XID; #define MAX_XID_LIST_SIZE (1024*128) #endif +/* The handler for a table type. Will be included in the TABLE structure */ + +struct st_table; +typedef struct st_table TABLE; +typedef struct st_table_share TABLE_SHARE; +struct st_foreign_key_info; +typedef struct st_foreign_key_info FOREIGN_KEY_INFO; +typedef bool (stat_print_fn)(THD *thd, const char *type, uint type_len, + const char *file, uint file_len, + const char *status, uint status_len); +enum ha_stat_type { HA_ENGINE_STATUS, HA_ENGINE_LOGS, HA_ENGINE_MUTEX }; + /* handlerton is a singleton structure - one instance per storage engine - to provide access to storage engine functionality that works on the @@ -310,6 +341,13 @@ typedef struct xid_t XID; typedef struct { /* + handlerton structure version + */ + const int interface_version; +#define MYSQL_HANDLERTON_INTERFACE_VERSION 0x0000 + + + /* storage engine name as it should be printed to a user */ const char *name; @@ -328,7 +366,7 @@ typedef struct Historical number used for frm file to determine the correct storage engine. This is going away and new engines will just use "name" for this. 
*/ - enum db_type db_type; + enum legacy_db_type db_type; /* Method that initizlizes a storage engine */ @@ -390,12 +428,20 @@ typedef struct void *(*create_cursor_read_view)(); void (*set_cursor_read_view)(void *); void (*close_cursor_read_view)(void *); + handler *(*create)(TABLE_SHARE *table); + void (*drop_database)(char* path); + int (*panic)(enum ha_panic_function flag); + int (*start_consistent_snapshot)(THD *thd); + bool (*flush_logs)(); + bool (*show_status)(THD *thd, stat_print_fn *print, enum ha_stat_type stat); uint32 flags; /* global handler flags */ } handlerton; +extern const handlerton default_hton; + struct show_table_alias_st { const char *alias; - const char *type; + enum legacy_db_type type; }; /* Possible flags of a handlerton */ @@ -404,6 +450,8 @@ struct show_table_alias_st { #define HTON_ALTER_NOT_SUPPORTED (1 << 1) //Engine does not support alter #define HTON_CAN_RECREATE (1 << 2) //Delete all is used fro truncate #define HTON_HIDDEN (1 << 3) //Engine does not appear in lists +#define HTON_FLUSH_AFTER_RENAME (1 << 4) +#define HTON_NOT_USER_SELECTABLE (1 << 5) typedef struct st_thd_trans { @@ -418,6 +466,255 @@ typedef struct st_thd_trans enum enum_tx_isolation { ISO_READ_UNCOMMITTED, ISO_READ_COMMITTED, ISO_REPEATABLE_READ, ISO_SERIALIZABLE}; + +enum ndb_distribution { ND_KEYHASH= 0, ND_LINHASH= 1 }; + +typedef struct { + uint32 start_part; + uint32 end_part; + bool use_bit_array; +} part_id_range; +/** + * An enum and a struct to handle partitioning and subpartitioning. + */ +enum partition_type { + NOT_A_PARTITION= 0, + RANGE_PARTITION, + HASH_PARTITION, + LIST_PARTITION +}; + +enum partition_state { + PART_NORMAL= 0, + PART_IS_DROPPED= 1, + PART_TO_BE_DROPPED= 2, + PART_DROPPING= 3, + PART_IS_ADDED= 4, + PART_ADDING= 5, + PART_ADDED= 6 +}; + +#define UNDEF_NODEGROUP 65535 +class Item; + +class partition_element :public Sql_alloc { +public: + List<partition_element> subpartitions; + List<longlong> list_val_list; + ulonglong part_max_rows; + ulonglong part_min_rows; + char *partition_name; + char *tablespace_name; + longlong range_value; + char* part_comment; + char* data_file_name; + char* index_file_name; + handlerton *engine_type; + enum partition_state part_state; + uint16 nodegroup_id; + + partition_element() + : part_max_rows(0), part_min_rows(0), partition_name(NULL), + tablespace_name(NULL), range_value(0), part_comment(NULL), + data_file_name(NULL), index_file_name(NULL), + engine_type(NULL),part_state(PART_NORMAL), + nodegroup_id(UNDEF_NODEGROUP) + { + subpartitions.empty(); + list_val_list.empty(); + } + ~partition_element() {} +}; + +typedef struct { + longlong list_value; + uint partition_id; +} LIST_PART_ENTRY; + +class partition_info; + +typedef bool (*get_part_id_func)(partition_info *part_info, + uint32 *part_id); +typedef uint32 (*get_subpart_id_func)(partition_info *part_info); + +class partition_info :public Sql_alloc { +public: + /* + * Here comes a set of definitions needed for partitioned table handlers. + */ + List<partition_element> partitions; + List<partition_element> temp_partitions; + + List<char> part_field_list; + List<char> subpart_field_list; + + /* + If there is no subpartitioning, use only this func to get partition ids. + If there is subpartitioning, use the this func to get partition id when + you have both partition and subpartition fields. 
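
The three function pointers split the work as follows: get_part_partition_id uses only the partition fields, get_subpartition_id uses only the subpartition fields, and get_partition_id combines both. One plausible composition, sketched under the assumption that the leaf partitions of a subpartitioned table are numbered partition-major; the helper name is illustrative and error handling is elided:

    static uint32 physical_part_id(partition_info *part_info)
    {
      uint32 part_id;
      /* partition id from the partition fields only */
      (void) part_info->get_part_partition_id(part_info, &part_id);
      /* subpartition id from the subpartition fields only */
      uint32 sub_id= part_info->get_subpartition_id(part_info);
      return part_id * part_info->no_subparts + sub_id;
    }

With no_parts == 4 and no_subparts == 2 this addresses the 4 * 2 = 8 leaf partitions that get_tot_partitions(), declared further down, reports.
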
+ */ + get_part_id_func get_partition_id; + + /* Get partition id when we don't have subpartition fields */ + get_part_id_func get_part_partition_id; + + /* + Get subpartition id when we have don't have partition fields by we do + have subpartition ids. + Mikael said that for given constant tuple + {subpart_field1, ..., subpart_fieldN} the subpartition id will be the + same in all subpartitions + */ + get_subpart_id_func get_subpartition_id; + + /* NULL-terminated list of fields used in partitioned expression */ + Field **part_field_array; + /* NULL-terminated list of fields used in subpartitioned expression */ + Field **subpart_field_array; + + /* + Array of all fields used in partition and subpartition expression, + without duplicates, NULL-terminated. + */ + Field **full_part_field_array; + + Item *part_expr; + Item *subpart_expr; + + Item *item_free_list; + + /* + A bitmap of partitions used by the current query. + Usage pattern: + * It is guaranteed that all partitions are set to be unused on query start. + * Before index/rnd_init(), partition pruning code sets the bits for used + partitions. + * The handler->extra(HA_EXTRA_RESET) call at query end sets all partitions + to be unused. + */ + MY_BITMAP used_partitions; + + union { + longlong *range_int_array; + LIST_PART_ENTRY *list_array; + }; + char* part_info_string; + + char *part_func_string; + char *subpart_func_string; + + partition_element *curr_part_elem; + partition_element *current_partition; + /* + These key_map's are used for Partitioning to enable quick decisions + on whether we can derive more information about which partition to + scan just by looking at what index is used. + */ + key_map all_fields_in_PF, all_fields_in_PPF, all_fields_in_SPF; + key_map some_fields_in_PF; + + handlerton *default_engine_type; + Item_result part_result_type; + partition_type part_type; + partition_type subpart_type; + + uint part_info_len; + uint part_func_len; + uint subpart_func_len; + + uint no_parts; + uint no_subparts; + uint count_curr_parts; + uint count_curr_subparts; + + uint part_error_code; + + uint no_list_values; + + uint no_part_fields; + uint no_subpart_fields; + uint no_full_part_fields; + + uint16 linear_hash_mask; + + bool use_default_partitions; + bool use_default_subpartitions; + bool defined_max_value; + bool list_of_part_fields; + bool list_of_subpart_fields; + bool linear_hash_ind; + + partition_info() + : get_partition_id(NULL), get_part_partition_id(NULL), + get_subpartition_id(NULL), + part_field_array(NULL), subpart_field_array(NULL), + full_part_field_array(NULL), + part_expr(NULL), subpart_expr(NULL), item_free_list(NULL), + list_array(NULL), + part_info_string(NULL), + part_func_string(NULL), subpart_func_string(NULL), + curr_part_elem(NULL), current_partition(NULL), + default_engine_type(NULL), + part_result_type(INT_RESULT), + part_type(NOT_A_PARTITION), subpart_type(NOT_A_PARTITION), + part_info_len(0), part_func_len(0), subpart_func_len(0), + no_parts(0), no_subparts(0), + count_curr_parts(0), count_curr_subparts(0), part_error_code(0), + no_list_values(0), no_part_fields(0), no_subpart_fields(0), + no_full_part_fields(0), linear_hash_mask(0), + use_default_partitions(TRUE), + use_default_subpartitions(TRUE), defined_max_value(FALSE), + list_of_part_fields(FALSE), list_of_subpart_fields(FALSE), + linear_hash_ind(FALSE) + { + all_fields_in_PF.clear_all(); + all_fields_in_PPF.clear_all(); + all_fields_in_SPF.clear_all(); + some_fields_in_PF.clear_all(); + partitions.empty(); + temp_partitions.empty(); + 
part_field_list.empty(); + subpart_field_list.empty(); + } + ~partition_info() {} +}; + + +#ifdef WITH_PARTITION_STORAGE_ENGINE +/* + Answers the question if subpartitioning is used for a certain table + SYNOPSIS + is_sub_partitioned() + part_info A reference to the partition_info struct + RETURN VALUE + Returns true if subpartitioning used and false otherwise + DESCRIPTION + A routine to check for subpartitioning for improved readability of code +*/ +inline +bool is_sub_partitioned(partition_info *part_info) +{ return (part_info->subpart_type == NOT_A_PARTITION ? FALSE : TRUE); } + + +/* + Returns the total number of partitions on the leaf level. + SYNOPSIS + get_tot_partitions() + part_info A reference to the partition_info struct + RETURN VALUE + Returns the number of partitions + DESCRIPTION + A routine to check for number of partitions for improved readability + of code +*/ +inline +uint get_tot_partitions(partition_info *part_info) +{ + return part_info->no_parts * + (is_sub_partitioned(part_info) ? part_info->no_subparts : 1); +} +#endif + typedef struct st_ha_create_information { CHARSET_INFO *table_charset, *default_table_charset; @@ -432,7 +729,7 @@ typedef struct st_ha_create_information ulong raid_chunksize; ulong used_fields; SQL_LIST merge_list; - enum db_type db_type; + handlerton *db_type; enum row_type row_type; uint null_bits; /* NULL bits at start of record */ uint options; /* OR of HA_CREATE_ options */ @@ -445,12 +742,6 @@ typedef struct st_ha_create_information } HA_CREATE_INFO; -/* The handler for a table type. Will be included in the TABLE structure */ - -struct st_table; -typedef struct st_table TABLE; -struct st_foreign_key_info; -typedef struct st_foreign_key_info FOREIGN_KEY_INFO; typedef struct st_savepoint SAVEPOINT; extern ulong savepoint_alloc_size; @@ -468,6 +759,50 @@ typedef struct st_ha_check_opt } HA_CHECK_OPT; +#ifdef WITH_PARTITION_STORAGE_ENGINE +bool is_partition_in_list(char *part_name, List<char> list_part_names); +bool is_partitions_in_table(partition_info *new_part_info, + partition_info *old_part_info); +bool check_reorganise_list(partition_info *new_part_info, + partition_info *old_part_info, + List<char> list_part_names); +bool set_up_defaults_for_partitioning(partition_info *part_info, + handler *file, + ulonglong max_rows, + uint start_no); +handler *get_ha_partition(partition_info *part_info); +int get_parts_for_update(const byte *old_data, byte *new_data, + const byte *rec0, partition_info *part_info, + uint32 *old_part_id, uint32 *new_part_id); +int get_part_for_delete(const byte *buf, const byte *rec0, + partition_info *part_info, uint32 *part_id); +bool check_partition_info(partition_info *part_info,handlerton *eng_type, + handler *file, ulonglong max_rows); +bool fix_partition_func(THD *thd, const char *name, TABLE *table); +char *generate_partition_syntax(partition_info *part_info, + uint *buf_length, bool use_sql_alloc, + bool add_default_info); +bool partition_key_modified(TABLE *table, List<Item> &fields); +void get_partition_set(const TABLE *table, byte *buf, const uint index, + const key_range *key_spec, + part_id_range *part_spec); +void get_full_part_id_from_key(const TABLE *table, byte *buf, + KEY *key_info, + const key_range *key_spec, + part_id_range *part_spec); +bool mysql_unpack_partition(THD *thd, const uchar *part_buf, + uint part_info_len, TABLE *table, + handlerton *default_db_type); +void make_used_partitions_str(partition_info *part_info, String *parts_str); +uint32 get_list_array_idx_for_endpoint(partition_info 
*part_info, + bool left_endpoint, + bool include_endpoint); +uint32 get_partition_id_range_for_endpoint(partition_info *part_info, + bool left_endpoint, + bool include_endpoint); +#endif + + /* This is a buffer area that the handler can use to store rows. 'end_of_used_area' should be kept updated after calls to @@ -485,10 +820,14 @@ typedef struct st_handler_buffer class handler :public Sql_alloc { +#ifdef WITH_PARTITION_STORAGE_ENGINE + friend class ha_partition; +#endif protected: - struct st_table *table; /* The table definition */ + struct st_table_share *table_share; /* The table definition */ + struct st_table *table; /* The current open table */ - virtual int index_init(uint idx) { active_index=idx; return 0; } + virtual int index_init(uint idx, bool sorted) { active_index=idx; return 0; } virtual int index_end() { active_index=MAX_KEY; return 0; } /* rnd_init() can be called two times without rnd_end() in between @@ -500,6 +839,8 @@ class handler :public Sql_alloc virtual int rnd_init(bool scan) =0; virtual int rnd_end() { return 0; } +private: + virtual int reset() { return extra(HA_EXTRA_RESET); } public: const handlerton *ht; /* storage engine of this handler */ byte *ref; /* Pointer to current row */ @@ -542,9 +883,11 @@ public: bool auto_increment_column_changed; bool implicit_emptied; /* Can be !=0 only if HEAP */ const COND *pushed_cond; + MY_BITMAP *read_set; + MY_BITMAP *write_set; - handler(const handlerton *ht_arg, TABLE *table_arg) :table(table_arg), - ht(ht_arg), + handler(const handlerton *ht_arg, TABLE_SHARE *share_arg) + :table_share(share_arg), ht(ht_arg), ref(0), data_file_length(0), max_data_file_length(0), index_file_length(0), delete_length(0), auto_increment_value(0), records(0), deleted(0), mean_rec_length(0), @@ -554,20 +897,28 @@ public: raid_type(0), ft_handler(0), inited(NONE), implicit_emptied(0), pushed_cond(NULL) {} - virtual ~handler(void) { /* TODO: DBUG_ASSERT(inited == NONE); */ } - int ha_open(const char *name, int mode, int test_if_locked); + virtual ~handler(void) + { + /* TODO: DBUG_ASSERT(inited == NONE); */ + } + virtual int ha_initialise(); + int ha_open(TABLE *table, const char *name, int mode, int test_if_locked); bool update_auto_increment(); virtual void print_error(int error, myf errflag); virtual bool get_error_message(int error, String *buf); uint get_dup_key(int error); - void change_table_ptr(TABLE *table_arg) { table=table_arg; } + void change_table_ptr(TABLE *table_arg, TABLE_SHARE *share) + { + table= table_arg; + table_share= share; + } virtual double scan_time() { return ulonglong2double(data_file_length) / IO_SIZE + 2; } virtual double read_time(uint index, uint ranges, ha_rows rows) { return rows2double(ranges+rows); } virtual const key_map *keys_to_use_for_scanning() { return &key_map_empty; } virtual bool has_transactions(){ return 0;} - virtual uint extra_rec_buf_length() { return 0; } + virtual uint extra_rec_buf_length() const { return 0; } /* Return upper bound of current number of records in the table @@ -586,12 +937,12 @@ public: virtual const char *index_type(uint key_number) { DBUG_ASSERT(0); return "";} - int ha_index_init(uint idx) + int ha_index_init(uint idx, bool sorted) { DBUG_ENTER("ha_index_init"); DBUG_ASSERT(inited==NONE); inited=INDEX; - DBUG_RETURN(index_init(idx)); + DBUG_RETURN(index_init(idx, sorted)); } int ha_index_end() { @@ -614,19 +965,224 @@ public: inited=NONE; DBUG_RETURN(rnd_end()); } + int ha_reset() + { + DBUG_ENTER("ha_reset"); + ha_clear_all_set(); + DBUG_RETURN(reset()); + } + /* this is 
necessary in many places, e.g. in HANDLER command */ int ha_index_or_rnd_end() { return inited == INDEX ? ha_index_end() : inited == RND ? ha_rnd_end() : 0; } + /* + These are a set of routines used to enable handlers to only read/write + partial lists of the fields in the table. The bit vector is maintained + by the server part and is used by the handler at calls to read/write + data in the table. + It replaces the use of query id's for this purpose. The benefit is that + the handler can also set bits in the read/write set if it has special + needs and it is also easy for other parts of the server to interact + with the handler (e.g. the replication part for row-level logging). + The routines are all part of the general handler and are not possible + to override by a handler. A handler can however set/reset bits by + calling these routines. + + The methods ha_retrieve_all_cols and ha_retrieve_all_pk are made + virtual to handle InnoDB specifics. If InnoDB doesn't need the + extra parameters HA_EXTRA_RETRIEVE_ALL_COLS and + HA_EXTRA_RETRIEVE_PRIMARY_KEY anymore then these methods need not be + virtual anymore. + */ + virtual int ha_retrieve_all_cols(); + virtual int ha_retrieve_all_pk(); + void ha_set_all_bits_in_read_set() + { + DBUG_ENTER("ha_set_all_bits_in_read_set"); + bitmap_set_all(read_set); + DBUG_VOID_RETURN; + } + void ha_set_all_bits_in_write_set() + { + DBUG_ENTER("ha_set_all_bits_in_write_set"); + bitmap_set_all(write_set); + DBUG_VOID_RETURN; + } + void ha_set_bit_in_read_set(uint fieldnr) + { + DBUG_ENTER("ha_set_bit_in_read_set"); + DBUG_PRINT("info", ("fieldnr = %d", fieldnr)); + bitmap_set_bit(read_set, fieldnr); + DBUG_VOID_RETURN; + } + void ha_clear_bit_in_read_set(uint fieldnr) + { + DBUG_ENTER("ha_clear_bit_in_read_set"); + DBUG_PRINT("info", ("fieldnr = %d", fieldnr)); + bitmap_clear_bit(read_set, fieldnr); + DBUG_VOID_RETURN; + } + void ha_set_bit_in_write_set(uint fieldnr) + { + DBUG_ENTER("ha_set_bit_in_write_set"); + DBUG_PRINT("info", ("fieldnr = %d", fieldnr)); + bitmap_set_bit(write_set, fieldnr); + DBUG_VOID_RETURN; + } + void ha_clear_bit_in_write_set(uint fieldnr) + { + DBUG_ENTER("ha_clear_bit_in_write_set"); + DBUG_PRINT("info", ("fieldnr = %d", fieldnr)); + bitmap_clear_bit(write_set, fieldnr); + DBUG_VOID_RETURN; + } + void ha_set_bit_in_rw_set(uint fieldnr, bool write_op) + { + DBUG_ENTER("ha_set_bit_in_rw_set"); + DBUG_PRINT("info", ("Set bit %u in read set", fieldnr)); + bitmap_set_bit(read_set, fieldnr); + if (!write_op) { + DBUG_VOID_RETURN; + } + else + { + DBUG_PRINT("info", ("Set bit %u in read and write set", fieldnr)); + bitmap_set_bit(write_set, fieldnr); + } + DBUG_VOID_RETURN; + } + bool ha_get_bit_in_read_set(uint fieldnr) + { + bool bit_set=bitmap_is_set(read_set,fieldnr); + DBUG_ENTER("ha_get_bit_in_read_set"); + DBUG_PRINT("info", ("bit %u = %u", fieldnr, bit_set)); + DBUG_RETURN(bit_set); + } + bool ha_get_bit_in_write_set(uint fieldnr) + { + bool bit_set=bitmap_is_set(write_set,fieldnr); + DBUG_ENTER("ha_get_bit_in_write_set"); + DBUG_PRINT("info", ("bit %u = %u", fieldnr, bit_set)); + DBUG_RETURN(bit_set); + } + bool ha_get_all_bit_in_read_set() + { + bool all_bits_set= bitmap_is_set_all(read_set); + DBUG_ENTER("ha_get_all_bit_in_read_set"); + DBUG_PRINT("info", ("all bits set = %u", all_bits_set)); + DBUG_RETURN(all_bits_set); + } + bool ha_get_all_bit_in_read_clear() + { + bool all_bits_set= bitmap_is_clear_all(read_set); + DBUG_ENTER("ha_get_all_bit_in_read_clear"); + DBUG_PRINT("info", ("all bits clear = %u", all_bits_set)); + 
DBUG_RETURN(all_bits_set); + } + bool ha_get_all_bit_in_write_set() + { + bool all_bits_set= bitmap_is_set_all(write_set); + DBUG_ENTER("ha_get_all_bit_in_write_set"); + DBUG_PRINT("info", ("all bits set = %u", all_bits_set)); + DBUG_RETURN(all_bits_set); + } + bool ha_get_all_bit_in_write_clear() + { + bool all_bits_set= bitmap_is_clear_all(write_set); + DBUG_ENTER("ha_get_all_bit_in_write_clear"); + DBUG_PRINT("info", ("all bits clear = %u", all_bits_set)); + DBUG_RETURN(all_bits_set); + } + void ha_set_primary_key_in_read_set(); + int ha_allocate_read_write_set(ulong no_fields); + void ha_clear_all_set(); uint get_index(void) const { return active_index; } virtual int open(const char *name, int mode, uint test_if_locked)=0; virtual int close(void)=0; - virtual int write_row(byte * buf) { return HA_ERR_WRONG_COMMAND; } - virtual int update_row(const byte * old_data, byte * new_data) - { return HA_ERR_WRONG_COMMAND; } - virtual int delete_row(const byte * buf) - { return HA_ERR_WRONG_COMMAND; } + virtual int ha_write_row(byte * buf); + virtual int ha_update_row(const byte * old_data, byte * new_data); + virtual int ha_delete_row(const byte * buf); + /* + SYNOPSIS + start_bulk_update() + RETURN + 0 Bulk update used by handler + 1 Bulk update not used, normal operation used + */ + virtual bool start_bulk_update() { return 1; } + /* + SYNOPSIS + start_bulk_delete() + RETURN + 0 Bulk delete used by handler + 1 Bulk delete not used, normal operation used + */ + virtual bool start_bulk_delete() { return 1; } + /* + SYNOPSIS + This method is similar to update_row, however the handler doesn't need + to execute the updates at this point in time. The handler can be certain + that another call to bulk_update_row will occur OR a call to + exec_bulk_update before the set of updates in this query is concluded. + + bulk_update_row() + old_data Old record + new_data New record + dup_key_found Number of duplicate keys found + RETURN + 0 Bulk delete used by handler + 1 Bulk delete not used, normal operation used + */ + virtual int bulk_update_row(const byte *old_data, byte *new_data, + uint *dup_key_found) + { + DBUG_ASSERT(FALSE); + return HA_ERR_WRONG_COMMAND; + } + /* + SYNOPSIS + After this call all outstanding updates must be performed. The number + of duplicate key errors are reported in the duplicate key parameter. + It is allowed to continue to the batched update after this call, the + handler has to wait until end_bulk_update with changing state. + + exec_bulk_update() + dup_key_found Number of duplicate keys found + RETURN + 0 Success + >0 Error code + */ + virtual int exec_bulk_update(uint *dup_key_found) + { + DBUG_ASSERT(FALSE); + return HA_ERR_WRONG_COMMAND; + } + /* + SYNOPSIS + Perform any needed clean-up, no outstanding updates are there at the + moment. + + end_bulk_update() + RETURN + Nothing + */ + virtual void end_bulk_update() { return; } + /* + SYNOPSIS + Execute all outstanding deletes and close down the bulk delete. 
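
Taken together, the hooks above define a four-step protocol for multi-row updates: start_bulk_update(), any number of bulk_update_row() calls, exec_bulk_update(), end_bulk_update(). A minimal sketch of the calling side, assuming only the methods in this hunk; the row-source helpers more_rows(), old_rec() and new_rec() are hypothetical:

    static void bulk_update_loop(handler *file)
    {
      uint dup_key_found;
      /* Returns 1 when the engine wants plain ha_update_row() calls. */
      bool fallback= file->start_bulk_update();
      while (more_rows())
      {
        if (fallback)
          file->ha_update_row(old_rec(), new_rec());
        else
          file->bulk_update_row(old_rec(), new_rec(), &dup_key_found);
      }
      if (!fallback)
      {
        file->exec_bulk_update(&dup_key_found);  /* flush queued updates */
        file->end_bulk_update();                 /* final clean-up */
      }
    }
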
+ + end_bulk_delete() + RETURN + 0 Success + >0 Error code + */ + virtual int end_bulk_delete() + { + DBUG_ASSERT(FALSE); + return HA_ERR_WRONG_COMMAND; + } virtual int index_read(byte * buf, const byte * key, uint key_len, enum ha_rkey_function find_flag) { return HA_ERR_WRONG_COMMAND; } @@ -677,8 +1233,27 @@ public: { return 0; } virtual int extra_opt(enum ha_extra_function operation, ulong cache_size) { return extra(operation); } - virtual int reset() { return extra(HA_EXTRA_RESET); } virtual int external_lock(THD *thd, int lock_type) { return 0; } + /* + In an UPDATE or DELETE, if the row under the cursor was locked by another + transaction, and the engine used an optimistic read of the last + committed row value under the cursor, then the engine returns 1 from this + function. MySQL must NOT try to update this optimistic value. If the + optimistic value does not match the WHERE condition, MySQL can decide to + skip over this row. Currently only works for InnoDB. This can be used to + avoid unnecessary lock waits. + + If this method returns nonzero, it will also signal the storage + engine that the next read will be a locking re-read of the row. + */ + virtual bool was_semi_consistent_read() { return 0; } + /* + Tell the engine whether it should avoid unnecessary lock waits. + If yes, in an UPDATE or DELETE, if the row under the cursor was locked + by another transaction, the engine may try an optimistic read of + the last committed row value under the cursor. + */ + virtual void try_semi_consistent_read(bool) {} virtual void unlock_row() {} virtual int start_stmt(THD *thd, thr_lock_type lock_type) {return 0;} /* @@ -739,6 +1314,20 @@ public: virtual char *update_table_comment(const char * comment) { return (char*) comment;} virtual void append_create_info(String *packet) {} + /* + SYNOPSIS + is_fk_defined_on_table_or_index() + index Index to check if foreign key uses it + RETURN VALUE + TRUE Foreign key defined on table or index + FALSE No foreign key defined + DESCRIPTION + If index == MAX_KEY then a check for table is made and if index < + MAX_KEY then a check is made if the table has foreign keys and if + a foreign key uses this index (and thus the index cannot be dropped). 
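
The semi-consistent read contract above dictates a specific shape for the UPDATE/DELETE scan loop. A minimal sketch, loosely modelled on that contract; read_next_row(), row_matches_where() and do_update() are hypothetical stand-ins for the real scan machinery:

    file->try_semi_consistent_read(true);    /* permit optimistic reads */
    while (read_next_row() == 0)
    {
      if (!row_matches_where())
      {
        file->unlock_row();       /* uninteresting row, release it early */
        continue;
      }
      if (file->was_semi_consistent_read())
        continue;   /* the value was optimistic and must not be updated;
                       the engine re-reads this row with a lock next time */
      do_update();
    }
    file->try_semi_consistent_read(false);
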
+ */ + virtual bool is_fk_defined_on_table_or_index(uint index) + { return FALSE; } virtual char* get_foreign_key_create_info() { return(NULL);} /* gets foreign key create string from InnoDB */ /* used in ALTER TABLE; 1 if changing storage engine is allowed */ @@ -754,6 +1343,12 @@ public: virtual const char *table_type() const =0; virtual const char **bas_ext() const =0; virtual ulong table_flags(void) const =0; + virtual ulong alter_table_flags(void) const { return 0; } +#ifdef WITH_PARTITION_STORAGE_ENGINE + virtual ulong partition_flags(void) const { return 0;} + virtual int get_default_no_partitions(ulonglong max_rows) { return 1;} + virtual void set_part_info(partition_info *part_info) { return; } +#endif virtual ulong index_flags(uint idx, uint part, bool all_parts) const =0; virtual ulong index_ddl_flags(KEY *wanted_index) const { return (HA_DDL_SUPPORT); } @@ -791,9 +1386,24 @@ public: */ virtual int rename_table(const char *from, const char *to); virtual int delete_table(const char *name); + virtual void drop_table(const char *name); virtual int create(const char *name, TABLE *form, HA_CREATE_INFO *info)=0; + virtual int create_handler_files(const char *name) { return FALSE;} + /* + SYNOPSIS + drop_partitions() + path Complete path of db and table name + RETURN VALUE + TRUE Failure + FALSE Success + DESCRIPTION + Drop a partition, during this operation no other activity is ongoing + in this server on the table. + */ + virtual int drop_partitions(const char *path) + { return HA_ERR_WRONG_COMMAND; } /* lock_count() can be more than one if the table is a MERGE */ virtual uint lock_count(void) const { return 1; } virtual THR_LOCK_DATA **store_lock(THD *thd, @@ -857,6 +1467,34 @@ public: Pops the top if condition stack, if stack is not empty */ virtual void cond_pop() { return; }; + virtual bool check_if_incompatible_data(HA_CREATE_INFO *create_info, + uint table_changes) + { return COMPATIBLE_DATA_NO; } + +private: + + /* + Row-level primitives for storage engines. + These should be overridden by the storage engine class. To call + these methods, use the corresponding 'ha_*' method above. 
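
The split into public ha_*() wrappers and private virtual primitives is a non-virtual-interface pattern: the wrapper is the one place where cross-engine concerns, here row-based binlogging, are applied. A minimal sketch of the engine's side, assuming the declarations above; ha_toy and its storage helper are illustrative:

    class ha_toy :public handler
    {
      /* ... constructor and the other mandatory handler methods elided ... */
    private:
      int write_row(byte *buf)
      {
        /* Engine-specific insert; the server never calls this directly. */
        return store_row_somewhere(buf);        /* hypothetical helper */
      }
    };

Callers always go through the wrapper, e.g. file->ha_write_row(record), which performs the engine write and then, where applicable, logs the row change.
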
+ */ + friend int ndb_add_binlog_index(THD *, void *); + + virtual int write_row(byte *buf __attribute__((unused))) + { + return HA_ERR_WRONG_COMMAND; + } + + virtual int update_row(const byte *old_data __attribute__((unused)), + byte *new_data __attribute__((unused))) + { + return HA_ERR_WRONG_COMMAND; + } + + virtual int delete_row(const byte *buf __attribute__((unused))) + { + return HA_ERR_WRONG_COMMAND; + } }; /* Some extern variables used with handlers */ @@ -874,26 +1512,56 @@ extern ulong total_ha, total_ha_2pc; #define ha_rollback(thd) (ha_rollback_trans((thd), TRUE)) /* lookups */ -enum db_type ha_resolve_by_name(const char *name, uint namelen); -const char *ha_get_storage_engine(enum db_type db_type); -handler *get_new_handler(TABLE *table, MEM_ROOT *alloc, enum db_type db_type); -enum db_type ha_checktype(THD *thd, enum db_type database_type, +handlerton *ha_resolve_by_name(THD *thd, LEX_STRING *name); +handlerton *ha_resolve_by_legacy_type(THD *thd, enum legacy_db_type db_type); +const char *ha_get_storage_engine(enum legacy_db_type db_type); +handler *get_new_handler(TABLE_SHARE *share, MEM_ROOT *alloc, + handlerton *db_type); +handlerton *ha_checktype(THD *thd, enum legacy_db_type database_type, bool no_substitute, bool report_error); -bool ha_check_storage_engine_flag(enum db_type db_type, uint32 flag); + + +inline enum legacy_db_type ha_legacy_type(const handlerton *db_type) +{ + return (db_type == NULL) ? DB_TYPE_UNKNOWN : db_type->db_type; +} + +inline const char *ha_resolve_storage_engine_name(const handlerton *db_type) +{ + return db_type == NULL ? "UNKNOWN" : db_type->name; +} + +inline bool ha_check_storage_engine_flag(const handlerton *db_type, uint32 flag) +{ + return db_type == NULL ? FALSE : test(db_type->flags & flag); +} + +inline bool ha_storage_engine_is_enabled(const handlerton *db_type) +{ + return (db_type && db_type->create) ? 
+ (db_type->state == SHOW_OPTION_YES) : FALSE; +} /* basic stuff */ int ha_init(void); +int ha_register_builtin_plugins(); +int ha_initialize_handlerton(handlerton *hton); + TYPELIB *ha_known_exts(void); int ha_panic(enum ha_panic_function flag); int ha_update_statistics(); void ha_close_connection(THD* thd); -my_bool ha_storage_engine_is_enabled(enum db_type database_type); -bool ha_flush_logs(void); +bool ha_flush_logs(handlerton *db_type); void ha_drop_database(char* path); -int ha_create_table(const char *name, HA_CREATE_INFO *create_info, +int ha_create_table(THD *thd, const char *path, + const char *db, const char *table_name, + HA_CREATE_INFO *create_info, bool update_create_info); -int ha_delete_table(THD *thd, enum db_type db_type, const char *path, - const char *alias, bool generate_warning); +int ha_delete_table(THD *thd, handlerton *db_type, const char *path, + const char *db, const char *alias, bool generate_warning); + +/* statistics and info */ +bool ha_show_status(THD *thd, handlerton *db_type, enum ha_stat_type stat); /* discovery */ int ha_create_table_from_engine(THD* thd, const char *db, const char *name); diff --git a/sql/handlerton-win.cc b/sql/handlerton-win.cc new file mode 100644 index 00000000000..9ce4eab2444 --- /dev/null +++ b/sql/handlerton-win.cc @@ -0,0 +1,72 @@ +#include "mysql_priv.h" + +extern handlerton heap_hton; +extern handlerton myisam_hton; +extern handlerton myisammrg_hton; +extern handlerton binlog_hton; +#ifdef WITH_INNOBASE_STORAGE_ENGINE +extern handlerton innobase_hton; +#endif +#ifdef WITH_BERKELEY_STORAGE_ENGINE +extern handlerton berkeley_hton; +#endif +#ifdef WITH_EXAMPLE_STORAGE_ENGINE +extern handlerton example_hton; +#endif +#ifdef WITH_ARCHIVE_STORAGE_ENGINE +extern handlerton archive_hton; +#endif +#ifdef WITH_CSV_STORAGE_ENGINE +extern handlerton tina_hton; +#endif +#ifdef WITH_BLACKHOLE_STORAGE_ENGINE +extern handlerton blackhole_hton; +#endif +#ifdef WITH_FEDERATED_STORAGE_ENGINE +extern handlerton federated_hton; +#endif +#ifdef WITH_NDBCLUSTER_STORAGE_ENGINE +extern handlerton ndbcluster_hton; +#endif +#ifdef WITH_PARTITION_STORAGE_ENGINE +extern handlerton partition_hton; +#endif + +/* + This array is used for processing compiled in engines. +*/ +handlerton *sys_table_types[]= +{ + &heap_hton, + &myisam_hton, +#ifdef WITH_INNOBASE_STORAGE_ENGINE + &innobase_hton, +#endif +#ifdef WITH_BERKELEY_STORAGE_ENGINE + &berkeley_hton, +#endif +#ifdef WITH_EXAMPLE_STORAGE_ENGINE + &example_hton, +#endif +#ifdef WITH_ARCHIVE_STORAGE_ENGINE + &archive_hton, +#endif +#ifdef WITH_CSV_STORAGE_ENGINE + &tina_hton, +#endif +#ifdef WITH_BLACKHOLE_STORAGE_ENGINE + &blackhole_hton, +#endif +#ifdef WITH_FEDERATED_STORAGE_ENGINE + &federated_hton, +#endif +#ifdef WITH_NDBCLUSTER_STORAGE_ENGINE + &ndbcluster_hton, +#endif +#ifdef WITH_PARTITION_STORAGE_ENGINE + &partition_hton, +#endif + &myisammrg_hton, + &binlog_hton, + NULL +}; diff --git a/sql/handlerton.cc.in b/sql/handlerton.cc.in new file mode 100644 index 00000000000..55af8cdd8cf --- /dev/null +++ b/sql/handlerton.cc.in @@ -0,0 +1,14 @@ + +#include "mysql_priv.h" + +extern handlerton heap_hton,myisam_hton,myisammrg_hton, + binlog_hton@mysql_se_decls@; + +/* + This array is used for processing compiled in engines. 
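
Both variants of the array, the hand-written Windows one above and the configure-generated one below, rely on the NULL terminator, so startup code can walk them without knowing how many engines were compiled in. A minimal sketch of such a walk, reusing the ha_initialize_handlerton() declared earlier in this patch; the loop itself is illustrative:

    extern handlerton *sys_table_types[];

    static int register_builtin_engines(void)
    {
      for (handlerton **types= sys_table_types; *types; types++)
        if (ha_initialize_handlerton(*types))
          return 1;             /* abort startup on a broken engine */
      return 0;
    }
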
+*/ +handlerton *sys_table_types[]= +{ + &heap_hton,&myisam_hton@mysql_se_htons@,&myisammrg_hton,&binlog_hton,NULL +}; + diff --git a/sql/init.cc b/sql/init.cc index e53eeab8902..9f975296cb6 100644 --- a/sql/init.cc +++ b/sql/init.cc @@ -39,6 +39,7 @@ void unireg_init(ulong options) #endif VOID(strmov(reg_ext,".frm")); + reg_ext_length= 4; specialflag=SPECIAL_SAME_DB_NAME | options; /* Set options from argv */ /* Make a tab of powers of 10 */ for (i=0,nr=1.0; i < array_elements(log_10) ; i++) diff --git a/sql/item.cc b/sql/item.cc index 34dc450b924..fa5c2b5cc3b 100644 --- a/sql/item.cc +++ b/sql/item.cc @@ -1442,20 +1442,21 @@ bool agg_item_charsets(DTCollation &coll, const char *fname, Item_field::Item_field(Field *f) :Item_ident(0, NullS, *f->table_name, f->field_name), - item_equal(0), no_const_subst(0), + item_equal(0), no_const_subst(0), have_privileges(0), any_privileges(0) { set_field(f); /* - field_name and talbe_name should not point to garbage + field_name and table_name should not point to garbage if this item is to be reused */ orig_table_name= orig_field_name= ""; } + Item_field::Item_field(THD *thd, Name_resolution_context *context_arg, Field *f) - :Item_ident(context_arg, f->table->s->db, *f->table_name, f->field_name), + :Item_ident(context_arg, f->table->s->db.str, *f->table_name, f->field_name), item_equal(0), no_const_subst(0), have_privileges(0), any_privileges(0) { @@ -1522,7 +1523,7 @@ void Item_field::set_field(Field *field_par) max_length= field_par->max_length(); table_name= *field_par->table_name; field_name= field_par->field_name; - db_name= field_par->table->s->db; + db_name= field_par->table->s->db.str; alias_name_used= field_par->table->alias_name_used; unsigned_flag=test(field_par->flags & UNSIGNED_FLAG); collation.set(field_par->charset(), DERIVATION_IMPLICIT); @@ -3509,13 +3510,18 @@ bool Item_field::fix_fields(THD *thd, Item **reference) set_if_bigger(thd->lex->in_sum_func->max_arg_level, thd->lex->current_select->nest_level); } - else if (thd->set_query_id && field->query_id != thd->query_id) + else if (thd->set_query_id) { - /* We only come here in unions */ - TABLE *table=field->table; - field->query_id=thd->query_id; - table->used_fields++; - table->used_keys.intersect(field->part_of_key); + TABLE *table= field->table; + table->file->ha_set_bit_in_rw_set(field->fieldnr, + (bool)(thd->set_query_id-1)); + if (field->query_id != thd->query_id) + { + /* We only come here in unions */ + field->query_id=thd->query_id; + table->used_fields++; + table->used_keys.intersect(field->part_of_key); + } } #ifndef NO_EMBEDDED_ACCESS_CHECKS if (any_privileges) @@ -3769,15 +3775,20 @@ enum_field_types Item::field_type() const Field *Item::make_string_field(TABLE *table) { + Field *field; DBUG_ASSERT(collation.collation); if (max_length/collation.collation->mbmaxlen > CONVERT_IF_BIGGER_TO_BLOB) - return new Field_blob(max_length, maybe_null, name, table, + field= new Field_blob(max_length, maybe_null, name, collation.collation); - if (max_length > 0) - return new Field_varstring(max_length, maybe_null, name, table, + else if (max_length > 0) + field= new Field_varstring(max_length, maybe_null, name, table->s, collation.collation); - return new Field_string(max_length, maybe_null, name, table, - collation.collation); + else + field= new Field_string(max_length, maybe_null, name, + collation.collation); + if (field) + field->init(table); + return field; } @@ -3785,73 +3796,95 @@ Field *Item::make_string_field(TABLE *table) Create a field based on field_type of argument For 
now, this is only used to create a field for - IFNULL(x,something) + IFNULL(x,something) and time functions RETURN 0 error # Created field */ -Field *Item::tmp_table_field_from_field_type(TABLE *table) +Field *Item::tmp_table_field_from_field_type(TABLE *table, bool fixed_length) { /* The field functions defines a field to be not null if null_ptr is not 0 */ uchar *null_ptr= maybe_null ? (uchar*) "" : 0; + Field *field; switch (field_type()) { case MYSQL_TYPE_DECIMAL: case MYSQL_TYPE_NEWDECIMAL: - return new Field_new_decimal((char*) 0, max_length, null_ptr, 0, - Field::NONE, name, table, decimals, 0, + field= new Field_new_decimal((char*) 0, max_length, null_ptr, 0, + Field::NONE, name, decimals, 0, unsigned_flag); + break; case MYSQL_TYPE_TINY: - return new Field_tiny((char*) 0, max_length, null_ptr, 0, Field::NONE, - name, table, 0, unsigned_flag); + field= new Field_tiny((char*) 0, max_length, null_ptr, 0, Field::NONE, + name, 0, unsigned_flag); + break; case MYSQL_TYPE_SHORT: - return new Field_short((char*) 0, max_length, null_ptr, 0, Field::NONE, - name, table, 0, unsigned_flag); + field= new Field_short((char*) 0, max_length, null_ptr, 0, Field::NONE, + name, 0, unsigned_flag); + break; case MYSQL_TYPE_LONG: - return new Field_long((char*) 0, max_length, null_ptr, 0, Field::NONE, - name, table, 0, unsigned_flag); + field= new Field_long((char*) 0, max_length, null_ptr, 0, Field::NONE, + name, 0, unsigned_flag); + break; #ifdef HAVE_LONG_LONG case MYSQL_TYPE_LONGLONG: - return new Field_longlong((char*) 0, max_length, null_ptr, 0, Field::NONE, - name, table, 0, unsigned_flag); + field= new Field_longlong((char*) 0, max_length, null_ptr, 0, Field::NONE, + name, 0, unsigned_flag); + break; #endif case MYSQL_TYPE_FLOAT: - return new Field_float((char*) 0, max_length, null_ptr, 0, Field::NONE, - name, table, decimals, 0, unsigned_flag); + field= new Field_float((char*) 0, max_length, null_ptr, 0, Field::NONE, + name, decimals, 0, unsigned_flag); + break; case MYSQL_TYPE_DOUBLE: - return new Field_double((char*) 0, max_length, null_ptr, 0, Field::NONE, - name, table, decimals, 0, unsigned_flag); + field= new Field_double((char*) 0, max_length, null_ptr, 0, Field::NONE, + name, decimals, 0, unsigned_flag); + break; case MYSQL_TYPE_NULL: - return new Field_null((char*) 0, max_length, Field::NONE, - name, table, &my_charset_bin); + field= new Field_null((char*) 0, max_length, Field::NONE, + name, &my_charset_bin); + break; case MYSQL_TYPE_INT24: - return new Field_medium((char*) 0, max_length, null_ptr, 0, Field::NONE, - name, table, 0, unsigned_flag); + field= new Field_medium((char*) 0, max_length, null_ptr, 0, Field::NONE, + name, 0, unsigned_flag); + break; case MYSQL_TYPE_NEWDATE: case MYSQL_TYPE_DATE: - return new Field_date(maybe_null, name, table, &my_charset_bin); + field= new Field_date(maybe_null, name, &my_charset_bin); + break; case MYSQL_TYPE_TIME: - return new Field_time(maybe_null, name, table, &my_charset_bin); + field= new Field_time(maybe_null, name, &my_charset_bin); + break; case MYSQL_TYPE_TIMESTAMP: case MYSQL_TYPE_DATETIME: - return new Field_datetime(maybe_null, name, table, &my_charset_bin); + field= new Field_datetime(maybe_null, name, &my_charset_bin); + break; case MYSQL_TYPE_YEAR: - return new Field_year((char*) 0, max_length, null_ptr, 0, Field::NONE, - name, table); + field= new Field_year((char*) 0, max_length, null_ptr, 0, Field::NONE, + name); + break; case MYSQL_TYPE_BIT: - return new Field_bit_as_char(NULL, max_length, null_ptr, 0, NULL, 0, - 
Field::NONE, name, table); + field= new Field_bit_as_char(NULL, max_length, null_ptr, 0, NULL, 0, + Field::NONE, name); + break; default: /* This case should never be chosen */ DBUG_ASSERT(0); /* If something goes awfully wrong, it's better to get a string than die */ + case MYSQL_TYPE_STRING: + if (fixed_length && max_length < CONVERT_IF_BIGGER_TO_BLOB) + { + field= new Field_string(max_length, maybe_null, name, + collation.collation); + break; + } + /* Fall through to make_string_field() */ case MYSQL_TYPE_ENUM: case MYSQL_TYPE_SET: - case MYSQL_TYPE_STRING: case MYSQL_TYPE_VAR_STRING: case MYSQL_TYPE_VARCHAR: return make_string_field(table); @@ -3860,10 +3893,12 @@ Field *Item::tmp_table_field_from_field_type(TABLE *table) case MYSQL_TYPE_LONG_BLOB: case MYSQL_TYPE_BLOB: case MYSQL_TYPE_GEOMETRY: - return new Field_blob(max_length, maybe_null, name, table, - collation.collation); + field= new Field_blob(max_length, maybe_null, name, collation.collation); break; // Blob handled outside of case } + if (field) + field->init(table); + return field; } @@ -5082,8 +5117,9 @@ bool Item_default_value::fix_fields(THD *thd, Item **items) if (!(def_field= (Field*) sql_alloc(field_arg->field->size_of()))) goto error; memcpy(def_field, field_arg->field, field_arg->field->size_of()); - def_field->move_field(def_field->table->s->default_values - - def_field->table->record[0]); + def_field->move_field_offset((my_ptrdiff_t) + (def_field->table->s->default_values - + def_field->table->record[0])); set_field(def_field); return FALSE; @@ -5185,16 +5221,22 @@ bool Item_insert_value::fix_fields(THD *thd, Item **items) if (!def_field) return TRUE; memcpy(def_field, field_arg->field, field_arg->field->size_of()); - def_field->move_field(def_field->table->insert_values - - def_field->table->record[0]); + def_field->move_field_offset((my_ptrdiff_t) + (def_field->table->insert_values - + def_field->table->record[0])); set_field(def_field); } else { Field *tmp_field= field_arg->field; /* charset doesn't matter here, it's to avoid sigsegv only */ - set_field(new Field_null(0, 0, Field::NONE, tmp_field->field_name, - tmp_field->table, &my_charset_bin)); + tmp_field= new Field_null(0, 0, Field::NONE, field_arg->field->field_name, + &my_charset_bin); + if (tmp_field) + { + tmp_field->init(field_arg->field->table); + set_field(tmp_field); + } } return FALSE; } @@ -5987,24 +6029,31 @@ Field *Item_type_holder::make_field_by_type(TABLE *table) The field functions defines a field to be not null if null_ptr is not 0 */ uchar *null_ptr= maybe_null ? 
(uchar*) "" : 0; - switch (fld_type) - { + Field *field; + + switch (fld_type) { case MYSQL_TYPE_ENUM: DBUG_ASSERT(enum_set_typelib); - return new Field_enum((char *) 0, max_length, null_ptr, 0, + field= new Field_enum((char *) 0, max_length, null_ptr, 0, Field::NONE, name, - table, get_enum_pack_length(enum_set_typelib->count), + get_enum_pack_length(enum_set_typelib->count), enum_set_typelib, collation.collation); + if (field) + field->init(table); + return field; case MYSQL_TYPE_SET: DBUG_ASSERT(enum_set_typelib); - return new Field_set((char *) 0, max_length, null_ptr, 0, + field= new Field_set((char *) 0, max_length, null_ptr, 0, Field::NONE, name, - table, get_set_pack_length(enum_set_typelib->count), + get_set_pack_length(enum_set_typelib->count), enum_set_typelib, collation.collation); + if (field) + field->init(table); + return field; default: break; } - return tmp_table_field_from_field_type(table); + return tmp_table_field_from_field_type(table, 0); } diff --git a/sql/item.h b/sql/item.h index eee9bc5b284..5de69013605 100644 --- a/sql/item.h +++ b/sql/item.h @@ -368,6 +368,28 @@ public: } }; + +/* + This enum is used to report information about the monotonicity of the + function represented by an Item* tree. + Monotonicity is defined only for Item* trees that represent table + partitioning expressions (i.e. have no subselects/user vars/PS parameters + etc etc). An Item* tree is assumed to have the same monotonicity properties + as its corresponding function F: + + [signed] longlong F(field1, field2, ...) { + put values of field_i into table record buffer; + return item->val_int(); + } +*/ + +typedef enum monotonicity_info +{ + NON_MONOTONIC, /* none of the below holds */ + MONOTONIC_INCREASING, /* F() is unary and "x < y" => "F(x) <= F(y)" */ + MONOTONIC_STRICT_INCREASING /* F() is unary and "x < y" => "F(x) < F(y)" */ +} enum_monotonicity_info; + /*************************************************************************/ typedef bool (Item::*Item_processor)(byte *arg); @@ -393,6 +415,7 @@ public: FIELD_VARIANCE_ITEM, INSERT_VALUE_ITEM, SUBSELECT_ITEM, ROW_ITEM, CACHE_ITEM, TYPE_HOLDER, PARAM_ITEM, TRIGGER_FIELD_ITEM, DECIMAL_ITEM, + XPATH_NODESET, XPATH_NODESET_CMP, VIEW_FIXER_ITEM}; enum cond_result { COND_UNDEF,COND_OK,COND_TRUE,COND_FALSE }; @@ -465,6 +488,15 @@ public: virtual Item_result cast_to_int_type() const { return result_type(); } virtual enum_field_types field_type() const; virtual enum Type type() const =0; + + /* + Return information about function monotonicity. See the comment for + enum_monotonicity_info for details. This function can only be called + after the fix_fields() call. + */ + virtual enum_monotonicity_info get_monotonicity_info() const + { return NON_MONOTONIC; } + /* valXXX methods must return NULL or 0 or 0.0 if null_value is set. */ /* Return double precision floating point representation of item.
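
This classification is what makes interval-based partition pruning sound: when F is at least non-strictly increasing, an interval over the column maps to an interval over the partitioning function, so only the partitions covering F(min)..F(max) can contain matching rows. A minimal sketch of that reasoning; eval_at() and mark_partitions_in_range() are hypothetical helpers:

    static void prune_by_interval(Item *part_expr,
                                  longlong col_min, longlong col_max)
    {
      if (part_expr->get_monotonicity_info() == NON_MONOTONIC)
        return;                   /* no shortcut: scan all partitions */
      longlong f_min= eval_at(part_expr, col_min);
      longlong f_max= eval_at(part_expr, col_max);
      mark_partitions_in_range(f_min, f_max);
    }

The strict/non-strict distinction matters for endpoint handling: TO_DAYS() over a DATE column is strictly increasing, but over DATETIME it is only non-strictly increasing (all times of one day collapse to the same day number), so open interval endpoints may have to be widened to closed ones.
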
@@ -555,6 +587,7 @@ public: TRUE value is true (not equal to 0) */ virtual bool val_bool(); + virtual String *val_nodeset(String*) { return 0; } /* Helper functions, see item_sum.cc */ String *val_string_from_real(String *str); String *val_string_from_int(String *str); @@ -729,7 +762,7 @@ public: // used in row subselects to get value of elements virtual void bring_value() {} - Field *tmp_table_field_from_field_type(TABLE *table); + Field *tmp_table_field_from_field_type(TABLE *table, bool fixed_length); virtual Item_field *filed_for_view_update() { return 0; } virtual Item *neg_transformer(THD *thd) { return NULL; } @@ -1138,6 +1171,10 @@ public: { return field->type(); } + enum_monotonicity_info get_monotonicity_info() const + { + return MONOTONIC_STRICT_INCREASING; + } Field *get_tmp_table_field() { return result_field; } Field *tmp_table_field(TABLE *t_arg) { return result_field; } bool get_date(TIME *ltime,uint fuzzydate); @@ -1907,6 +1944,7 @@ public: #include "item_timefunc.h" #include "item_uniq.h" #include "item_subselect.h" +#include "item_xmlfunc.h" class Item_copy_string :public Item { diff --git a/sql/item_cmpfunc.cc b/sql/item_cmpfunc.cc index 15614a32c39..0d2056e9e99 100644 --- a/sql/item_cmpfunc.cc +++ b/sql/item_cmpfunc.cc @@ -1232,7 +1232,7 @@ enum_field_types Item_func_ifnull::field_type() const Field *Item_func_ifnull::tmp_table_field(TABLE *table) { - return tmp_table_field_from_field_type(table); + return tmp_table_field_from_field_type(table, 0); } double diff --git a/sql/item_create.cc b/sql/item_create.cc index 342ef245a76..17f1fbca471 100644 --- a/sql/item_create.cc +++ b/sql/item_create.cc @@ -462,7 +462,6 @@ Item *create_func_cast(Item *a, Cast_target cast_type, int len, int dec, CHARSET_INFO *cs) { Item *res; - LINT_INIT(res); switch (cast_type) { case ITEM_CAST_BINARY: res= new Item_func_binary(a); break; @@ -478,6 +477,10 @@ Item *create_func_cast(Item *a, Cast_target cast_type, int len, int dec, res= new Item_char_typecast(a, len, cs ? 
cs : current_thd->variables.collation_connection); break; + default: + DBUG_ASSERT(0); + res= 0; + break; } return res; } @@ -499,6 +502,16 @@ Item *create_func_quote(Item* a) return new Item_func_quote(a); } +Item *create_func_xml_extractvalue(Item *a, Item *b) +{ + return new Item_func_xml_extractvalue(a, b); +} + +Item *create_func_xml_update(Item *a, Item *b, Item *c) +{ + return new Item_func_xml_update(a, b, c); +} + #ifdef HAVE_SPATIAL Item *create_func_as_wkt(Item *a) { diff --git a/sql/item_create.h b/sql/item_create.h index 35db9be3c89..c76dc6b9ad7 100644 --- a/sql/item_create.h +++ b/sql/item_create.h @@ -102,7 +102,8 @@ Item *create_load_file(Item* a); Item *create_func_is_free_lock(Item* a); Item *create_func_is_used_lock(Item* a); Item *create_func_quote(Item* a); - +Item *create_func_xml_extractvalue(Item *a, Item *b); +Item *create_func_xml_update(Item *a, Item *b, Item *c); #ifdef HAVE_SPATIAL Item *create_func_geometry_from_text(Item *a); diff --git a/sql/item_func.cc b/sql/item_func.cc index 272e77a4318..c4c03e1cbf2 100644 --- a/sql/item_func.cc +++ b/sql/item_func.cc @@ -362,41 +362,43 @@ bool Item_func::eq(const Item *item, bool binary_cmp) const } -Field *Item_func::tmp_table_field(TABLE *t_arg) +Field *Item_func::tmp_table_field(TABLE *table) { - Field *res; - LINT_INIT(res); + Field *field; + LINT_INIT(field); switch (result_type()) { case INT_RESULT: if (max_length > 11) - res= new Field_longlong(max_length, maybe_null, name, t_arg, - unsigned_flag); + field= new Field_longlong(max_length, maybe_null, name, unsigned_flag); else - res= new Field_long(max_length, maybe_null, name, t_arg, - unsigned_flag); + field= new Field_long(max_length, maybe_null, name, unsigned_flag); break; case REAL_RESULT: - res= new Field_double(max_length, maybe_null, name, t_arg, decimals); + field= new Field_double(max_length, maybe_null, name, decimals); break; case STRING_RESULT: - res= make_string_field(t_arg); + return make_string_field(table); break; case DECIMAL_RESULT: - res= new Field_new_decimal(my_decimal_precision_to_length(decimal_precision(), - decimals, - unsigned_flag), - maybe_null, name, t_arg, decimals, unsigned_flag); + field= new Field_new_decimal(my_decimal_precision_to_length(decimal_precision(), + decimals, + unsigned_flag), + maybe_null, name, decimals, unsigned_flag); break; case ROW_RESULT: default: // This case should never be chosen DBUG_ASSERT(0); + field= 0; break; } - return res; + if (field) + field->init(table); + return field; } + my_decimal *Item_func::val_decimal(my_decimal *decimal_value) { DBUG_ASSERT(fixed); @@ -4637,7 +4639,8 @@ Item_func_sp::Item_func_sp(Name_resolution_context *context_arg, sp_name *name) { maybe_null= 1; m_name->init_qname(current_thd); - dummy_table= (TABLE*) sql_calloc(sizeof(TABLE)); + dummy_table= (TABLE*) sql_calloc(sizeof(TABLE)+ sizeof(TABLE_SHARE)); + dummy_table->s= (TABLE_SHARE*) (dummy_table+1); } @@ -4648,9 +4651,11 @@ Item_func_sp::Item_func_sp(Name_resolution_context *context_arg, { maybe_null= 1; m_name->init_qname(current_thd); - dummy_table= (TABLE*) sql_calloc(sizeof(TABLE)); + dummy_table= (TABLE*) sql_calloc(sizeof(TABLE)+ sizeof(TABLE_SHARE)); + dummy_table->s= (TABLE_SHARE*) (dummy_table+1); } + void Item_func_sp::cleanup() { @@ -4705,16 +4710,15 @@ Item_func_sp::sp_result_field(void) const DBUG_RETURN(0); } } - if (!dummy_table->s) + if (!dummy_table->alias) { char *empty_name= (char *) ""; - TABLE_SHARE *share; - dummy_table->s= share= &dummy_table->share_not_to_be_used; - dummy_table->alias = empty_name; 
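
A recurring shape in the Field hunks above: Field constructors no longer take the owning TABLE*, so each creation site builds the object first and then attaches it with init(table). A minimal sketch of the pattern; the factory function is illustrative, and the NULL check mirrors the guards added throughout this patch:

    static Field *make_double_field(TABLE *table, uint32 len, bool maybe_null,
                                    const char *name, uint dec)
    {
      Field *field= new Field_double(len, maybe_null, name, dec);
      if (field)                /* allocation may fail; caller sees NULL */
        field->init(table);     /* late binding of the owning TABLE */
      return field;
    }
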
- dummy_table->maybe_null = maybe_null; + dummy_table->alias= empty_name; + dummy_table->maybe_null= maybe_null; dummy_table->in_use= current_thd; - share->table_cache_key = empty_name; - share->table_name = empty_name; + dummy_table->s->table_cache_key.str = empty_name; + dummy_table->s->table_name.str= empty_name; + dummy_table->s->db.str= empty_name; } field= m_sp->create_result_field(max_length, name, dummy_table); DBUG_RETURN(field); diff --git a/sql/item_subselect.cc b/sql/item_subselect.cc index 88620634354..68f189ccf8c 100644 --- a/sql/item_subselect.cc +++ b/sql/item_subselect.cc @@ -133,6 +133,7 @@ Item_subselect::select_transformer(JOIN *join) bool Item_subselect::fix_fields(THD *thd_param, Item **ref) { char const *save_where= thd_param->where; + uint8 uncacheable; bool res; DBUG_ASSERT(fixed == 0); @@ -178,15 +179,17 @@ bool Item_subselect::fix_fields(THD *thd_param, Item **ref) fix_length_and_dec(); } else - return 1; - uint8 uncacheable= engine->uncacheable(); - if (uncacheable) + goto err; + + if ((uncacheable= engine->uncacheable())) { const_item_cache= 0; if (uncacheable & UNCACHEABLE_RAND) used_tables_cache|= RAND_TABLE_BIT; } fixed= 1; + +err: thd->where= save_where; return res; } @@ -1613,7 +1616,7 @@ int subselect_uniquesubquery_engine::exec() } if (!table->file->inited) - table->file->ha_index_init(tab->ref.key); + table->file->ha_index_init(tab->ref.key, 0); error= table->file->index_read(table->record[0], tab->ref.key_buff, tab->ref.key_length,HA_READ_KEY_EXACT); @@ -1666,7 +1669,7 @@ int subselect_indexsubquery_engine::exec() } if (!table->file->inited) - table->file->ha_index_init(tab->ref.key); + table->file->ha_index_init(tab->ref.key, 1); error= table->file->index_read(table->record[0], tab->ref.key_buff, tab->ref.key_length,HA_READ_KEY_EXACT); @@ -1801,7 +1804,7 @@ void subselect_uniquesubquery_engine::print(String *str) str->append(STRING_WITH_LEN("<primary_index_lookup>(")); tab->ref.items[0]->print(str); str->append(STRING_WITH_LEN(" in ")); - str->append(tab->table->s->table_name); + str->append(tab->table->s->table_name.str, tab->table->s->table_name.length); KEY *key_info= tab->table->key_info+ tab->ref.key; str->append(STRING_WITH_LEN(" on ")); str->append(key_info->name); @@ -1819,7 +1822,7 @@ void subselect_indexsubquery_engine::print(String *str) str->append(STRING_WITH_LEN("<index_lookup>(")); tab->ref.items[0]->print(str); str->append(STRING_WITH_LEN(" in ")); - str->append(tab->table->s->table_name); + str->append(tab->table->s->table_name.str, tab->table->s->table_name.length); KEY *key_info= tab->table->key_info+ tab->ref.key; str->append(STRING_WITH_LEN(" on ")); str->append(key_info->name); diff --git a/sql/item_sum.cc b/sql/item_sum.cc index a3a25ec8d6f..dc1cf6cc8b7 100644 --- a/sql/item_sum.cc +++ b/sql/item_sum.cc @@ -371,26 +371,33 @@ bool Item_sum::walk (Item_processor processor, byte *argument) Field *Item_sum::create_tmp_field(bool group, TABLE *table, uint convert_blob_length) { + Field *field; switch (result_type()) { case REAL_RESULT: - return new Field_double(max_length,maybe_null,name,table,decimals); + field= new Field_double(max_length, maybe_null, name, decimals); + break; case INT_RESULT: - return new Field_longlong(max_length,maybe_null,name,table,unsigned_flag); + field= new Field_longlong(max_length, maybe_null, name, unsigned_flag); + break; case STRING_RESULT: - if (max_length > 255 && convert_blob_length) - return new Field_varstring(convert_blob_length, maybe_null, - name, table, - collation.collation); - return 
make_string_field(table); + if (max_length <= 255 || !convert_blob_length) + return make_string_field(table); + field= new Field_varstring(convert_blob_length, maybe_null, + name, table->s, collation.collation); + break; case DECIMAL_RESULT: - return new Field_new_decimal(max_length, maybe_null, name, table, + field= new Field_new_decimal(max_length, maybe_null, name, decimals, unsigned_flag); + break; case ROW_RESULT: default: // This case should never be choosen DBUG_ASSERT(0); return 0; } + if (field) + field->init(table); + return field; } @@ -538,9 +545,10 @@ Item_sum_hybrid::fix_fields(THD *thd, Item **ref) Field *Item_sum_hybrid::create_tmp_field(bool group, TABLE *table, uint convert_blob_length) { + Field *field; if (args[0]->type() == Item::FIELD_ITEM) { - Field *field= ((Item_field*) args[0])->field; + field= ((Item_field*) args[0])->field; if ((field= create_tmp_field_from_field(current_thd, field, name, table, NULL, convert_blob_length))) @@ -554,16 +562,21 @@ Field *Item_sum_hybrid::create_tmp_field(bool group, TABLE *table, */ switch (args[0]->field_type()) { case MYSQL_TYPE_DATE: - return new Field_date(maybe_null, name, table, collation.collation); + field= new Field_date(maybe_null, name, collation.collation); + break; case MYSQL_TYPE_TIME: - return new Field_time(maybe_null, name, table, collation.collation); + field= new Field_time(maybe_null, name, collation.collation); + break; case MYSQL_TYPE_TIMESTAMP: case MYSQL_TYPE_DATETIME: - return new Field_datetime(maybe_null, name, table, collation.collation); - default: + field= new Field_datetime(maybe_null, name, collation.collation); break; + default: + return Item_sum::create_tmp_field(group, table, convert_blob_length); } - return Item_sum::create_tmp_field(group, table, convert_blob_length); + if (field) + field->init(table); + return field; } @@ -1065,6 +1078,7 @@ Item *Item_sum_avg::copy_or_same(THD* thd) Field *Item_sum_avg::create_tmp_field(bool group, TABLE *table, uint convert_blob_len) { + Field *field; if (group) { /* @@ -1072,14 +1086,18 @@ Field *Item_sum_avg::create_tmp_field(bool group, TABLE *table, The easyest way is to do this is to store both value in a string and unpack on access. */ - return new Field_string(((hybrid_type == DECIMAL_RESULT) ? + field= new Field_string(((hybrid_type == DECIMAL_RESULT) ? dec_bin_size : sizeof(double)) + sizeof(longlong), - 0, name, table, &my_charset_bin); + 0, name, &my_charset_bin); } - if (hybrid_type == DECIMAL_RESULT) - return new Field_new_decimal(max_length, maybe_null, name, table, + else if (hybrid_type == DECIMAL_RESULT) + field= new Field_new_decimal(max_length, maybe_null, name, decimals, unsigned_flag); - return new Field_double(max_length, maybe_null, name, table, decimals); + else + field= new Field_double(max_length, maybe_null, name, decimals); + if (field) + field->init(table); + return field; } @@ -1244,6 +1262,7 @@ Item *Item_sum_variance::copy_or_same(THD* thd) Field *Item_sum_variance::create_tmp_field(bool group, TABLE *table, uint convert_blob_len) { + Field *field; if (group) { /* @@ -1251,15 +1270,19 @@ Field *Item_sum_variance::create_tmp_field(bool group, TABLE *table, The easyest way is to do this is to store both value in a string and unpack on access. */ - return new Field_string(((hybrid_type == DECIMAL_RESULT) ? + field= new Field_string(((hybrid_type == DECIMAL_RESULT) ? 
dec_bin_size0 + dec_bin_size1 : sizeof(double)*2) + sizeof(longlong), - 0, name, table, &my_charset_bin); + 0, name, &my_charset_bin); } - if (hybrid_type == DECIMAL_RESULT) - return new Field_new_decimal(max_length, maybe_null, name, table, + else if (hybrid_type == DECIMAL_RESULT) + field= new Field_new_decimal(max_length, maybe_null, name, decimals, unsigned_flag); - return new Field_double(max_length, maybe_null,name,table,decimals); + else + field= new Field_double(max_length, maybe_null, name, decimals); + if (field) + field->init(table); + return field; } @@ -2524,7 +2547,7 @@ bool Item_sum_count_distinct::setup(THD *thd) table->file->extra(HA_EXTRA_NO_ROWS); // Don't update rows table->no_rows=1; - if (table->s->db_type == DB_TYPE_HEAP) + if (table->s->db_type == &heap_hton) { /* No blobs, otherwise it would have been MyISAM: set up a compare @@ -2639,7 +2662,7 @@ bool Item_sum_count_distinct::add() */ return tree->unique_add(table->record[0] + table->s->null_bytes); } - if ((error= table->file->write_row(table->record[0])) && + if ((error= table->file->ha_write_row(table->record[0])) && error != HA_ERR_FOUND_DUPP_KEY && error != HA_ERR_FOUND_DUPP_UNIQUE) return TRUE; diff --git a/sql/item_timefunc.cc b/sql/item_timefunc.cc index 61449d3c671..3560a74ddb2 100644 --- a/sql/item_timefunc.cc +++ b/sql/item_timefunc.cc @@ -885,6 +885,19 @@ longlong Item_func_to_days::val_int() return (longlong) calc_daynr(ltime.year,ltime.month,ltime.day); } +enum_monotonicity_info Item_func_to_days::get_monotonicity_info() const +{ + if (args[0]->type() == Item::FIELD_ITEM) + { + if (args[0]->field_type() == MYSQL_TYPE_DATE) + return MONOTONIC_STRICT_INCREASING; + if (args[0]->field_type() == MYSQL_TYPE_DATETIME) + return MONOTONIC_INCREASING; + } + return NON_MONOTONIC; +} + + longlong Item_func_dayofyear::val_int() { DBUG_ASSERT(fixed == 1); @@ -1067,6 +1080,14 @@ longlong Item_func_year::val_int() return (longlong) ltime.year; } +enum_monotonicity_info Item_func_year::get_monotonicity_info() const +{ + if (args[0]->type() == Item::FIELD_ITEM && + (args[0]->field_type() == MYSQL_TYPE_DATE || + args[0]->field_type() == MYSQL_TYPE_DATETIME)) + return MONOTONIC_INCREASING; + return NON_MONOTONIC; +} longlong Item_func_unix_timestamp::val_int() { @@ -3002,18 +3023,6 @@ get_date_time_result_type(const char *format, uint length) } -Field *Item_func_str_to_date::tmp_table_field(TABLE *t_arg) -{ - if (cached_field_type == MYSQL_TYPE_TIME) - return (new Field_time(maybe_null, name, t_arg, &my_charset_bin)); - if (cached_field_type == MYSQL_TYPE_DATE) - return (new Field_date(maybe_null, name, t_arg, &my_charset_bin)); - if (cached_field_type == MYSQL_TYPE_DATETIME) - return (new Field_datetime(maybe_null, name, t_arg, &my_charset_bin)); - return (new Field_string(max_length, maybe_null, name, t_arg, &my_charset_bin)); -} - - void Item_func_str_to_date::fix_length_and_dec() { char format_buff[64]; diff --git a/sql/item_timefunc.h b/sql/item_timefunc.h index 71f595184ec..9a2cb7a4c9e 100644 --- a/sql/item_timefunc.h +++ b/sql/item_timefunc.h @@ -65,6 +65,7 @@ public: max_length=6*MY_CHARSET_BIN_MB_MAXLEN; maybe_null=1; } + enum_monotonicity_info get_monotonicity_info() const; }; @@ -234,6 +235,7 @@ public: Item_func_year(Item *a) :Item_int_func(a) {} longlong val_int(); const char *func_name() const { return "year"; } + enum_monotonicity_info get_monotonicity_info() const; void fix_length_and_dec() { decimals=0; @@ -340,10 +342,10 @@ public: max_length=MAX_DATE_WIDTH*MY_CHARSET_BIN_MB_MAXLEN; } int 
save_in_field(Field *to, bool no_conversions); - Field *tmp_table_field(TABLE *t_arg) + Field *tmp_table_field(TABLE *table) { - return (new Field_date(maybe_null, name, t_arg, &my_charset_bin)); - } + return tmp_table_field_from_field_type(table, 0); + } }; @@ -355,9 +357,9 @@ public: Item_date_func(Item *a,Item *b) :Item_str_func(a,b) {} Item_date_func(Item *a,Item *b, Item *c) :Item_str_func(a,b,c) {} enum_field_types field_type() const { return MYSQL_TYPE_DATETIME; } - Field *tmp_table_field(TABLE *t_arg) + Field *tmp_table_field(TABLE *table) { - return (new Field_datetime(maybe_null, name, t_arg, &my_charset_bin)); + return tmp_table_field_from_field_type(table, 0); } }; @@ -378,9 +380,9 @@ public: longlong val_int() { DBUG_ASSERT(fixed == 1); return value; } String *val_str(String *str); void fix_length_and_dec(); - Field *tmp_table_field(TABLE *t_arg) + Field *tmp_table_field(TABLE *table) { - return (new Field_time(maybe_null, name, t_arg, &my_charset_bin)); + return tmp_table_field_from_field_type(table, 0); } /* Abstract method that defines which time zone is used for conversion. @@ -618,9 +620,9 @@ public: } enum_field_types field_type() const { return MYSQL_TYPE_TIME; } const char *func_name() const { return "sec_to_time"; } - Field *tmp_table_field(TABLE *t_arg) + Field *tmp_table_field(TABLE *table) { - return (new Field_time(maybe_null, name, t_arg, &my_charset_bin)); + return tmp_table_field_from_field_type(table, 0); } }; @@ -741,9 +743,9 @@ public: bool get_date(TIME *ltime, uint fuzzy_date); const char *cast_type() const { return "date"; } enum_field_types field_type() const { return MYSQL_TYPE_DATE; } - Field *tmp_table_field(TABLE *t_arg) + Field *tmp_table_field(TABLE *table) { - return (new Field_date(maybe_null, name, t_arg, &my_charset_bin)); + return tmp_table_field_from_field_type(table, 0); } void fix_length_and_dec() { @@ -763,9 +765,9 @@ public: bool get_time(TIME *ltime); const char *cast_type() const { return "time"; } enum_field_types field_type() const { return MYSQL_TYPE_TIME; } - Field *tmp_table_field(TABLE *t_arg) + Field *tmp_table_field(TABLE *table) { - return (new Field_time(maybe_null, name, t_arg, &my_charset_bin)); + return tmp_table_field_from_field_type(table, 0); } }; @@ -778,9 +780,9 @@ public: String *val_str(String *str); const char *cast_type() const { return "datetime"; } enum_field_types field_type() const { return MYSQL_TYPE_DATETIME; } - Field *tmp_table_field(TABLE *t_arg) + Field *tmp_table_field(TABLE *table) { - return (new Field_datetime(maybe_null, name, t_arg, &my_charset_bin)); + return tmp_table_field_from_field_type(table, 0); } }; @@ -796,9 +798,9 @@ public: decimals=0; max_length=MAX_DATE_WIDTH*MY_CHARSET_BIN_MB_MAXLEN; } - Field *tmp_table_field(TABLE *t_arg) + Field *tmp_table_field(TABLE *table) { - return (new Field_date(maybe_null, name, t_arg, &my_charset_bin)); + return tmp_table_field_from_field_type(table, 0); } }; @@ -816,18 +818,9 @@ public: enum_field_types field_type() const { return cached_field_type; } void fix_length_and_dec(); -/* - TODO: - Change this when we support - microseconds in TIME/DATETIME -*/ - Field *tmp_table_field(TABLE *t_arg) + Field *tmp_table_field(TABLE *table) { - if (cached_field_type == MYSQL_TYPE_TIME) - return (new Field_time(maybe_null, name, t_arg, &my_charset_bin)); - else if (cached_field_type == MYSQL_TYPE_DATETIME) - return (new Field_datetime(maybe_null, name, t_arg, &my_charset_bin)); - return (new Field_string(max_length, maybe_null, name, t_arg, &my_charset_bin)); + 
return tmp_table_field_from_field_type(table, 0); } void print(String *str); const char *func_name() const { return "add_time"; } @@ -847,9 +840,9 @@ public: max_length=MAX_TIME_WIDTH*MY_CHARSET_BIN_MB_MAXLEN; maybe_null= 1; } - Field *tmp_table_field(TABLE *t_arg) + Field *tmp_table_field(TABLE *table) { - return (new Field_time(maybe_null, name, t_arg, &my_charset_bin)); + return tmp_table_field_from_field_type(table, 0); } }; @@ -866,9 +859,9 @@ public: decimals=0; max_length=MAX_TIME_WIDTH*MY_CHARSET_BIN_MB_MAXLEN; } - Field *tmp_table_field(TABLE *t_arg) + Field *tmp_table_field(TABLE *table) { - return (new Field_time(maybe_null, name, t_arg, &my_charset_bin)); + return tmp_table_field_from_field_type(table, 0); } }; @@ -942,7 +935,10 @@ public: const char *func_name() const { return "str_to_date"; } enum_field_types field_type() const { return cached_field_type; } void fix_length_and_dec(); - Field *tmp_table_field(TABLE *t_arg); + Field *tmp_table_field(TABLE *table) + { + return tmp_table_field_from_field_type(table, 1); + } }; diff --git a/sql/item_uniq.cc b/sql/item_uniq.cc index 79b2ca68f4f..9db8228b345 100644 --- a/sql/item_uniq.cc +++ b/sql/item_uniq.cc @@ -25,5 +25,8 @@ Field *Item_sum_unique_users::create_tmp_field(bool group, TABLE *table, uint convert_blob_length) { - return new Field_long(9,maybe_null,name,table,1); + Field *field= new Field_long(9, maybe_null, name, 1); + if (field) + field->init(table); + return field; } diff --git a/sql/item_xmlfunc.cc b/sql/item_xmlfunc.cc new file mode 100644 index 00000000000..bb5775780fa --- /dev/null +++ b/sql/item_xmlfunc.cc @@ -0,0 +1,2572 @@ +/* Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + + +#ifdef __GNUC__ +#pragma implementation +#endif + +#include "mysql_priv.h" +#include "my_xml.h" + + +/* + TODO: future development directions: + 1. add real constants for XPATH_NODESET_CMP and XPATH_NODESET + into enum Type in item.h. + 2. add nodeset_to_nodeset_comparator + 3. add lacking functions: + - name() + - last() + - lang() + - string() + - id() + - translate() + - local-name() + - starts-with() + - namespace-uri() + - substring-after() + - normalize-space() + - substring-before() + 4. 
add lacking axes: + - following-sibling + - following, + - preceding-sibling + - preceding +*/ + + +/* Structure to store a parsed XML tree */ +typedef struct my_xml_node_st +{ + uint level; /* level in XML tree, 0 means root node */ + enum my_xml_node_type type; /* node type: node, or attribute, or text */ + uint parent; /* link to the parent */ + const char *beg; /* beginning of the name or text */ + const char *end; /* end of the name or text */ + const char *tagend; /* where this tag ends */ +} MY_XML_NODE; + + +/* Lexical analyzer token */ +typedef struct my_xpath_lex_st +{ + int term; /* token type, see MY_XPATH_LEX_XXXXX below */ + const char *beg; /* beginning of the token */ + const char *end; /* end of the token */ +} MY_XPATH_LEX; + + +/* Structure to store nodesets */ +typedef struct my_xpath_flt_st +{ + uint num; /* absolute position in MY_XML_NODE array */ + uint pos; /* relative position in context */ +} MY_XPATH_FLT; + + +/* XPath function creator */ +typedef struct my_xpath_function_names_st +{ + const char *name; /* function name */ + size_t length; /* function name length */ + size_t minargs; /* min number of arguments */ + size_t maxargs; /* max number of arguments */ + Item *(*create)(struct my_xpath_st *xpath, Item **args, uint nargs); +} MY_XPATH_FUNC; + + +/* XPath query parser */ +typedef struct my_xpath_st +{ + int debug; + MY_XPATH_LEX query; /* Whole query */ + MY_XPATH_LEX lasttok; /* last scanned token */ + MY_XPATH_LEX prevtok; /* previous scanned token */ + int axis; /* last scanned axis */ + int extra; /* last scanned "extra", context dependent */ + MY_XPATH_FUNC *func; /* last scanned function creator */ + Item *item; /* current expression */ + Item *context; /* last scanned context */ + String *context_cache; /* last context provider */ + String *pxml; /* Parsed XML, an array of MY_XML_NODE */ + CHARSET_INFO *cs; /* character set/collation for string comparison */ +} MY_XPATH; + + +/* Dynamic array of MY_XPATH_FLT */ +class XPathFilter :public String +{ +public: + XPathFilter() :String() {} + inline bool append_element(MY_XPATH_FLT *flt) + { + String *str= this; + return str->append((const char*)flt, (uint32) sizeof(MY_XPATH_FLT)); + } + inline bool append_element(uint32 num, uint32 pos) + { + MY_XPATH_FLT add; + add.num= num; + add.pos= pos; + return append_element(&add); + } + inline MY_XPATH_FLT *element(uint i) + { + return (MY_XPATH_FLT*) (ptr() + i * sizeof(MY_XPATH_FLT)); + } + inline uint32 numelements() + { + return length() / sizeof(MY_XPATH_FLT); + } +}; + + +/* + Common features of the functions returning a node set.
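// The XPathFilter class above shows the node-set representation used
// throughout this file: a String employed as an untyped growable array of
// fixed-size MY_XPATH_FLT records. A self-contained sketch of the same
// trick, with std::string as the byte buffer (illustrative only, not the
// server code):

#include <cstddef>
#include <cstring>
#include <string>

struct Flt { unsigned num, pos; };            // like MY_XPATH_FLT

struct Filter
{
  std::string buf;                            // plays the role of String
  void append_element(unsigned num, unsigned pos)
  {
    Flt f= { num, pos };
    buf.append(reinterpret_cast<const char*>(&f), sizeof(f));
  }
  Flt element(std::size_t i) const            // i-th record, by byte offset
  {
    Flt f;
    std::memcpy(&f, buf.data() + i * sizeof(Flt), sizeof(f));
    return f;
  }
  std::size_t numelements() const { return buf.size() / sizeof(Flt); }
};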
+*/ +class Item_nodeset_func :public Item_str_func +{ +protected: + String tmp_value, tmp2_value; + MY_XPATH_FLT *fltbeg, *fltend; + MY_XML_NODE *nodebeg, *nodeend; + uint numnodes; +public: + String *pxml; + String context_cache; + Item_nodeset_func(String *pxml_arg) :Item_str_func(), pxml(pxml_arg) {} + Item_nodeset_func(Item *a, String *pxml_arg) + :Item_str_func(a), pxml(pxml_arg) {} + Item_nodeset_func(Item *a, Item *b, String *pxml_arg) + :Item_str_func(a, b), pxml(pxml_arg) {} + Item_nodeset_func(Item *a, Item *b, Item *c, String *pxml_arg) + :Item_str_func(a,b,c), pxml(pxml_arg) {} + void prepare_nodes() + { + nodebeg= (MY_XML_NODE*) pxml->ptr(); + nodeend= (MY_XML_NODE*) (pxml->ptr() + pxml->length()); + numnodes= nodeend - nodebeg; + } + void prepare(String *nodeset) + { + prepare_nodes(); + String *res= args[0]->val_nodeset(&tmp_value); + fltbeg= (MY_XPATH_FLT*) res->ptr(); + fltend= (MY_XPATH_FLT*) (res->ptr() + res->length()); + nodeset->length(0); + } + enum Type type() const { return XPATH_NODESET; } + String *val_str(String *str) + { + prepare_nodes(); + String *res= val_nodeset(&tmp2_value); + fltbeg= (MY_XPATH_FLT*) res->ptr(); + fltend= (MY_XPATH_FLT*) (res->ptr() + res->length()); + String active; + active.alloc(numnodes); + bzero((char*) active.ptr(), numnodes); + for (MY_XPATH_FLT *flt= fltbeg; flt < fltend; flt++) + { + MY_XML_NODE *node; + uint j; + for (j=0, node= nodebeg ; j < numnodes; j++, node++) + { + if (node->type == MY_XML_NODE_TEXT && + node->parent == flt->num) + active[j]= 1; + } + } + + str->length(0); + str->set_charset(collation.collation); + for (uint i=0 ; i < numnodes; i++) + { + if(active[i]) + { + if (str->length()) + str->append(" ", 1, &my_charset_latin1); + str->append(nodebeg[i].beg, nodebeg[i].end - nodebeg[i].beg); + } + } + return str; + } + enum Item_result result_type () const { return STRING_RESULT; } + void fix_length_and_dec() { max_length= MAX_BLOB_WIDTH; } + const char *func_name() const { return "nodeset"; } +}; + + +/* Returns an XML root */ +class Item_nodeset_func_rootelement :public Item_nodeset_func +{ +public: + Item_nodeset_func_rootelement(String *pxml): Item_nodeset_func(pxml) {} + const char *func_name() const { return "xpath_rootelement"; } + String *val_nodeset(String *nodeset); +}; + + +/* Returns a Union of two node sets */ +class Item_nodeset_func_union :public Item_nodeset_func +{ +public: + Item_nodeset_func_union(Item *a, Item *b, String *pxml) + :Item_nodeset_func(a, b, pxml) {} + const char *func_name() const { return "xpath_union"; } + String *val_nodeset(String *nodeset); +}; + + +/* Makes one step towards the given axis */ +class Item_nodeset_func_axisbyname :public Item_nodeset_func +{ + const char *node_name; + uint node_namelen; +public: + Item_nodeset_func_axisbyname(Item *a, const char *n_arg, uint l_arg, + String *pxml): + Item_nodeset_func(a, pxml), node_name(n_arg), node_namelen(l_arg) { } + const char *func_name() const { return "xpath_axisbyname"; } + bool validname(MY_XML_NODE *n) + { + if (node_name[0] == '*') + return 1; + return (node_namelen == (uint) (n->end - n->beg)) && + !memcmp(node_name, n->beg, node_namelen); + } +}; + + +/* Returns children */ +class Item_nodeset_func_childbyname: public Item_nodeset_func_axisbyname +{ +public: + Item_nodeset_func_childbyname(Item *a, const char *n_arg, uint l_arg, + String *pxml): + Item_nodeset_func_axisbyname(a, n_arg, l_arg, pxml) {} + const char *func_name() const { return "xpath_childbyname"; } + String *val_nodeset(String *nodeset); +}; + + +/* 
Returns descendants */ +class Item_nodeset_func_descendantbyname: public Item_nodeset_func_axisbyname +{ + bool need_self; +public: + Item_nodeset_func_descendantbyname(Item *a, const char *n_arg, uint l_arg, + String *pxml, bool need_self_arg): + Item_nodeset_func_axisbyname(a, n_arg, l_arg, pxml), + need_self(need_self_arg) {} + const char *func_name() const { return "xpath_descendantbyname"; } + String *val_nodeset(String *nodeset); +}; + + +/* Returns ancestors */ +class Item_nodeset_func_ancestorbyname: public Item_nodeset_func_axisbyname +{ + bool need_self; +public: + Item_nodeset_func_ancestorbyname(Item *a, const char *n_arg, uint l_arg, + String *pxml, bool need_self_arg): + Item_nodeset_func_axisbyname(a, n_arg, l_arg, pxml), + need_self(need_self_arg) {} + const char *func_name() const { return "xpath_ancestorbyname"; } + String *val_nodeset(String *nodeset); +}; + + +/* Returns parents */ +class Item_nodeset_func_parentbyname: public Item_nodeset_func_axisbyname +{ +public: + Item_nodeset_func_parentbyname(Item *a, const char *n_arg, uint l_arg, + String *pxml): + Item_nodeset_func_axisbyname(a, n_arg, l_arg, pxml) {} + const char *func_name() const { return "xpath_parentbyname"; } + String *val_nodeset(String *nodeset); +}; + + +/* Returns attributes */ +class Item_nodeset_func_attributebyname: public Item_nodeset_func_axisbyname +{ +public: + Item_nodeset_func_attributebyname(Item *a, const char *n_arg, uint l_arg, + String *pxml): + Item_nodeset_func_axisbyname(a, n_arg, l_arg, pxml) {} + const char *func_name() const { return "xpath_attributebyname"; } + String *val_nodeset(String *nodeset); +}; + + +/* + Condition iterator: goes through all nodes in the current + context and checks a condition, returning those nodes + giving TRUE condition result. +*/ +class Item_nodeset_func_predicate :public Item_nodeset_func +{ +public: + Item_nodeset_func_predicate(Item *a, Item *b, String *pxml): + Item_nodeset_func(a, b, pxml) {} + const char *func_name() const { return "xpath_predicate"; } + String *val_nodeset(String *nodeset); +}; + + +/* Selects nodes with a given position in context */ +class Item_nodeset_func_elementbyindex :public Item_nodeset_func +{ +public: + Item_nodeset_func_elementbyindex(Item *a, Item *b, String *pxml): + Item_nodeset_func(a, b, pxml) { } + const char *func_name() const { return "xpath_elementbyindex"; } + String *val_nodeset(String *nodeset); +}; + + +/* + We need to distinguish a number from a boolean: + a[1] and a[true] are different things in XPath. +*/ +class Item_bool :public Item_int +{ +public: + Item_bool(int32 i): Item_int(i) {} + const char *func_name() const { return "xpath_bool"; } + bool is_bool_func() { return 1; } +}; + + +/* + Converts its argument into a boolean value. + * a number is true if it is non-zero + * a node-set is true if and only if it is non-empty + * a string is true if and only if its length is non-zero +*/ +class Item_xpath_cast_bool :public Item_int_func +{ + String *pxml; + String tmp_value; +public: + Item_xpath_cast_bool(Item *a, String *pxml_arg) + :Item_int_func(a), pxml(pxml_arg) {} + const char *func_name() const { return "xpath_cast_bool"; } + bool is_bool_func() { return 1; } + longlong val_int() + { + if (args[0]->type() == XPATH_NODESET) + { + String *flt= args[0]->val_nodeset(&tmp_value); + return flt->length() == sizeof(MY_XPATH_FLT) ? 1 : 0; + } + return args[0]->val_real() ? 
1 : 0; + } +}; + + +/* + Converts its argument into a number +*/ +class Item_xpath_cast_number :public Item_real_func +{ +public: + Item_xpath_cast_number(Item *a): Item_real_func(a) {} + const char *func_name() const { return "xpath_cast_number"; } + virtual double val_real() { return args[0]->val_real(); } +}; + + +/* + Context cache, for predicate +*/ +class Item_nodeset_context_cache :public Item_nodeset_func +{ +public: + String *string_cache; + Item_nodeset_context_cache(String *str_arg, String *pxml): + Item_nodeset_func(pxml), string_cache(str_arg) { } + String *val_nodeset(String *res) + { return string_cache; } + void fix_length_and_dec() { max_length= MAX_BLOB_WIDTH; } +}; + + +class Item_func_xpath_position :public Item_int_func +{ + String *pxml; + String tmp_value; +public: + Item_func_xpath_position(Item *a, String *p) + :Item_int_func(a), pxml(p) {} + const char *func_name() const { return "xpath_position"; } + void fix_length_and_dec() { max_length=10; } + longlong val_int() + { + String *flt= args[0]->val_nodeset(&tmp_value); + if (flt->length() == sizeof(MY_XPATH_FLT)) + return ((MY_XPATH_FLT*)flt->ptr())->pos + 1; + return 0; + } +}; + + +class Item_func_xpath_count :public Item_int_func +{ + String *pxml; + String tmp_value; +public: + Item_func_xpath_count(Item *a, String *p) + :Item_int_func(a), pxml(p) {} + const char *func_name() const { return "xpath_count"; } + void fix_length_and_dec() { max_length=10; } + longlong val_int() + { + String *res= args[0]->val_nodeset(&tmp_value); + return res->length() / sizeof(MY_XPATH_FLT); + } +}; + + +class Item_func_xpath_sum :public Item_real_func +{ + String *pxml; + String tmp_value; +public: + Item_func_xpath_sum(Item *a, String *p) + :Item_real_func(a), pxml(p) {} + + const char *func_name() const { return "xpath_sum"; } + double val_real() + { + double sum= 0; + String *res= args[0]->val_nodeset(&tmp_value); + MY_XPATH_FLT *fltbeg= (MY_XPATH_FLT*) res->ptr(); + MY_XPATH_FLT *fltend= (MY_XPATH_FLT*) (res->ptr() + res->length()); + uint numnodes= pxml->length() / sizeof(MY_XML_NODE); + MY_XML_NODE *nodebeg= (MY_XML_NODE*) pxml->ptr(); + + for (MY_XPATH_FLT *flt= fltbeg; flt < fltend; flt++) + { + MY_XML_NODE *self= &nodebeg[flt->num]; + for (uint j= flt->num + 1; j < numnodes; j++) + { + MY_XML_NODE *node= &nodebeg[j]; + if (node->level <= self->level) + break; + if ((node->parent == flt->num) && + (node->type == MY_XML_NODE_TEXT)) + { + char *end; + int err; + double add= my_strntod(collation.collation, (char*) node->beg, + node->end - node->beg, &end, &err); + if (!err) + sum+= add; + } + } + } + return sum; + } +}; + + +class Item_nodeset_to_const_comparator :public Item_bool_func +{ + String *pxml; + String tmp_nodeset; +public: + Item_nodeset_to_const_comparator(Item *nodeset, Item *cmpfunc, String *p) + :Item_bool_func(nodeset,cmpfunc), pxml(p) {} + enum Type type() const { return XPATH_NODESET_CMP; }; + const char *func_name() const { return "xpath_nodeset_to_const_comparator"; } + bool is_bool_func() { return 1; } + + longlong val_int() + { + Item_func *comp= (Item_func*)args[1]; + Item_string *fake= (Item_string*)(comp->arguments()[1]); + String *res= args[0]->val_nodeset(&tmp_nodeset); + MY_XPATH_FLT *fltbeg= (MY_XPATH_FLT*) res->ptr(); + MY_XPATH_FLT *fltend= (MY_XPATH_FLT*) (res->ptr() + res->length()); + MY_XML_NODE *nodebeg= (MY_XML_NODE*) pxml->ptr(); + uint numnodes= pxml->length() / sizeof(MY_XML_NODE); + + for (MY_XPATH_FLT *flt= fltbeg; flt < fltend; flt++) + { + MY_XML_NODE *self= &nodebeg[flt->num]; + 
for (uint j= flt->num + 1; j < numnodes; j++) + { + MY_XML_NODE *node= &nodebeg[j]; + if (node->level <= self->level) + break; + if ((node->parent == flt->num) && + (node->type == MY_XML_NODE_TEXT)) + { + fake->str_value.set(node->beg, node->end - node->beg, + collation.collation); + if (args[1]->val_int()) + return 1; + } + } + } + return 0; + } +}; + + +String *Item_nodeset_func_rootelement::val_nodeset(String *nodeset) +{ + nodeset->length(0); + ((XPathFilter*)nodeset)->append_element(0, 0); + return nodeset; +} + + +String * Item_nodeset_func_union::val_nodeset(String *nodeset) +{ + uint numnodes= pxml->length() / sizeof(MY_XML_NODE); + String set0, *s0= args[0]->val_nodeset(&set0); + String set1, *s1= args[1]->val_nodeset(&set1); + String both_str; + both_str.alloc(numnodes); + char *both= (char*) both_str.ptr(); + bzero((void*)both, numnodes); + uint pos= 0; + MY_XPATH_FLT *flt; + + fltbeg= (MY_XPATH_FLT*) s0->ptr(); + fltend= (MY_XPATH_FLT*) (s0->ptr() + s0->length()); + for (flt= fltbeg; flt < fltend; flt++) + both[flt->num]= 1; + + fltbeg= (MY_XPATH_FLT*) s1->ptr(); + fltend= (MY_XPATH_FLT*) (s1->ptr() + s1->length()); + for (flt= fltbeg; flt < fltend; flt++) + both[flt->num]= 1; + + nodeset->length(0); + for (uint i= 0, pos= 0; i < numnodes; i++) + { + if (both[i]) + ((XPathFilter*)nodeset)->append_element(i, pos++); + } + return nodeset; +} + + +String *Item_nodeset_func_childbyname::val_nodeset(String *nodeset) +{ + prepare(nodeset); + for (MY_XPATH_FLT *flt= fltbeg; flt < fltend; flt++) + { + MY_XML_NODE *self= &nodebeg[flt->num]; + for (uint pos= 0, j= flt->num + 1 ; j < numnodes; j++) + { + MY_XML_NODE *node= &nodebeg[j]; + if (node->level <= self->level) + break; + if ((node->parent == flt->num) && + (node->type == MY_XML_NODE_TAG) && + validname(node)) + ((XPathFilter*)nodeset)->append_element(j, pos++); + } + } + return nodeset; +} + + +String *Item_nodeset_func_descendantbyname::val_nodeset(String *nodeset) +{ + prepare(nodeset); + for (MY_XPATH_FLT *flt= fltbeg; flt < fltend; flt++) + { + uint pos= 0; + MY_XML_NODE *self= &nodebeg[flt->num]; + if (need_self && validname(self)) + ((XPathFilter*)nodeset)->append_element(flt->num,pos++); + for (uint j= flt->num + 1 ; j < numnodes ; j++) + { + MY_XML_NODE *node= &nodebeg[j]; + if (node->level <= self->level) + break; + if ((node->type == MY_XML_NODE_TAG) && validname(node)) + ((XPathFilter*)nodeset)->append_element(j,pos++); + } + } + return nodeset; +} + + +String *Item_nodeset_func_ancestorbyname::val_nodeset(String *nodeset) +{ + char *active; + String active_str; + prepare(nodeset); + active_str.alloc(numnodes); + active= (char*) active_str.ptr(); + bzero((void*)active, numnodes); + uint pos= 0; + + for (MY_XPATH_FLT *flt= fltbeg; flt < fltend; flt++) + { + /* + Go to the root and add all nodes on the way. 
+ Don't add the root if context is the root itself + */ + MY_XML_NODE *self= &nodebeg[flt->num]; + if (need_self && validname(self)) + { + active[flt->num]= 1; + pos++; + } + + for (uint j= self->parent; nodebeg[j].parent != j; j= nodebeg[j].parent) + { + if (flt->num && validname(&nodebeg[j])) + { + active[j]= 1; + pos++; + } + } + } + + for (uint j= 0; j < numnodes ; j++) + { + if (active[j]) + ((XPathFilter*)nodeset)->append_element(j, --pos); + } + return nodeset; +} + + +String *Item_nodeset_func_parentbyname::val_nodeset(String *nodeset) +{ + char *active; + String active_str; + prepare(nodeset); + active_str.alloc(numnodes); + active= (char*) active_str.ptr(); + bzero((void*)active, numnodes); + for (MY_XPATH_FLT *flt= fltbeg; flt < fltend; flt++) + { + uint j= nodebeg[flt->num].parent; + if (flt->num && validname(&nodebeg[j])) + active[j]= 1; + } + for (uint j= 0, pos= 0; j < numnodes ; j++) + { + if (active[j]) + ((XPathFilter*)nodeset)->append_element(j, pos++); + } + return nodeset; +} + + +String *Item_nodeset_func_attributebyname::val_nodeset(String *nodeset) +{ + prepare(nodeset); + for (MY_XPATH_FLT *flt= fltbeg; flt < fltend; flt++) + { + MY_XML_NODE *self= &nodebeg[flt->num]; + for (uint pos=0, j= flt->num + 1 ; j < numnodes; j++) + { + MY_XML_NODE *node= &nodebeg[j]; + if (node->level <= self->level) + break; + if ((node->parent == flt->num) && + (node->type == MY_XML_NODE_ATTR) && + validname(node)) + ((XPathFilter*)nodeset)->append_element(j, pos++); + } + } + return nodeset; +} + + +String *Item_nodeset_func_predicate::val_nodeset(String *str) +{ + Item_nodeset_func *nodeset_func= (Item_nodeset_func*) args[0]; + Item_func *comp_func= (Item_func*)args[1]; + uint pos= 0; + prepare(str); + for (MY_XPATH_FLT *flt= fltbeg; flt < fltend; flt++) + { + nodeset_func->context_cache.length(0); + ((XPathFilter*)(&nodeset_func->context_cache))->append_element(flt->num, + flt->pos); + if (comp_func->val_int()) + ((XPathFilter*)str)->append_element(flt->num, pos++); + } + return str; +} + + +String *Item_nodeset_func_elementbyindex::val_nodeset(String *nodeset) +{ + prepare(nodeset); + int index= args[1]->val_int() - 1; + if (index >= 0) + { + MY_XPATH_FLT *flt; + uint pos; + for (pos= 0, flt= fltbeg; flt < fltend; flt++) + { + if (flt->pos == (uint) index || args[1]->is_bool_func()) + ((XPathFilter*)nodeset)->append_element(flt->num, pos++); + } + } + return nodeset; +} + + +/* + If the item is a node set, casts it to boolean, + otherwise returns the item itself. +*/ +static Item* nodeset2bool(MY_XPATH *xpath, Item *item) +{ + if (item->type() == Item::XPATH_NODESET) + return new Item_xpath_cast_bool(item, xpath->pxml); + return item; +} + + +/* + XPath lexical tokens +*/ +#define MY_XPATH_LEX_DIGITS 'd' +#define MY_XPATH_LEX_IDENT 'i' +#define MY_XPATH_LEX_STRING 's' +#define MY_XPATH_LEX_SLASH '/' +#define MY_XPATH_LEX_LB '[' +#define MY_XPATH_LEX_RB ']' +#define MY_XPATH_LEX_LP '(' +#define MY_XPATH_LEX_RP ')' +#define MY_XPATH_LEX_EQ '=' +#define MY_XPATH_LEX_LESS '<' +#define MY_XPATH_LEX_GREATER '>' +#define MY_XPATH_LEX_AT '@' +#define MY_XPATH_LEX_COLON ':' +#define MY_XPATH_LEX_ASTERISK '*' +#define MY_XPATH_LEX_DOT '.' +#define MY_XPATH_LEX_VLINE '|' +#define MY_XPATH_LEX_MINUS '-' +#define MY_XPATH_LEX_PLUS '+' +#define MY_XPATH_LEX_EXCL '!'
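// Back-reference to the xpath_predicate iterator above: the condition item
// is evaluated once per node by rewriting the argument's one-element context
// cache and re-running the comparison. The control flow, modeled on the
// Filter/Flt sketch earlier in this section (a sketch under those assumed
// types, not server code):

#include <functional>

static Filter eval_predicate(const Filter &in, Filter &context_cache,
                             const std::function<bool()> &cond)
{
  Filter out;
  unsigned pos= 0;
  for (std::size_t i= 0; i < in.numelements(); i++)
  {
    Flt flt= in.element(i);
    context_cache.buf.clear();                      // the current node becomes
    context_cache.append_element(flt.num, flt.pos); // the whole context
    if (cond())                                     // comp_func->val_int() here
      out.append_element(flt.num, pos++);           // keep nodes that pass
  }
  return out;
}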
+#define MY_XPATH_LEX_COMMA ',' +#define MY_XPATH_LEX_DOLLAR '$' +#define MY_XPATH_LEX_ERROR 'A' +#define MY_XPATH_LEX_EOF 'B' +#define MY_XPATH_LEX_AND 'C' +#define MY_XPATH_LEX_OR 'D' +#define MY_XPATH_LEX_DIV 'E' +#define MY_XPATH_LEX_MOD 'F' +#define MY_XPATH_LEX_FUNC 'G' +#define MY_XPATH_LEX_NODETYPE 'H' +#define MY_XPATH_LEX_AXIS 'I' +#define MY_XPATH_LEX_LE 'J' +#define MY_XPATH_LEX_GE 'K' + + +/* + XPath axis type +*/ +#define MY_XPATH_AXIS_ANCESTOR 0 +#define MY_XPATH_AXIS_ANCESTOR_OR_SELF 1 +#define MY_XPATH_AXIS_ATTRIBUTE 2 +#define MY_XPATH_AXIS_CHILD 3 +#define MY_XPATH_AXIS_DESCENDANT 4 +#define MY_XPATH_AXIS_DESCENDANT_OR_SELF 5 +#define MY_XPATH_AXIS_FOLLOWING 6 +#define MY_XPATH_AXIS_FOLLOWING_SIBLING 7 +#define MY_XPATH_AXIS_NAMESPACE 8 +#define MY_XPATH_AXIS_PARENT 9 +#define MY_XPATH_AXIS_PRECEDING 10 +#define MY_XPATH_AXIS_PRECEDING_SIBLING 11 +#define MY_XPATH_AXIS_SELF 12 + + +/* + Create scalar comparator + + SYNOPSYS + Create a comparator function for scalar arguments, + for the given arguments and operation. + + RETURN + The newly created item. +*/ +static Item *eq_func(int oper, Item *a, Item *b) +{ + switch (oper) + { + case '=': return new Item_func_eq(a, b); + case '!': return new Item_func_ne(a, b); + case MY_XPATH_LEX_GE: return new Item_func_ge(a, b); + case MY_XPATH_LEX_LE: return new Item_func_le(a, b); + case MY_XPATH_LEX_GREATER: return new Item_func_gt(a, b); + case MY_XPATH_LEX_LESS: return new Item_func_lt(a, b); + } + return 0; +} + + +/* + Create scalar comparator + + SYNOPSYS + Create a comparator function for scalar arguments, + for the given arguments and reverse operation, e.g. + + A >= B is converted into A < B + + RETURN + The newly created item. +*/ +static Item *eq_func_reverse(int oper, Item *a, Item *b) +{ + switch (oper) + { + case '=': return new Item_func_eq(a, b); + case '!': return new Item_func_ne(a, b); + case MY_XPATH_LEX_GE: return new Item_func_lt(a, b); + case MY_XPATH_LEX_LE: return new Item_func_gt(a, b); + case MY_XPATH_LEX_GREATER: return new Item_func_le(a, b); + case MY_XPATH_LEX_LESS: return new Item_func_ge(a, b); + } + return 0; +} + + +/* + Create a comparator + + SYNOPSYS + Create a comparator for scalar or non-scalar arguments, + for the given arguments and operation. + + RETURN + The newly created item. +*/ +static Item *create_comparator(MY_XPATH *xpath, int oper, Item *a, Item *b) +{ + if (a->type() != Item::XPATH_NODESET && + b->type() != Item::XPATH_NODESET) + { + return eq_func(oper, a, b); // two scalar arguments + } + else if (a->type() == Item::XPATH_NODESET && + b->type() == Item::XPATH_NODESET) + { + return 0; // TODO: Comparison of two nodesets + } + else + { + /* + Compare a node set to a scalar value. + We just create a fake Item_string() argument, + which will be filled in with the particular value + in a loop through all of the nodes in the node set. + */ + + Item *fake= new Item_string("", 0, xpath->cs); + Item_nodeset_func *nodeset; + Item *scalar, *comp; + if (a->type() == Item::XPATH_NODESET) + { + nodeset= (Item_nodeset_func*) a; + scalar= b; + comp= eq_func(oper, scalar, fake); + } + else + { + nodeset= (Item_nodeset_func*) b; + scalar= a; + comp= eq_func_reverse(oper, scalar, fake); + } + return new Item_nodeset_to_const_comparator(nodeset, comp, xpath->pxml); + } +} + + +/* + Create a step + + SYNOPSYS + Create a step function for the given argument and axis. + + RETURN + The newly created item.
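// The nodeset-to-scalar branch of create_comparator() above avoids building
// a new comparison item per node: one placeholder string item ("fake") is
// created up front, and Item_nodeset_to_const_comparator later re-points its
// value at each node's text before re-evaluating. A reduced standalone model
// of that loop (names invented for the sketch):

#include <cstddef>
#include <string>

struct FakeStr { std::string value; };          // stands in for the fake item

static bool nodeset_cmp(const char *const *texts, std::size_t n, FakeStr *fake,
                        bool (*cmp)(const FakeStr*)) // the eq_func() result
{
  for (std::size_t i= 0; i < n; i++)
  {
    fake->value= texts[i];    // like fake->str_value.set(node->beg, ...)
    if (cmp(fake))            // like args[1]->val_int()
      return true;            // XPath: true if ANY node satisfies the compare
  }
  return false;
}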
+*/ +static Item* nametestfunc(MY_XPATH *xpath, + int type, Item *arg, const char *beg, uint len) +{ + DBUG_ASSERT(arg != 0); + DBUG_ASSERT(arg->type() == Item::XPATH_NODESET); + DBUG_ASSERT(beg != 0); + DBUG_ASSERT(len > 0); + + Item *res; + switch (type) + { + case MY_XPATH_AXIS_ANCESTOR: + res= new Item_nodeset_func_ancestorbyname(arg, beg, len, xpath->pxml, 0); + break; + case MY_XPATH_AXIS_ANCESTOR_OR_SELF: + res= new Item_nodeset_func_ancestorbyname(arg, beg, len, xpath->pxml, 1); + break; + case MY_XPATH_AXIS_PARENT: + res= new Item_nodeset_func_parentbyname(arg, beg, len, xpath->pxml); + break; + case MY_XPATH_AXIS_DESCENDANT: + res= new Item_nodeset_func_descendantbyname(arg, beg, len, xpath->pxml, 0); + break; + case MY_XPATH_AXIS_DESCENDANT_OR_SELF: + res= new Item_nodeset_func_descendantbyname(arg, beg, len, xpath->pxml, 1); + break; + case MY_XPATH_AXIS_ATTRIBUTE: + res= new Item_nodeset_func_attributebyname(arg, beg, len, xpath->pxml); + break; + default: + res= new Item_nodeset_func_childbyname(arg, beg, len, xpath->pxml); + } + return res; +} + + +/* + Tokens consisting of one character, for a faster lexical analyzer. +*/ +static char simpletok[128]= +{ + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +/* + ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? + @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ + ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ +*/ + 0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,1,0,1,1,1,0, + 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0 +}; + + +/* + XPath keywords +*/ +struct my_xpath_keyword_names_st +{ + int tok; + const char *name; + size_t length; + int extra; +}; + + +static struct my_xpath_keyword_names_st my_keyword_names[] = +{ + {MY_XPATH_LEX_AND , "and" , 3, 0 }, + {MY_XPATH_LEX_OR , "or" , 2, 0 }, + {MY_XPATH_LEX_DIV , "div" , 3, 0 }, + {MY_XPATH_LEX_MOD , "mod" , 3, 0 }, + + {MY_XPATH_LEX_NODETYPE, "comment" , 7, 0 }, + {MY_XPATH_LEX_NODETYPE, "text" , 4, 0 }, + {MY_XPATH_LEX_NODETYPE, "processing-instruction" , 22,0 }, + {MY_XPATH_LEX_NODETYPE, "node" , 4, 0 }, + + {MY_XPATH_LEX_AXIS,"ancestor" , 8,MY_XPATH_AXIS_ANCESTOR }, + {MY_XPATH_LEX_AXIS,"ancestor-or-self" ,16,MY_XPATH_AXIS_ANCESTOR_OR_SELF }, + {MY_XPATH_LEX_AXIS,"attribute" , 9,MY_XPATH_AXIS_ATTRIBUTE }, + {MY_XPATH_LEX_AXIS,"child" , 5,MY_XPATH_AXIS_CHILD }, + {MY_XPATH_LEX_AXIS,"descendant" ,10,MY_XPATH_AXIS_DESCENDANT }, + {MY_XPATH_LEX_AXIS,"descendant-or-self",18,MY_XPATH_AXIS_DESCENDANT_OR_SELF}, + {MY_XPATH_LEX_AXIS,"following" , 9,MY_XPATH_AXIS_FOLLOWING }, + {MY_XPATH_LEX_AXIS,"following-sibling" ,17,MY_XPATH_AXIS_FOLLOWING_SIBLING }, + {MY_XPATH_LEX_AXIS,"namespace" , 9,MY_XPATH_AXIS_NAMESPACE }, + {MY_XPATH_LEX_AXIS,"parent" , 6,MY_XPATH_AXIS_PARENT }, + {MY_XPATH_LEX_AXIS,"preceding" , 9,MY_XPATH_AXIS_PRECEDING }, + {MY_XPATH_LEX_AXIS,"preceding-sibling" ,17,MY_XPATH_AXIS_PRECEDING_SIBLING }, + {MY_XPATH_LEX_AXIS,"self" , 4,MY_XPATH_AXIS_SELF }, + + {0,NULL,0,0} +}; + + +/* + Lookup a keyword + + SYNOPSYS + Check that the last scanned identifier is a keyword. + + RETURN + - Token type, on lookup success. + - MY_XPATH_LEX_IDENT, on lookup failure.
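// Note on simpletok[] above: single-character tokens are classified with one
// array load, and the character itself doubles as its own token code (so '/'
// is MY_XPATH_LEX_SLASH, '[' is MY_XPATH_LEX_LB, and so on). How a scanner
// consumes such a table, as a cut-down sketch:

static int one_char_token(const char *p, const char simple[128])
{
  unsigned char c= (unsigned char) *p;
  return (c < 128 && simple[c]) ? c : 0;   // 0 means: not a one-char token
}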
+*/ +static int my_xpath_keyword(MY_XPATH *x, const char *beg, const char *end) +{ + struct my_xpath_keyword_names_st *k; + size_t length= end-beg; + for (k= my_keyword_names; k->name; k++) + { + if (length == k->length && !strncasecmp(beg, k->name, length)) + { + x->extra= k->extra; + return k->tok; + } + } + return MY_XPATH_LEX_IDENT; +} + + +/* + Functions to create an item, à la those in item_create.cc +*/ + +static Item *create_func_true(MY_XPATH *xpath, Item **args, uint nargs) +{ + return new Item_bool(1); +} + + +static Item *create_func_false(MY_XPATH *xpath, Item **args, uint nargs) +{ + return new Item_bool(0); +} + + +static Item *create_func_not(MY_XPATH *xpath, Item **args, uint nargs) +{ + return new Item_func_not(nodeset2bool(xpath, args[0])); +} + + +static Item *create_func_ceiling(MY_XPATH *xpath, Item **args, uint nargs) +{ + return new Item_func_ceiling(args[0]); +} + + +static Item *create_func_floor(MY_XPATH *xpath, Item **args, uint nargs) +{ + return new Item_func_floor(args[0]); +} + + +static Item *create_func_bool(MY_XPATH *xpath, Item **args, uint nargs) +{ + return new Item_xpath_cast_bool(args[0], xpath->pxml); +} + + +static Item *create_func_number(MY_XPATH *xpath, Item **args, uint nargs) +{ + return new Item_xpath_cast_number(args[0]); +} + + +static Item *create_func_round(MY_XPATH *xpath, Item **args, uint nargs) +{ + return new Item_func_round(args[0], new Item_int((char*)"0",0,1),0); +} + + +static Item *create_func_last(MY_XPATH *xpath, Item **args, uint nargs) +{ + return new Item_func_xpath_count(xpath->context, xpath->pxml); +} + + +static Item *create_func_position(MY_XPATH *xpath, Item **args, uint nargs) +{ + return new Item_func_xpath_position(xpath->context, xpath->pxml); +} + + +static Item *create_func_contains(MY_XPATH *xpath, Item **args, uint nargs) +{ + return new Item_xpath_cast_bool(new Item_func_locate(args[0], args[1]), + xpath->pxml); +} + + +static Item *create_func_concat(MY_XPATH *xpath, Item **args, uint nargs) +{ + return new Item_func_concat(args[0], args[1]); +} + + +static Item *create_func_substr(MY_XPATH *xpath, Item **args, uint nargs) +{ + if (nargs == 2) + return new Item_func_substr(args[0], args[1]); + else + return new Item_func_substr(args[0], args[1], args[2]); +} + + +static Item *create_func_count(MY_XPATH *xpath, Item **args, uint nargs) +{ + if (args[0]->type() != Item::XPATH_NODESET) + return 0; + return new Item_func_xpath_count(args[0], xpath->pxml); +} + + +static Item *create_func_sum(MY_XPATH *xpath, Item **args, uint nargs) +{ + if (args[0]->type() != Item::XPATH_NODESET) + return 0; + return new Item_func_xpath_sum(args[0], xpath->pxml); +} + + +/* + Function names. Separate lists for names with + lengths 3, 4, 5 and 6 for faster lookups.
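// Each create_func_* above is a tiny factory that maps an XPath function
// onto an already-existing SQL item, e.g. contains(a, b) becomes locate()
// wrapped in a boolean cast rather than a brand-new evaluator. The dispatch
// shape, reduced to standard C++ (all types here invented for the sketch):

#include <cstddef>

struct Expr { virtual ~Expr() {} };
struct Locate : Expr { Expr *a, *b; Locate(Expr *x, Expr *y) : a(x), b(y) {} };
struct ToBool : Expr { Expr *arg; ToBool(Expr *e) : arg(e) {} };

typedef Expr *(*Creator)(Expr **args, std::size_t nargs);

static Expr *create_contains_sketch(Expr **args, std::size_t)
{
  return new ToBool(new Locate(args[0], args[1]));  // reuse + cast to bool
}

struct Func { const char *name; std::size_t minargs, maxargs; Creator create; };
static const Func funcs_sketch[]= { {"contains", 2, 2, create_contains_sketch} };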
+*/ +static MY_XPATH_FUNC my_func_names3[]= +{ + {"sum", 3, 1 , 1 , create_func_sum}, + {"not", 3, 1 , 1 , create_func_not}, + {0 , 0, 0 , 0, 0} +}; + + +static MY_XPATH_FUNC my_func_names4[]= +{ + {"last", 4, 0, 0, create_func_last}, + {"true", 4, 0, 0, create_func_true}, + {"name", 4, 0, 1, 0}, + {"lang", 4, 1, 1, 0}, + {0 , 0, 0, 0, 0} +}; + + +static MY_XPATH_FUNC my_func_names5[]= +{ + {"count", 5, 1, 1, create_func_count}, + {"false", 5, 0, 0, create_func_false}, + {"floor", 5, 1, 1, create_func_floor}, + {"round", 5, 1, 1, create_func_round}, + {0 , 0, 0, 0, 0} +}; + + +static MY_XPATH_FUNC my_func_names6[]= +{ + {"concat", 6, 2, 255, create_func_concat}, + {"number", 6, 0, 1 , create_func_number}, + {"string", 6, 0, 1 , 0}, + {0 , 0, 0, 0 , 0} +}; + + +/* Other functions, with name longer than 6, all together */ +static MY_XPATH_FUNC my_func_names[] = +{ + {"id" , 2 , 1 , 1 , 0}, + {"boolean" , 7 , 1 , 1 , create_func_bool}, + {"ceiling" , 7 , 1 , 1 , create_func_ceiling}, + {"position" , 8 , 0 , 0 , create_func_position}, + {"contains" , 8 , 2 , 2 , create_func_contains}, + {"substring" , 9 , 2 , 3 , create_func_substr}, + {"translate" , 9 , 3 , 3 , 0}, + + {"local-name" , 10 , 0 , 1 , 0}, + {"starts-with" , 11 , 2 , 2 , 0}, + {"namespace-uri" , 13 , 0 , 1 , 0}, + {"substring-after" , 15 , 2 , 2 , 0}, + {"normalize-space" , 15 , 0 , 1 , 0}, + {"substring-before" , 16 , 2 , 2 , 0}, + + {NULL,0,0,0,0} +}; + + +/* + Lookup a function by name + + SYNOPSYS + Lookup a function by its name. + + RETURN + Pointer to a MY_XPATH_FUNC variable on success. + 0 - on failure. + +*/ +MY_XPATH_FUNC * +my_xpath_function(const char *beg, const char *end) +{ + MY_XPATH_FUNC *k, *function_names; + uint length= end-beg; + switch (length) + { + case 1: return 0; + case 3: function_names= my_func_names3; break; + case 4: function_names= my_func_names4; break; + case 5: function_names= my_func_names5; break; + case 6: function_names= my_func_names6; break; + default: function_names= my_func_names; + } + for (k= function_names; k->name; k++) + if (k->create && length == k->length && !strncasecmp(beg, k->name, length)) + return k; + return NULL; +} + + +/* Initialize a lexical analyzer token */ +static void +my_xpath_lex_init(MY_XPATH_LEX *lex, + const char *str, const char *strend) +{ + lex->beg= str; + lex->end= strend; +} + + +/* Initialize an XPath query parser */ +static void +my_xpath_init(MY_XPATH *xpath) +{ + bzero((void*)xpath, sizeof(xpath[0])); +} + + +/* + Some ctype-like helper functions. Note: we cannot + reuse cs->ident_map[], because in XPath, unlike in SQL, + the dash character is a valid identifier part. +*/ +static int +my_xident_beg(int c) +{ + return (((c) >= 'a' && (c) <= 'z') || + ((c) >= 'A' && (c) <= 'Z') || + ((c) == '_')); +} + + +static int +my_xident_body(int c) +{ + return (((c) >= 'a' && (c) <= 'z') || + ((c) >= 'A' && (c) <= 'Z') || + ((c) >= '0' && (c) <= '9') || + ((c)=='-')); +} + + +static int +my_xdigit(int c) +{ + return ((c) >= '0' && (c) <= '9'); +} + + +/* + Scan the next token + + SYNOPSYS + Scan the next token from the input. + lex->term is set to the scanned token type. + lex->beg and lex->end are set to the beginning + and to the end of the token.
+ RETURN + N/A +*/ +static void +my_xpath_lex_scan(MY_XPATH *xpath, + MY_XPATH_LEX *lex, const char *beg, const char *end) +{ + int ch; + for ( ; beg < end && *beg == ' ' ; beg++); // skip leading spaces + lex->beg= beg; + + if (beg >= end) + { + lex->end= beg; + lex->term= MY_XPATH_LEX_EOF; // end of line reached + return; + } + ch= *beg++; + + if (ch > 0 && ch < 128 && simpletok[ch]) + { + // a token consisting of one character found + lex->end= beg; + lex->term= ch; + return; + } + + if (my_xident_beg(ch)) // ident, or a function call, or a keyword + { + // scan until the end of the identifier + for ( ; beg < end && my_xident_body(*beg); beg++); + lex->end= beg; + + // check if a function call + if (*beg == '(' && (xpath->func= my_xpath_function(lex->beg, beg))) + { + lex->term= MY_XPATH_LEX_FUNC; + return; + } + + // check if a keyword + lex->term= my_xpath_keyword(xpath, lex->beg, beg); + return; + } + + if (my_xdigit(ch)) // a sequence of digits + { + for ( ; beg < end && my_xdigit(*beg) ; beg++); + lex->end= beg; + lex->term= MY_XPATH_LEX_DIGITS; + return; + } + + if (ch == '"' || ch == '\'') // a string: either '...' or "..." + { + for ( ; beg < end && *beg != ch ; beg++); + if (beg < end) + { + lex->end= beg+1; + lex->term= MY_XPATH_LEX_STRING; + return; + } + else + { + // unexpected end of line, without a closing quote + lex->end= end; + lex->term= MY_XPATH_LEX_ERROR; + return; + } + } + + lex->end= beg; + lex->term= MY_XPATH_LEX_ERROR; // unknown character + return; +} + + +/* + Scan the given token + + SYNOPSYS + Scan the given token and rotate lasttok to prevtok on success. + + RETURN + 1 - success + 0 - failure +*/ +static int +my_xpath_parse_term(MY_XPATH *xpath, int term) +{ + if (xpath->lasttok.term == term) + { + xpath->prevtok= xpath->lasttok; + my_xpath_lex_scan(xpath, &xpath->lasttok, + xpath->lasttok.end, xpath->query.end); + return 1; + } + return 0; +} + + +/* + Scan AxisName + + SYNOPSYS + Scan an axis name and store the scanned axis type into xpath->axis. + + RETURN + 1 - success + 0 - failure +*/ +static int my_xpath_parse_AxisName(MY_XPATH *xpath) +{ + int rc= my_xpath_parse_term(xpath, MY_XPATH_LEX_AXIS); + xpath->axis= xpath->extra; + return rc; +} + + +/********************************************* +** Grammar rules, according to http://www.w3.org/TR/xpath +** Implemented using the recursive-descent method. +** All the following grammar processing functions accept +** a single "xpath" argument and return 1 on success and 0 on error. +** They also modify the "xpath" argument by creating new items.
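// The grammar below leans entirely on my_xpath_parse_term() above: try to
// consume one token, and on success rotate lasttok into prevtok so a rule
// can still read the text of what it just matched. A generic cut-down
// version of that primitive (a sketch, not the server code):

struct LexSketch { int term; const char *beg, *end; };

struct ParserSketch
{
  LexSketch lasttok, prevtok;
  void scan_next() {}              // stub; the real lexer is my_xpath_lex_scan
  int accept(int term)             // plays the role of my_xpath_parse_term
  {
    if (lasttok.term != term)
      return 0;                    // no match: nothing is consumed
    prevtok= lasttok;              // keep the matched token's bounds readable
    scan_next();
    return 1;
  }
};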
+*/ + +/* [9] PredicateExpr ::= Expr */ +#define my_xpath_parse_PredicateExpr(x) my_xpath_parse_Expr((x)) + +/* [14] Expr ::= OrExpr */ +#define my_xpath_parse_Expr(x) my_xpath_parse_OrExpr((x)) + +static int my_xpath_parse_LocationPath(MY_XPATH *xpath); +static int my_xpath_parse_AbsoluteLocationPath(MY_XPATH *xpath); +static int my_xpath_parse_RelativeLocationPath(MY_XPATH *xpath); +static int my_xpath_parse_AbbreviatedAbsoluteLocationPath(MY_XPATH *xpath); +static int my_xpath_parse_AbbreviatedStep(MY_XPATH *xpath); +static int my_xpath_parse_Step(MY_XPATH *xpath); +static int my_xpath_parse_AxisSpecifier(MY_XPATH *xpath); +static int my_xpath_parse_NodeTest(MY_XPATH *xpath); +static int my_xpath_parse_AbbreviatedAxisSpecifier(MY_XPATH *xpath); +static int my_xpath_parse_NameTest(MY_XPATH *xpath); +static int my_xpath_parse_FunctionCall(MY_XPATH *xpath); +static int my_xpath_parse_Number(MY_XPATH *xpath); +static int my_xpath_parse_FilterExpr(MY_XPATH *xpath); +static int my_xpath_parse_PathExpr(MY_XPATH *xpath); +static int my_xpath_parse_OrExpr(MY_XPATH *xpath); +static int my_xpath_parse_UnaryExpr(MY_XPATH *xpath); +static int my_xpath_parse_MultiplicativeExpr(MY_XPATH *xpath); +static int my_xpath_parse_AdditiveExpr(MY_XPATH *xpath); +static int my_xpath_parse_RelationalExpr(MY_XPATH *xpath); +static int my_xpath_parse_AndExpr(MY_XPATH *xpath); +static int my_xpath_parse_EqualityExpr(MY_XPATH *xpath); +static int my_xpath_parse_VariableReference(MY_XPATH *xpath); +static int my_xpath_parse_slash_opt_slash(MY_XPATH *xpath); + + +/* + Scan LocationPath + + SYNOPSYS + + [1] LocationPath ::= RelativeLocationPath + | AbsoluteLocationPath + + RETURN + 1 - success + 0 - failure +*/ +static int my_xpath_parse_LocationPath(MY_XPATH *xpath) +{ + Item *context= xpath->context; + + int rc= my_xpath_parse_RelativeLocationPath(xpath) || + my_xpath_parse_AbsoluteLocationPath(xpath); + + xpath->item= xpath->context; + xpath->context= context; + return rc; +} + + +/* + Scan Absolute Location Path + + SYNOPSYS + + [2] AbsoluteLocationPath ::= '/' RelativeLocationPath? + | AbbreviatedAbsoluteLocationPath + [10] AbbreviatedAbsoluteLocationPath ::= '//' RelativeLocationPath + + We combine these two rules into one rule for better performance: + + [2,10] AbsoluteLocationPath ::= '/' RelativeLocationPath? 
+ | '//' RelativeLocationPath + + RETURN + 1 - success + 0 - failure +*/ +static int my_xpath_parse_AbsoluteLocationPath(MY_XPATH *xpath) +{ + if (!my_xpath_parse_term(xpath, MY_XPATH_LEX_SLASH)) + return 0; + + xpath->context= new Item_nodeset_func_rootelement(xpath->pxml); + + if (my_xpath_parse_term(xpath, MY_XPATH_LEX_SLASH)) + { + xpath->context= new Item_nodeset_func_descendantbyname(xpath->context, + "*", 1, + xpath->pxml, 1); + return my_xpath_parse_RelativeLocationPath(xpath); + } + + if (my_xpath_parse_RelativeLocationPath(xpath)) + return 1; + + return 1; +} + + +/* + Scan Relative Location Path + + SYNOPSYS + + For better performance we combine these two rules + + [3] RelativeLocationPath ::= Step + | RelativeLocationPath '/' Step + | AbbreviatedRelativeLocationPath + [11] AbbreviatedRelativeLocationPath ::= RelativeLocationPath '//' Step + + + Into this one: + + [3-11] RelativeLocationPath ::= Step + | RelativeLocationPath '/' Step + | RelativeLocationPath '//' Step + RETURN + 1 - success + 0 - failure +*/ +static int my_xpath_parse_RelativeLocationPath(MY_XPATH *xpath) +{ + if (!my_xpath_parse_Step(xpath)) + return 0; + while (my_xpath_parse_term(xpath, MY_XPATH_LEX_SLASH)) + { + if (my_xpath_parse_term(xpath, MY_XPATH_LEX_SLASH)) + xpath->context= new Item_nodeset_func_descendantbyname(xpath->context, + "*", 1, + xpath->pxml, 1); + if (!my_xpath_parse_Step(xpath)) + return 0; + } + return 1; +} + + +/* + Scan non-abbreviated or abbreviated Step + + SYNOPSYS + + [4] Step ::= AxisSpecifier NodeTest Predicate* + | AbbreviatedStep + [8] Predicate ::= '[' PredicateExpr ']' + + RETURN + 1 - success + 0 - failure +*/ +static int +my_xpath_parse_AxisSpecifier_NodeTest_opt_Predicate_list(MY_XPATH *xpath) +{ + if (!my_xpath_parse_AxisSpecifier(xpath)) + return 0; + + if (!my_xpath_parse_NodeTest(xpath)) + return 0; + + while (my_xpath_parse_term(xpath, MY_XPATH_LEX_LB)) + { + Item *prev_context= xpath->context; + String *context_cache; + context_cache= &((Item_nodeset_func*)xpath->context)->context_cache; + xpath->context= new Item_nodeset_context_cache(context_cache, xpath->pxml); + xpath->context_cache= context_cache; + + if(!my_xpath_parse_PredicateExpr(xpath)) + return 0; + + if (!my_xpath_parse_term(xpath, MY_XPATH_LEX_RB)) + return 0; + + xpath->item= nodeset2bool(xpath, xpath->item); + + if (xpath->item->is_bool_func()) + { + xpath->context= new Item_nodeset_func_predicate(prev_context, + xpath->item, + xpath->pxml); + } + else + { + xpath->context= new Item_nodeset_func_elementbyindex(prev_context, + xpath->item, + xpath->pxml); + } + } + return 1; +} + + +static int my_xpath_parse_Step(MY_XPATH *xpath) +{ + return + my_xpath_parse_AxisSpecifier_NodeTest_opt_Predicate_list(xpath) || + my_xpath_parse_AbbreviatedStep(xpath); +} + + +/* + Scan Abbreviated Axis Specifier + + SYNOPSYS + [5] AxisSpecifier ::= AxisName '::' + | AbbreviatedAxisSpecifier + + RETURN + 1 - success + 0 - failure +*/ +static int my_xpath_parse_AbbreviatedAxisSpecifier(MY_XPATH *xpath) +{ + if (my_xpath_parse_term(xpath, MY_XPATH_LEX_AT)) + xpath->axis= MY_XPATH_AXIS_ATTRIBUTE; + else + xpath->axis= MY_XPATH_AXIS_CHILD; + return 1; +} + + +/* + Scan non-abbreviated axis specifier + + SYNOPSYS + + RETURN + 1 - success + 0 - failure +*/ +static int my_xpath_parse_AxisName_colon_colon(MY_XPATH *xpath) +{ + return my_xpath_parse_AxisName(xpath) && + my_xpath_parse_term(xpath, MY_XPATH_LEX_COLON) && + my_xpath_parse_term(xpath, MY_XPATH_LEX_COLON); +} + + +/* + Scan Abbreviated AxisSpecifier + + SYNOPSYS + [13] 
AbbreviatedAxisSpecifier ::= '@'? + + RETURN + 1 - success + 0 - failure +*/ +static int my_xpath_parse_AxisSpecifier(MY_XPATH *xpath) +{ + return my_xpath_parse_AxisName_colon_colon(xpath) || + my_xpath_parse_AbbreviatedAxisSpecifier(xpath); +} + + +/* + Scan NodeType followed by parens + + SYNOPSYS + + RETURN + 1 - success + 0 - failure +*/ +static int my_xpath_parse_NodeTest_lp_rp(MY_XPATH *xpath) +{ + return my_xpath_parse_term(xpath, MY_XPATH_LEX_NODETYPE) && + my_xpath_parse_term(xpath, MY_XPATH_LEX_LP) && + my_xpath_parse_term(xpath, MY_XPATH_LEX_RP); +} + + +/* + Scan NodeTest + + SYNOPSYS + + [7] NodeTest ::= NameTest + | NodeType '(' ')' + | 'processing-instruction' '(' Literal ')' + RETURN + 1 - success + 0 - failure +*/ +static int my_xpath_parse_NodeTest(MY_XPATH *xpath) +{ + return my_xpath_parse_NameTest(xpath) || + my_xpath_parse_NodeTest_lp_rp(xpath); +} + + +/* + Scan Abbreviated Step + + SYNOPSYS + + [12] AbbreviatedStep ::= '.' | '..' + + RETURN + 1 - success + 0 - failure +*/ +static int my_xpath_parse_AbbreviatedStep(MY_XPATH *xpath) +{ + if (!my_xpath_parse_term(xpath, MY_XPATH_LEX_DOT)) + return 0; + if (my_xpath_parse_term(xpath, MY_XPATH_LEX_DOT)) + xpath->context= new Item_nodeset_func_parentbyname(xpath->context, "*", 1, + xpath->pxml); + return 1; +} + + +/* + Scan Primary Expression + + SYNOPSYS + + [15] PrimaryExpr ::= VariableReference + | '(' Expr ')' + | Literal + | Number + | FunctionCall + RETURN + 1 - success + 0 - failure +*/ +static int my_xpath_parse_lp_Expr_rp(MY_XPATH *xpath) +{ + return my_xpath_parse_term(xpath, MY_XPATH_LEX_LP) && + my_xpath_parse_Expr(xpath) && + my_xpath_parse_term(xpath, MY_XPATH_LEX_RP); +} +static int my_xpath_parse_PrimaryExpr_literal(MY_XPATH *xpath) +{ + if (!my_xpath_parse_term(xpath, MY_XPATH_LEX_STRING)) + return 0; + xpath->item= new Item_string(xpath->prevtok.beg + 1, + xpath->prevtok.end - xpath->prevtok.beg - 2, + xpath->cs); + return 1; +} +static int my_xpath_parse_PrimaryExpr(MY_XPATH *xpath) +{ + return + my_xpath_parse_lp_Expr_rp(xpath) || + my_xpath_parse_VariableReference(xpath) || + my_xpath_parse_PrimaryExpr_literal(xpath) || + my_xpath_parse_Number(xpath) || + my_xpath_parse_FunctionCall(xpath); +} + + +/* + Scan Function Call + + SYNOPSYS + [16] FunctionCall ::= FunctionName '(' ( Argument ( ',' Argument )* )? ')' + [17] Argument ::= Expr + + RETURN + 1 - success + 0 - failure + +*/ +static int my_xpath_parse_FunctionCall(MY_XPATH *xpath) +{ + Item *args[256]; + uint nargs; + + if (!my_xpath_parse_term(xpath, MY_XPATH_LEX_FUNC)) + return 0; + + MY_XPATH_FUNC *func= xpath->func; + + if (!my_xpath_parse_term(xpath, MY_XPATH_LEX_LP)) + return 0; + + for (nargs= 0 ; nargs < func->maxargs; ) + { + if (!my_xpath_parse_Expr(xpath)) + return 0; + args[nargs++]= xpath->item; + if (!my_xpath_parse_term(xpath, MY_XPATH_LEX_COMMA)) + { + if (nargs < func->minargs) + return 0; + else + break; + } + } + if (!my_xpath_parse_term(xpath, MY_XPATH_LEX_RP)) + return 0; + + return ((xpath->item= func->create(xpath, args, nargs))) ? 
1 : 0; +} + + +/* + Scan Union Expression + + SYNOPSYS + [18] UnionExpr ::= PathExpr + | UnionExpr '|' PathExpr + + RETURN + 1 - success + 0 - failure +*/ +static int my_xpath_parse_UnionExpr(MY_XPATH *xpath) +{ + if (!my_xpath_parse_PathExpr(xpath)) + return 0; + + while (my_xpath_parse_term(xpath, MY_XPATH_LEX_VLINE)) + { + Item *prev= xpath->item; + if (prev->type() != Item::XPATH_NODESET) + return 0; + + if (!my_xpath_parse_PathExpr(xpath) + || xpath->item->type() != Item::XPATH_NODESET) + return 0; + xpath->item= new Item_nodeset_func_union(prev, xpath->item, xpath->pxml); + } + return 1; +} + + +/* + Scan Path Expression + + SYNOPSYS + + [19] PathExpr ::= LocationPath + | FilterExpr + | FilterExpr '/' RelativeLocationPath + | FilterExpr '//' RelativeLocationPath + RETURN + 1 - success + 0 - failure +*/ +static int +my_xpath_parse_FilterExpr_opt_slashes_RelativeLocationPath(MY_XPATH *xpath) +{ + if (!my_xpath_parse_FilterExpr(xpath)) + return 0; + + if (!my_xpath_parse_term(xpath, MY_XPATH_LEX_SLASH)) + return 1; + + my_xpath_parse_term(xpath, MY_XPATH_LEX_SLASH); + return my_xpath_parse_RelativeLocationPath(xpath); +} +static int my_xpath_parse_PathExpr(MY_XPATH *xpath) +{ + return my_xpath_parse_LocationPath(xpath) || + my_xpath_parse_FilterExpr_opt_slashes_RelativeLocationPath(xpath); +} + + + +/* + Scan Filter Expression + + SYNOPSYS + [20] FilterExpr ::= PrimaryExpr + | FilterExpr Predicate + + or in other words: + + [20] FilterExpr ::= PrimaryExpr Predicate* + + RETURN + 1 - success + 0 - failure + +*/ +static int my_xpath_parse_FilterExpr(MY_XPATH *xpath) +{ + return my_xpath_parse_PrimaryExpr(xpath); +} + + +/* + Scan Or Expression + + SYNOPSYS + [21] OrExpr ::= AndExpr + | OrExpr 'or' AndExpr + + RETURN + 1 - success + 0 - failure +*/ +static int my_xpath_parse_OrExpr(MY_XPATH *xpath) +{ + if (!my_xpath_parse_AndExpr(xpath)) + return 0; + + while (my_xpath_parse_term(xpath, MY_XPATH_LEX_OR)) + { + Item *prev= xpath->item; + if (!my_xpath_parse_AndExpr(xpath)) + return 0; + xpath->item= new Item_cond_or(nodeset2bool(xpath, prev), + nodeset2bool(xpath, xpath->item)); + } + return 1; +} + + +/* + Scan And Expression + + SYNOPSYS + [22] AndExpr ::= EqualityExpr + | AndExpr 'and' EqualityExpr + + RETURN + 1 - success + 0 - failure +*/ +static int my_xpath_parse_AndExpr(MY_XPATH *xpath) +{ + if (!my_xpath_parse_EqualityExpr(xpath)) + return 0; + + while (my_xpath_parse_term(xpath, MY_XPATH_LEX_AND)) + { + Item *prev= xpath->item; + if (!my_xpath_parse_EqualityExpr(xpath)) + return 0; + + xpath->item= new Item_cond_and(nodeset2bool(xpath,prev), + nodeset2bool(xpath,xpath->item)); + } + return 1; +} + + +/* + Scan Equality Expression + + SYNOPSYS + [23] EqualityExpr ::= RelationalExpr + | EqualityExpr '=' RelationalExpr + | EqualityExpr '!=' RelationalExpr + or in other words: + + [23] EqualityExpr ::= RelationalExpr ( EqualityOperator EqualityExpr )* + + RETURN + 1 - success + 0 - failure +*/ +static int my_xpath_parse_ne(MY_XPATH *xpath) +{ + return my_xpath_parse_term(xpath, MY_XPATH_LEX_EXCL) && + my_xpath_parse_term(xpath, MY_XPATH_LEX_EQ); +} +static int my_xpath_parse_EqualityOperator(MY_XPATH *xpath) +{ + if (my_xpath_parse_ne(xpath)) + { + xpath->extra= '!'; + return 1; + } + if (my_xpath_parse_term(xpath, MY_XPATH_LEX_EQ)) + { + xpath->extra= '='; + return 1; + } + return 0; +} +static int my_xpath_parse_EqualityExpr(MY_XPATH *xpath) +{ + if (!my_xpath_parse_RelationalExpr(xpath)) + return 0; + while (my_xpath_parse_EqualityOperator(xpath)) + { + Item *prev= xpath->item; + 
int oper= xpath->extra; + if (!my_xpath_parse_RelationalExpr(xpath)) + return 0; + + if (!(xpath->item= create_comparator(xpath, oper, prev, xpath->item))) + return 0; + } + return 1; +} + + +/* + Scan Relational Expression + + SYNOPSYS + + [24] RelationalExpr ::= AdditiveExpr + | RelationalExpr '<' AdditiveExpr + | RelationalExpr '>' AdditiveExpr + | RelationalExpr '<=' AdditiveExpr + | RelationalExpr '>=' AdditiveExpr + or in other words: + + [24] RelationalExpr ::= AdditiveExpr (RelationalOperator RelationalExpr)* + + RETURN + 1 - success + 0 - failure +*/ +static int my_xpath_parse_RelationalOperator(MY_XPATH *xpath) +{ + if (my_xpath_parse_term(xpath, MY_XPATH_LEX_LESS)) + { + xpath->extra= my_xpath_parse_term(xpath, MY_XPATH_LEX_EQ) ? + MY_XPATH_LEX_LE : MY_XPATH_LEX_LESS; + return 1; + } + else if (my_xpath_parse_term(xpath, MY_XPATH_LEX_GREATER)) + { + xpath->extra= my_xpath_parse_term(xpath, MY_XPATH_LEX_EQ) ? + MY_XPATH_LEX_GE : MY_XPATH_LEX_GREATER; + return 1; + } + return 0; +} +static int my_xpath_parse_RelationalExpr(MY_XPATH *xpath) +{ + if (!my_xpath_parse_AdditiveExpr(xpath)) + return 0; + while (my_xpath_parse_RelationalOperator(xpath)) + { + Item *prev= xpath->item; + int oper= xpath->extra; + + if (!my_xpath_parse_AdditiveExpr(xpath)) + return 0; + + if (!(xpath->item= create_comparator(xpath, oper, prev, xpath->item))) + return 0; + } + return 1; +} + + +/* + Scan Additive Expression + + SYNOPSYS + + [25] AdditiveExpr ::= MultiplicativeExpr + | AdditiveExpr '+' MultiplicativeExpr + | AdditiveExpr '-' MultiplicativeExpr + RETURN + 1 - success + 0 - failure +*/ +static int my_xpath_parse_AdditiveOperator(MY_XPATH *xpath) +{ + return my_xpath_parse_term(xpath, MY_XPATH_LEX_PLUS) || + my_xpath_parse_term(xpath, MY_XPATH_LEX_MINUS); +} +static int my_xpath_parse_AdditiveExpr(MY_XPATH *xpath) +{ + if (!my_xpath_parse_MultiplicativeExpr(xpath)) + return 0; + + while (my_xpath_parse_AdditiveOperator(xpath)) + { + int oper= xpath->prevtok.term; + Item *prev= xpath->item; + if (!my_xpath_parse_MultiplicativeExpr(xpath)) + return 0; + + if (oper == MY_XPATH_LEX_PLUS) + xpath->item= new Item_func_plus(prev, xpath->item); + else + xpath->item= new Item_func_minus(prev, xpath->item); + }; + return 1; +} + + +/* + Scan Multiplicative Expression + + SYNOPSYS + + [26] MultiplicativeExpr ::= UnaryExpr + | MultiplicativeExpr MultiplyOperator UnaryExpr + | MultiplicativeExpr 'div' UnaryExpr + | MultiplicativeExpr 'mod' UnaryExpr + or in other words: + + [26] MultiplicativeExpr ::= UnaryExpr (MulOper MultiplicativeExpr)* + + RETURN + 1 - success + 0 - failure +*/ +static int my_xpath_parse_MultiplicativeOperator(MY_XPATH *xpath) +{ + return + my_xpath_parse_term(xpath, MY_XPATH_LEX_ASTERISK) || + my_xpath_parse_term(xpath, MY_XPATH_LEX_DIV) || + my_xpath_parse_term(xpath, MY_XPATH_LEX_MOD); +} +static int my_xpath_parse_MultiplicativeExpr(MY_XPATH *xpath) +{ + if (!my_xpath_parse_UnaryExpr(xpath)) + return 0; + + while (my_xpath_parse_MultiplicativeOperator(xpath)) + { + int oper= xpath->prevtok.term; + Item *prev= xpath->item; + if (!my_xpath_parse_UnaryExpr(xpath)) + return 0; + switch (oper) + { + case MY_XPATH_LEX_ASTERISK: + xpath->item= new Item_func_mul(prev, xpath->item); + break; + case MY_XPATH_LEX_DIV: + xpath->item= new Item_func_int_div(prev, xpath->item); + break; + case MY_XPATH_LEX_MOD: + xpath->item= new Item_func_mod(prev, xpath->item); + break; + } + } + return 1; +} + + +/* + Scan Unary Expression + + SYNOPSYS + + [27] UnaryExpr ::= UnionExpr + | '-' UnaryExpr + RETURN 
+    1 - success
+    0 - failure
+*/
+static int my_xpath_parse_UnaryExpr(MY_XPATH *xpath)
+{
+  if (!my_xpath_parse_term(xpath, MY_XPATH_LEX_MINUS))
+    return my_xpath_parse_UnionExpr(xpath);
+  if (!my_xpath_parse_UnaryExpr(xpath))
+    return 0;
+  xpath->item= new Item_func_neg(xpath->item);
+  return 1;
+}
+
+
+/*
+  Scan Number
+
+  SYNOPSIS
+
+    [30] Number ::= Digits ('.' Digits?)? | '.' Digits
+
+  or in other words:
+
+    [30] Number ::= Digits
+                  | Digits '.'
+                  | Digits '.' Digits
+                  | '.' Digits
+
+  Note: the last rule is not supported yet,
+  as it is in conflict with abbreviated step.
+  1 + .123 does not work,
+  1 + 0.123 does.
+  Perhaps it is better to move this code into the lexical analyzer.
+
+  RETURN
+    1 - success
+    0 - failure
+*/
+static int my_xpath_parse_Number(MY_XPATH *xpath)
+{
+  const char *beg;
+  if (!my_xpath_parse_term(xpath, MY_XPATH_LEX_DIGITS))
+    return 0;
+  beg= xpath->prevtok.beg;
+  if (!my_xpath_parse_term(xpath, MY_XPATH_LEX_DOT))
+  {
+    xpath->item= new Item_int(xpath->prevtok.beg,
+                              xpath->prevtok.end - xpath->prevtok.beg);
+    return 1;
+  }
+  my_xpath_parse_term(xpath, MY_XPATH_LEX_DIGITS);
+
+  xpath->item= new Item_float(beg, xpath->prevtok.end - beg);
+  return 1;
+}
+
+
+/*
+  Scan Variable reference
+
+  SYNOPSIS
+
+    [36] VariableReference ::= '$' QName
+  RETURN
+    1 - success
+    0 - failure
+*/
+static int
+my_xpath_parse_VariableReference(MY_XPATH *xpath)
+{
+  return my_xpath_parse_term(xpath, MY_XPATH_LEX_DOLLAR) &&
+         my_xpath_parse_term(xpath, MY_XPATH_LEX_IDENT);
+}
+
+
+/*
+  Scan Name Test
+
+  SYNOPSIS
+
+    [37] NameTest ::= '*'
+                    | NCName ':' '*'
+                    | QName
+  RETURN
+    1 - success
+    0 - failure
+*/
+static int
+my_xpath_parse_NodeTest_QName(MY_XPATH *xpath)
+{
+  if (!my_xpath_parse_term(xpath, MY_XPATH_LEX_IDENT))
+    return 0;
+  DBUG_ASSERT(xpath->context);
+  uint len= xpath->prevtok.end - xpath->prevtok.beg;
+  xpath->context= nametestfunc(xpath, xpath->axis, xpath->context,
+                               xpath->prevtok.beg, len);
+  return 1;
+}
+static int
+my_xpath_parse_NodeTest_asterisk(MY_XPATH *xpath)
+{
+  if (!my_xpath_parse_term(xpath, MY_XPATH_LEX_ASTERISK))
+    return 0;
+  DBUG_ASSERT(xpath->context);
+  xpath->context= nametestfunc(xpath, xpath->axis, xpath->context, "*", 1);
+  return 1;
+}
+static int
+my_xpath_parse_NameTest(MY_XPATH *xpath)
+{
+  return my_xpath_parse_NodeTest_asterisk(xpath) ||
+         my_xpath_parse_NodeTest_QName(xpath);
+}
+
+
+/*
+  Scan an XPath expression
+
+  SYNOPSIS
+    Scan xpath expression.
+    The expression is returned in xpath->expr.
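+
+    A hedged illustration, not part of the original comment: once wired
+    up at the SQL level, XPath arguments like the ones below are
+    accepted by this parser:
+
+      SELECT ExtractValue('<a><b>x</b><b>y</b></a>', '/a/b[2]');  -- 'y'
+      SELECT ExtractValue('<a><b/><b/></a>', 'count(/a/b)');      -- '2'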
+
+  RETURN
+    1 - success
+    0 - failure
+*/
+static int
+my_xpath_parse(MY_XPATH *xpath, const char *str, const char *strend)
+{
+  my_xpath_lex_init(&xpath->query, str, strend);
+  my_xpath_lex_init(&xpath->prevtok, str, strend);
+  my_xpath_lex_scan(xpath, &xpath->lasttok, str, strend);
+
+  return
+    my_xpath_parse_Expr(xpath) &&
+    my_xpath_parse_term(xpath, MY_XPATH_LEX_EOF);
+}
+
+
+void Item_xml_str_func::fix_length_and_dec()
+{
+  String *xp, tmp;
+  MY_XPATH xpath;
+  int rc;
+
+  nodeset_func= 0;
+
+  if (agg_arg_charsets(collation, args, arg_count, MY_COLL_CMP_CONV))
+    return;
+
+  if (collation.collation->mbminlen > 1)
+  {
+    /* UCS2 is not supported */
+    my_printf_error(ER_UNKNOWN_ERROR,
+                    "Character set '%s' is not supported by XPATH",
+                    MYF(0), collation.collation->csname);
+    return;
+  }
+
+  if (!args[1]->const_item())
+  {
+    my_printf_error(ER_UNKNOWN_ERROR,
+                    "Only constant XPATH queries are supported", MYF(0));
+    return;
+  }
+
+  xp= args[1]->val_str(&tmp);
+  my_xpath_init(&xpath);
+  xpath.cs= collation.collation;
+  xpath.debug= 0;
+  xpath.pxml= &pxml;
+
+  rc= my_xpath_parse(&xpath, xp->ptr(), xp->ptr() + xp->length());
+
+  if (!rc)
+  {
+    char context[32];
+    uint clen= xpath.query.end - xpath.lasttok.beg;
+    /* Truncate the error context so it fits into the buffer */
+    set_if_smaller(clen, sizeof(context) - 1);
+    memcpy(context, xpath.lasttok.beg, clen);
+    context[clen]= '\0';
+    my_printf_error(ER_UNKNOWN_ERROR, "XPATH syntax error: '%s'",
+                    MYF(0), context);
+    return;
+  }
+
+  nodeset_func= xpath.item;
+  if (nodeset_func)
+    nodeset_func->fix_fields(current_thd, &nodeset_func);
+  max_length= MAX_BLOB_WIDTH;
+}
+
+
+#define MAX_LEVEL 256
+typedef struct
+{
+  uint level;
+  String *pxml;         // parsed XML
+  uint pos[MAX_LEVEL];  // Tag position stack
+} MY_XML_USER_DATA;
+
+
+/*
+  Find the parent node
+
+  SYNOPSIS
+    Find the parent node, i.e. a tag or attribute node on the given level.
+
+  RETURN
+    1 - success
+    0 - failure
+*/
+static uint xml_parent_tag(MY_XML_NODE *items, uint nitems, uint level)
+{
+  if (!nitems)
+    return 0;
+
+  MY_XML_NODE *p, *last= &items[nitems-1];
+  for (p= last; p >= items; p--)
+  {
+    if (p->level == level &&
+        (p->type == MY_XML_NODE_TAG ||
+         p->type == MY_XML_NODE_ATTR))
+    {
+      return p - items;
+    }
+  }
+  return 0;
+}
+
+
+/*
+  Process tag beginning
+
+  SYNOPSIS
+
+    A call-back function executed when the XML parser
+    is entering a tag or an attribute.
+    Appends the new node into data->pxml.
+    Increments data->level.
+
+  RETURN
+    Currently only MY_XML_OK
+*/
+static int xml_enter(MY_XML_PARSER *st,const char *attr, uint len)
+{
+  MY_XML_USER_DATA *data= (MY_XML_USER_DATA*)st->user_data;
+  MY_XML_NODE *nodes= (MY_XML_NODE*) data->pxml->ptr();
+  uint numnodes= data->pxml->length() / sizeof(MY_XML_NODE);
+  uint parent= xml_parent_tag(nodes, numnodes, data->level - 1);
+  MY_XML_NODE node;
+
+  data->pos[data->level]= numnodes;
+  node.level= data->level++;
+  node.type= st->current_node_type; // TAG or ATTR
+  node.beg= attr;
+  node.end= attr + len;
+  node.parent= parent;
+  data->pxml->append((const char*) &node, sizeof(MY_XML_NODE));
+  return MY_XML_OK;
+}
+
+
+/*
+  Process text node
+
+  SYNOPSIS
+
+    A call-back function executed when the XML parser
+    is entering a tag's or an attribute's textual value.
+    The value is appended into data->pxml.
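+
+    A hedged illustration, not part of the original comment: for
+
+      SELECT ExtractValue('<a>hello<b>world</b></a>', '/a/b');
+
+    TEXT nodes are appended for 'hello' and 'world' under their
+    enclosing TAG nodes, so the step /a/b can later select 'world'.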
+
+  RETURN
+    Currently only MY_XML_OK
+*/
+static int xml_value(MY_XML_PARSER *st,const char *attr, uint len)
+{
+  MY_XML_USER_DATA *data= (MY_XML_USER_DATA*)st->user_data;
+  MY_XML_NODE *nodes= (MY_XML_NODE*) data->pxml->ptr();
+  uint numnodes= data->pxml->length() / sizeof(MY_XML_NODE);
+  uint parent= xml_parent_tag(nodes, numnodes, data->level - 1);
+  MY_XML_NODE node;
+
+  node.level= data->level;
+  node.type= MY_XML_NODE_TEXT;
+  node.beg= attr;
+  node.end= attr + len;
+  node.parent= parent;
+  data->pxml->append((const char*) &node, sizeof(MY_XML_NODE));
+  return MY_XML_OK;
+}
+
+
+/*
+  Leave a tag or an attribute
+
+  SYNOPSIS
+
+    A call-back function executed when the XML parser
+    is leaving a tag or an attribute.
+    Decrements data->level.
+
+  RETURN
+    Currently only MY_XML_OK
+*/
+static int xml_leave(MY_XML_PARSER *st,const char *attr, uint len)
+{
+  MY_XML_USER_DATA *data= (MY_XML_USER_DATA*)st->user_data;
+  DBUG_ASSERT(data->level > 0);
+  data->level--;
+
+  MY_XML_NODE *nodes= (MY_XML_NODE*) data->pxml->ptr();
+  nodes+= data->pos[data->level];
+  nodes->tagend= st->cur;
+
+  return MY_XML_OK;
+}
+
+
+/*
+  Parse raw XML
+
+  SYNOPSIS
+    Parse the raw XML in raw_xml and append the resulting node table
+    to parsed_xml_buf.
+
+  RETURN
+    Currently pointer to parsed XML on success
+    0 on parse error
+*/
+String *Item_xml_str_func::parse_xml(String *raw_xml, String *parsed_xml_buf)
+{
+  MY_XML_PARSER p;
+  MY_XML_USER_DATA user_data;
+  int rc;
+
+  parsed_xml_buf->length(0);
+
+  /* Prepare XML parser */
+  my_xml_parser_create(&p);
+  p.flags= MY_XML_FLAG_RELATIVE_NAMES | MY_XML_FLAG_SKIP_TEXT_NORMALIZATION;
+  user_data.level= 0;
+  user_data.pxml= parsed_xml_buf;
+  my_xml_set_enter_handler(&p, xml_enter);
+  my_xml_set_value_handler(&p, xml_value);
+  my_xml_set_leave_handler(&p, xml_leave);
+  my_xml_set_user_data(&p, (void*) &user_data);
+
+  /* Add root node */
+  p.current_node_type= MY_XML_NODE_TAG;
+  xml_enter(&p, raw_xml->ptr(), 0);
+
+  /* Execute XML parser */
+  rc= my_xml_parse(&p, raw_xml->ptr(), raw_xml->length());
+  my_xml_parser_free(&p);
+
+  return rc == MY_XML_OK ? parsed_xml_buf : 0;
+}
+
+
+String *Item_func_xml_extractvalue::val_str(String *str)
+{
+  String *res;
+  if (!nodeset_func ||
+      !(res= args[0]->val_str(str)) ||
+      !parse_xml(res, &pxml))
+  {
+    null_value= 1;
+    return 0;
+  }
+  res= nodeset_func->val_str(&tmp_value);
+  return res;
+}
+
+
+String *Item_func_xml_update::val_str(String *str)
+{
+  String *res, *nodeset, *rep;
+
+  if (!nodeset_func ||
+      !(res= args[0]->val_str(str)) ||
+      !(rep= args[2]->val_str(&tmp_value3)) ||
+      !parse_xml(res, &pxml) ||
+      !(nodeset= nodeset_func->val_nodeset(&tmp_value2)))
+  {
+    null_value= 1;
+    return 0;
+  }
+
+  MY_XML_NODE *nodebeg= (MY_XML_NODE*) pxml.ptr();
+  MY_XML_NODE *nodeend= (MY_XML_NODE*) pxml.ptr() + pxml.length();
+  MY_XPATH_FLT *fltbeg= (MY_XPATH_FLT*) nodeset->ptr();
+  MY_XPATH_FLT *fltend= (MY_XPATH_FLT*) (nodeset->ptr() + nodeset->length());
+
+  /* Allow replacing of one tag only */
+  if (fltend - fltbeg != 1)
+  {
+    /* TODO: perhaps add a warning that more than one tag selected */
+    return res;
+  }
+
+  nodebeg+= fltbeg->num;
+
+  tmp_value.length(0);
+  tmp_value.set_charset(collation.collation);
+  uint offs= nodebeg->type == MY_XML_NODE_TAG ?
1 : 0; + tmp_value.append(res->ptr(), nodebeg->beg - res->ptr() - offs); + tmp_value.append(rep->ptr(), rep->length()); + const char *end= nodebeg->tagend + offs; + tmp_value.append(end, res->ptr() + res->length() - end); + return &tmp_value; +} diff --git a/sql/item_xmlfunc.h b/sql/item_xmlfunc.h new file mode 100644 index 00000000000..bc47e9c5bb1 --- /dev/null +++ b/sql/item_xmlfunc.h @@ -0,0 +1,56 @@ +/* Copyright (C) 2000-2005 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + + +/* This file defines all XML functions */ + + +#ifdef __GNUC__ +#pragma interface /* gcc class implementation */ +#endif + + +class Item_xml_str_func: public Item_str_func +{ +protected: + String tmp_value, pxml; + Item *nodeset_func; +public: + Item_xml_str_func(Item *a, Item *b): Item_str_func(a,b) {} + Item_xml_str_func(Item *a, Item *b, Item *c): Item_str_func(a,b,c) {} + void fix_length_and_dec(); + String *parse_xml(String *raw_xml, String *parsed_xml_buf); +}; + + +class Item_func_xml_extractvalue: public Item_xml_str_func +{ +public: + Item_func_xml_extractvalue(Item *a,Item *b) :Item_xml_str_func(a,b) {} + const char *func_name() const { return "extractvalue"; } + String *val_str(String *); +}; + + +class Item_func_xml_update: public Item_xml_str_func +{ + String tmp_value2, tmp_value3; +public: + Item_func_xml_update(Item *a,Item *b,Item *c) :Item_xml_str_func(a,b,c) {} + const char *func_name() const { return "updatexml"; } + String *val_str(String *); +}; + diff --git a/sql/key.cc b/sql/key.cc index 9d86095f33e..4001c6177a1 100644 --- a/sql/key.cc +++ b/sql/key.cc @@ -28,7 +28,7 @@ ** Used when calculating key for NEXT_NUMBER */ -int find_ref_key(TABLE *table,Field *field, uint *key_length) +int find_ref_key(KEY *key, uint key_count, Field *field, uint *key_length) { reg2 int i; reg3 KEY *key_info; @@ -38,8 +38,8 @@ int find_ref_key(TABLE *table,Field *field, uint *key_length) /* Test if some key starts as fieldpos */ - for (i= 0, key_info= table->key_info ; - i < (int) table->s->keys ; + for (i= 0, key_info= key ; + i < (int) key_count ; i++, key_info++) { if (key_info->key_part[0].offset == fieldpos) @@ -50,8 +50,8 @@ int find_ref_key(TABLE *table,Field *field, uint *key_length) } /* Test if some key contains fieldpos */ - for (i= 0, key_info= table->key_info ; - i < (int) table->s->keys ; + for (i= 0, key_info= key; + i < (int) key_count ; i++, key_info++) { uint j; @@ -429,3 +429,86 @@ int key_cmp(KEY_PART_INFO *key_part, const byte *key, uint key_length) } return 0; // Keys are equal } + + +/* + Compare two records in index order + SYNOPSIS + key_rec_cmp() + key Index information + rec0 Pointer to table->record[0] + first_rec Pointer to record compare with + second_rec Pointer to record compare against first_rec + DESCRIPTION + This method is set-up such that it can be called directly from the + priority queue and it is attempted to be 
optimised as much as possible
+    since this will be called O(N * log N) times while performing a merge
+    sort in various places in the code.
+
+    We retrieve the pointer to table->record[0] using the fact that key_parts
+    have an offset making it possible to calculate the start of the record.
+    We need to get the diff to the compared record since none of the records
+    being compared are stored in table->record[0].
+
+    We first check for NULL values; if there are no NULL values we use
+    a compare method that gets two field pointers and a max length
+    and returns the result of the comparison.
+*/
+
+int key_rec_cmp(void *key, byte *first_rec, byte *second_rec)
+{
+  KEY *key_info= (KEY*)key;
+  uint key_parts= key_info->key_parts, i= 0;
+  KEY_PART_INFO *key_part= key_info->key_part;
+  char *rec0= key_part->field->ptr - key_part->offset;
+  my_ptrdiff_t first_diff= first_rec - (byte*)rec0, sec_diff= second_rec - (byte*)rec0;
+  int result= 0;
+  DBUG_ENTER("key_rec_cmp");
+
+  do
+  {
+    Field *field= key_part->field;
+    uint length;
+
+    if (key_part->null_bit)
+    {
+      /* The key_part can contain NULL values */
+      bool first_is_null= field->is_null_in_record_with_offset(first_diff);
+      bool sec_is_null= field->is_null_in_record_with_offset(sec_diff);
+      /*
+        NULL is smaller than everything, so if the first is NULL and the
+        other is not, we know that we should return -1, and for the
+        opposite case we should return +1. If both are NULL we call it
+        equality, although it is a strange form of equality: we have
+        equally little information about the real values.
+      */
+      if (!first_is_null)
+      {
+        if (!sec_is_null)
+          ; /* Fall through, no NULL fields */
+        else
+        {
+          DBUG_RETURN(+1);
+        }
+      }
+      else if (!sec_is_null)
+      {
+        DBUG_RETURN(-1);
+      }
+      else
+        goto next_loop; /* Both were NULL */
+    }
+    /*
+      No NULL values in the fields.
+      We use the virtual method cmp_max with a max length parameter.
+      For most field types this translates into a cmp without
+      max length. The exceptions are the BLOB and VARCHAR field types
+      that take the max length into account.
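+
+      As a hedged illustration, not from the original comment: an
+      ordered index scan over a partitioned table is the kind of caller
+      that merges per-partition row streams through this comparator:
+
+        CREATE TABLE t1 (a INT, KEY (a))
+        PARTITION BY HASH (a) PARTITIONS 4;
+        SELECT a FROM t1 ORDER BY a;  -- rows merged across partitions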
+ */ + result= field->cmp_max(field->ptr+first_diff, field->ptr+sec_diff, + key_part->length); +next_loop: + key_part++; + } while (!result && ++i < key_parts); + DBUG_RETURN(result); +} diff --git a/sql/lex.h b/sql/lex.h index efcb9b84f81..cf83fc9488c 100644 --- a/sql/lex.h +++ b/sql/lex.h @@ -74,6 +74,7 @@ static SYMBOL symbols[] = { { "ASC", SYM(ASC)}, { "ASCII", SYM(ASCII_SYM)}, { "ASENSITIVE", SYM(ASENSITIVE_SYM)}, + { "AUTHORS", SYM(AUTHORS_SYM)}, { "AUTO_INCREMENT", SYM(AUTO_INC)}, { "AVG", SYM(AVG_SYM)}, { "AVG_ROW_LENGTH", SYM(AVG_ROW_LENGTH)}, @@ -110,6 +111,7 @@ static SYMBOL symbols[] = { { "CIPHER", SYM(CIPHER_SYM)}, { "CLIENT", SYM(CLIENT_SYM)}, { "CLOSE", SYM(CLOSE_SYM)}, + { "COALESCE", SYM(COALESCE)}, { "CODE", SYM(CODE_SYM)}, { "COLLATE", SYM(COLLATE_SYM)}, { "COLLATION", SYM(COLLATION_SYM)}, @@ -246,6 +248,7 @@ static SYMBOL symbols[] = { { "INSENSITIVE", SYM(INSENSITIVE_SYM)}, { "INSERT", SYM(INSERT)}, { "INSERT_METHOD", SYM(INSERT_METHOD)}, + { "INSTALL", SYM(INSTALL_SYM)}, { "INT", SYM(INT_SYM)}, { "INT1", SYM(TINYINT)}, { "INT2", SYM(SMALLINT)}, @@ -275,11 +278,14 @@ static SYMBOL symbols[] = { { "LEAVE", SYM(LEAVE_SYM)}, { "LEAVES", SYM(LEAVES)}, { "LEFT", SYM(LEFT)}, + { "LESS", SYM(LESS_SYM)}, { "LEVEL", SYM(LEVEL_SYM)}, { "LIKE", SYM(LIKE)}, { "LIMIT", SYM(LIMIT)}, + { "LINEAR", SYM(LINEAR_SYM)}, { "LINES", SYM(LINES)}, { "LINESTRING", SYM(LINESTRING)}, + { "LIST", SYM(LIST_SYM)}, { "LOAD", SYM(LOAD)}, { "LOCAL", SYM(LOCAL_SYM)}, { "LOCALTIME", SYM(NOW_SYM)}, @@ -313,6 +319,7 @@ static SYMBOL symbols[] = { { "MAX_ROWS", SYM(MAX_ROWS)}, { "MAX_UPDATES_PER_HOUR", SYM(MAX_UPDATES_PER_HOUR)}, { "MAX_USER_CONNECTIONS", SYM(MAX_USER_CONNECTIONS_SYM)}, + { "MAXVALUE", SYM(MAX_VALUE_SYM)}, { "MEDIUM", SYM(MEDIUM_SYM)}, { "MEDIUMBLOB", SYM(MEDIUMBLOB)}, { "MEDIUMINT", SYM(MEDIUMINT)}, @@ -344,6 +351,7 @@ static SYMBOL symbols[] = { { "NEW", SYM(NEW_SYM)}, { "NEXT", SYM(NEXT_SYM)}, { "NO", SYM(NO_SYM)}, + { "NODEGROUP", SYM(NODEGROUP_SYM)}, { "NONE", SYM(NONE_SYM)}, { "NOT", SYM(NOT_SYM)}, { "NO_WRITE_TO_BINLOG", SYM(NO_WRITE_TO_BINLOG)}, @@ -365,9 +373,13 @@ static SYMBOL symbols[] = { { "OUTER", SYM(OUTER)}, { "OUTFILE", SYM(OUTFILE)}, { "PACK_KEYS", SYM(PACK_KEYS_SYM)}, + { "PARSER", SYM(PARSER_SYM)}, { "PARTIAL", SYM(PARTIAL)}, + { "PARTITION", SYM(PARTITION_SYM)}, + { "PARTITIONS", SYM(PARTITIONS_SYM)}, { "PASSWORD", SYM(PASSWORD)}, { "PHASE", SYM(PHASE_SYM)}, + { "PLUGIN", SYM(PLUGIN_SYM)}, { "POINT", SYM(POINT_SYM)}, { "POLYGON", SYM(POLYGON)}, { "PRECISION", SYM(PRECISION)}, @@ -386,6 +398,7 @@ static SYMBOL symbols[] = { { "RAID_CHUNKS", SYM(RAID_CHUNKS)}, { "RAID_CHUNKSIZE", SYM(RAID_CHUNKSIZE)}, { "RAID_TYPE", SYM(RAID_TYPE)}, + { "RANGE", SYM(RANGE_SYM)}, { "READ", SYM(READ_SYM)}, { "READS", SYM(READS_SYM)}, { "REAL", SYM(REAL)}, @@ -399,6 +412,7 @@ static SYMBOL symbols[] = { { "RELEASE", SYM(RELEASE_SYM)}, { "RELOAD", SYM(RELOAD)}, { "RENAME", SYM(RENAME)}, + { "REORGANISE", SYM(REORGANISE_SYM)}, { "REPAIR", SYM(REPAIR)}, { "REPEATABLE", SYM(REPEATABLE_SYM)}, { "REPLACE", SYM(REPLACE)}, @@ -443,7 +457,7 @@ static SYMBOL symbols[] = { { "SNAPSHOT", SYM(SNAPSHOT_SYM)}, { "SMALLINT", SYM(SMALLINT)}, { "SOME", SYM(ANY_SYM)}, - { "SONAME", SYM(UDF_SONAME_SYM)}, + { "SONAME", SYM(SONAME_SYM)}, { "SOUNDS", SYM(SOUNDS_SYM)}, { "SPATIAL", SYM(SPATIAL_SYM)}, { "SPECIFIC", SYM(SPECIFIC_SYM)}, @@ -477,6 +491,8 @@ static SYMBOL symbols[] = { { "STRING", SYM(STRING_SYM)}, { "STRIPED", SYM(RAID_STRIPED_SYM)}, { "SUBJECT", SYM(SUBJECT_SYM)}, + { "SUBPARTITION", 
SYM(SUBPARTITION_SYM)}, + { "SUBPARTITIONS", SYM(SUBPARTITIONS_SYM)}, { "SUPER", SYM(SUPER_SYM)}, { "SUSPEND", SYM(SUSPEND_SYM)}, { "TABLE", SYM(TABLE_SYM)}, @@ -486,6 +502,7 @@ static SYMBOL symbols[] = { { "TEMPTABLE", SYM(TEMPTABLE_SYM)}, { "TERMINATED", SYM(TERMINATED)}, { "TEXT", SYM(TEXT_SYM)}, + { "THAN", SYM(THAN_SYM)}, { "THEN", SYM(THEN_SYM)}, { "TIME", SYM(TIME_SYM)}, { "TIMESTAMP", SYM(TIMESTAMP)}, @@ -511,6 +528,7 @@ static SYMBOL symbols[] = { { "UNIQUE", SYM(UNIQUE_SYM)}, { "UNKNOWN", SYM(UNKNOWN_SYM)}, { "UNLOCK", SYM(UNLOCK_SYM)}, + { "UNINSTALL", SYM(UNINSTALL_SYM)}, { "UNSIGNED", SYM(UNSIGNED)}, { "UNTIL", SYM(UNTIL_SYM)}, { "UPDATE", SYM(UPDATE_SYM)}, @@ -577,7 +595,6 @@ static SYMBOL sql_functions[] = { { "CENTROID", F_SYM(FUNC_ARG1),0,CREATE_FUNC_GEOM(create_func_centroid)}, { "CHAR_LENGTH", F_SYM(FUNC_ARG1),0,CREATE_FUNC(create_func_char_length)}, { "CHARACTER_LENGTH", F_SYM(FUNC_ARG1),0,CREATE_FUNC(create_func_char_length)}, - { "COALESCE", SYM(COALESCE)}, { "COERCIBILITY", F_SYM(FUNC_ARG1),0,CREATE_FUNC(create_func_coercibility)}, { "COMPRESS", F_SYM(FUNC_ARG1),0,CREATE_FUNC(create_func_compress)}, { "CONCAT", SYM(CONCAT)}, @@ -614,6 +631,7 @@ static SYMBOL sql_functions[] = { { "EQUALS", F_SYM(FUNC_ARG2),0,CREATE_FUNC_GEOM(create_func_equals)}, { "EXTERIORRING", F_SYM(FUNC_ARG1),0,CREATE_FUNC_GEOM(create_func_exteriorring)}, { "EXTRACT", SYM(EXTRACT_SYM)}, + { "EXTRACTVALUE", F_SYM(FUNC_ARG2),0,CREATE_FUNC(create_func_xml_extractvalue)}, { "EXP", F_SYM(FUNC_ARG1),0,CREATE_FUNC(create_func_exp)}, { "EXPORT_SET", SYM(EXPORT_SET)}, { "FIELD", SYM(FIELD_FUNC)}, /* For compability */ @@ -768,6 +786,7 @@ static SYMBOL sql_functions[] = { { "UNHEX", F_SYM(FUNC_ARG1),0,CREATE_FUNC(create_func_unhex)}, { "UNIQUE_USERS", SYM(UNIQUE_USERS)}, { "UNIX_TIMESTAMP", SYM(UNIX_TIMESTAMP)}, + { "UPDATEXML", F_SYM(FUNC_ARG3),0,CREATE_FUNC(create_func_xml_update)}, { "UPPER", F_SYM(FUNC_ARG1),0,CREATE_FUNC(create_func_ucase)}, { "UUID", F_SYM(FUNC_ARG0),0,CREATE_FUNC(create_func_uuid)}, { "VARIANCE", SYM(VARIANCE_SYM)}, diff --git a/sql/lock.cc b/sql/lock.cc index d0bfcfd7272..8e24c56799d 100644 --- a/sql/lock.cc +++ b/sql/lock.cc @@ -72,7 +72,7 @@ TODO: #ifndef MASTER #include "../srclib/myisammrg/myrg_def.h" #else -#include "../myisammrg/myrg_def.h" +#include "../storage/myisammrg/myrg_def.h" #endif static MYSQL_LOCK *get_lock_data(THD *thd, TABLE **table,uint count, @@ -146,6 +146,7 @@ MYSQL_LOCK *mysql_lock_tables(THD *thd, TABLE **tables, uint count, } thd->proc_info="System lock"; + DBUG_PRINT("info", ("thd->proc_info %s", thd->proc_info)); if (lock_external(thd, tables, count)) { my_free((gptr) sql_lock,MYF(0)); @@ -153,6 +154,7 @@ MYSQL_LOCK *mysql_lock_tables(THD *thd, TABLE **tables, uint count, break; } thd->proc_info="Table lock"; + DBUG_PRINT("info", ("thd->proc_info %s", thd->proc_info)); thd->locked=1; rc= thr_lock_errno_to_mysql[(int) thr_multi_lock(sql_lock->locks, sql_lock->lock_count, @@ -218,6 +220,7 @@ static int lock_external(THD *thd, TABLE **tables, uint count) int lock_type,error; DBUG_ENTER("lock_external"); + DBUG_PRINT("info", ("count %d", count)); for (i=1 ; i <= count ; i++, tables++) { DBUG_ASSERT((*tables)->reginfo.lock_type >= TL_READ); @@ -226,7 +229,6 @@ static int lock_external(THD *thd, TABLE **tables, uint count) ((*tables)->reginfo.lock_type >= TL_READ && (*tables)->reginfo.lock_type <= TL_READ_NO_INSERT)) lock_type=F_RDLCK; - if ((error=(*tables)->file->external_lock(thd,lock_type))) { print_lock_error(error, (*tables)->file->table_type()); @@ 
-355,12 +357,15 @@ void mysql_lock_abort(THD *thd, TABLE *table) { MYSQL_LOCK *locked; TABLE *write_lock_used; + DBUG_ENTER("mysql_lock_abort"); + if ((locked = get_lock_data(thd,&table,1,1,&write_lock_used))) { for (uint i=0; i < locked->lock_count; i++) thr_abort_locks(locked->locks[i]->lock); my_free((gptr) locked,MYF(0)); } + DBUG_VOID_RETURN; } @@ -583,6 +588,7 @@ static MYSQL_LOCK *get_lock_data(THD *thd, TABLE **table_ptr, uint count, TABLE **to; DBUG_ENTER("get_lock_data"); + DBUG_PRINT("info", ("count %d", count)); *write_lock_used=0; for (i=tables=lock_count=0 ; i < count ; i++) { @@ -600,9 +606,9 @@ static MYSQL_LOCK *get_lock_data(THD *thd, TABLE **table_ptr, uint count, table_ptr[i]->reginfo.lock_type >= TL_WRITE_ALLOW_WRITE && count != 1) { - my_error(ER_WRONG_LOCK_OF_SYSTEM_TABLE, MYF(0), table_ptr[i]->s->db, - table_ptr[i]->s->table_name); - return 0; + my_error(ER_WRONG_LOCK_OF_SYSTEM_TABLE, MYF(0), table_ptr[i]->s->db.str, + table_ptr[i]->s->table_name.str); + DBUG_RETURN(0); } } @@ -615,6 +621,8 @@ static MYSQL_LOCK *get_lock_data(THD *thd, TABLE **table_ptr, uint count, to=sql_lock->table=(TABLE**) (locks+tables); sql_lock->table_count=lock_count; sql_lock->lock_count=tables; + DBUG_PRINT("info", ("sql_lock->table_count %d sql_lock->lock_count %d", + sql_lock->table_count, sql_lock->lock_count)); for (i=0 ; i < count ; i++) { @@ -727,32 +735,35 @@ int lock_table_name(THD *thd, TABLE_LIST *table_list) DBUG_ENTER("lock_table_name"); DBUG_PRINT("enter",("db: %s name: %s", db, table_list->table_name)); - safe_mutex_assert_owner(&LOCK_open); - - key_length=(uint) (strmov(strmov(key,db)+1,table_list->table_name) - -key)+ 1; - + key_length= create_table_def_key(thd, key, table_list, 0); /* Only insert the table if we haven't insert it already */ for (table=(TABLE*) hash_first(&open_cache, (byte*)key, key_length, &state); table ; - table = (TABLE*) hash_next(&open_cache, (byte*)key, key_length, &state)) + table = (TABLE*) hash_next(&open_cache,(byte*) key,key_length, &state)) + { if (table->in_use == thd) + { + DBUG_PRINT("info", ("Table is in use")); + table->s->version= 0; // Ensure no one can use this + table->locked_by_name= 1; DBUG_RETURN(0); - + } + } /* Create a table entry with the right key and with an old refresh version Note that we must use my_malloc() here as this is freed by the table cache */ - if (!(table= (TABLE*) my_malloc(sizeof(*table)+key_length, - MYF(MY_WME | MY_ZEROFILL)))) + if (!(table= (TABLE*) my_malloc(sizeof(*table)+ sizeof(TABLE_SHARE)+ + key_length, MYF(MY_WME | MY_ZEROFILL)))) DBUG_RETURN(-1); - table->s= &table->share_not_to_be_used; - memcpy((table->s->table_cache_key= (char*) (table+1)), key, key_length); - table->s->db= table->s->table_cache_key; - table->s->key_length=key_length; - table->in_use=thd; + table->s= (TABLE_SHARE*) (table+1); + memcpy((table->s->table_cache_key.str= (char*) (table->s+1)), key, + key_length); + table->s->table_cache_key.length= key_length; + table->s->tmp_table= INTERNAL_TMP_TABLE; // for intern_close_table + table->in_use= thd; table->locked_by_name=1; table_list->table=table; @@ -782,8 +793,17 @@ static bool locked_named_table(THD *thd, TABLE_LIST *table_list) { for (; table_list ; table_list=table_list->next_local) { - if (table_list->table && table_is_used(table_list->table,0)) - return 1; + TABLE *table= table_list->table; + if (table) + { + TABLE *save_next= table->next; + bool result; + table->next= 0; + result= table_is_used(table_list->table, 0); + table->next= save_next; + if (result) + return 1; + } } 
return 0; // All tables are locked } @@ -793,6 +813,7 @@ bool wait_for_locked_table_names(THD *thd, TABLE_LIST *table_list) { bool result=0; DBUG_ENTER("wait_for_locked_table_names"); + safe_mutex_assert_owner(&LOCK_open); while (locked_named_table(thd,table_list)) @@ -802,7 +823,7 @@ bool wait_for_locked_table_names(THD *thd, TABLE_LIST *table_list) result=1; break; } - wait_for_refresh(thd); + wait_for_condition(thd, &LOCK_open, &COND_refresh); pthread_mutex_lock(&LOCK_open); } DBUG_RETURN(result); @@ -1154,5 +1175,3 @@ bool make_global_read_lock_block_commit(THD *thd) thd->exit_cond(old_message); // this unlocks LOCK_global_read_lock DBUG_RETURN(error); } - - diff --git a/sql/log.cc b/sql/log.cc index 6c37cb04c61..dd08ca7b9b5 100644 --- a/sql/log.cc +++ b/sql/log.cc @@ -24,6 +24,7 @@ #include "mysql_priv.h" #include "sql_repl.h" +#include "rpl_filter.h" #include <my_dir.h> #include <stdarg.h> @@ -46,11 +47,25 @@ static int binlog_commit(THD *thd, bool all); static int binlog_rollback(THD *thd, bool all); static int binlog_prepare(THD *thd, bool all); +/* + This is a POD. Please keep it that way! + + Don't add constructors, destructors, or virtual functions. +*/ +struct binlog_trx_data { + bool empty() const { + return pending == NULL && my_b_tell(&trans_log) == 0; + } + IO_CACHE trans_log; // The transaction cache + Rows_log_event *pending; // The pending binrows event +}; + handlerton binlog_hton = { + MYSQL_HANDLERTON_INTERFACE_VERSION, "binlog", SHOW_OPTION_YES, "This is a meta storage engine to represent the binlog in a transaction", - DB_TYPE_UNKNOWN, /* IGNORE for now */ + DB_TYPE_BINLOG, /* IGNORE for now */ binlog_init, 0, sizeof(my_off_t), /* savepoint size = binlog offset */ @@ -67,9 +82,16 @@ handlerton binlog_hton = { NULL, /* create_cursor_read_view */ NULL, /* set_cursor_read_view */ NULL, /* close_cursor_read_view */ - HTON_HIDDEN + NULL, /* Create a new handler */ + NULL, /* Drop a database */ + NULL, /* Panic call */ + NULL, /* Start Consistent Snapshot */ + NULL, /* Flush logs */ + NULL, /* Show status */ + HTON_NOT_USER_SELECTABLE | HTON_HIDDEN }; + /* this function is mostly a placeholder. conceptually, binlog initialization (now mostly done in MYSQL_LOG::open) @@ -83,19 +105,45 @@ bool binlog_init() static int binlog_close_connection(THD *thd) { - IO_CACHE *trans_log= (IO_CACHE*)thd->ha_data[binlog_hton.slot]; - DBUG_ASSERT(mysql_bin_log.is_open() && !my_b_tell(trans_log)); + binlog_trx_data *const trx_data= + (binlog_trx_data*) thd->ha_data[binlog_hton.slot]; + IO_CACHE *trans_log= &trx_data->trans_log; + DBUG_ASSERT(mysql_bin_log.is_open() && trx_data->empty()); close_cached_file(trans_log); - my_free((gptr)trans_log, MYF(0)); + thd->ha_data[binlog_hton.slot]= 0; + my_free((gptr)trx_data, MYF(0)); return 0; } -static int binlog_end_trans(THD *thd, IO_CACHE *trans_log, Log_event *end_ev) +static int +binlog_end_trans(THD *thd, binlog_trx_data *trx_data, Log_event *end_ev) { - int error=0; DBUG_ENTER("binlog_end_trans"); + int error=0; + IO_CACHE *trans_log= &trx_data->trans_log; + if (end_ev) + { + thd->binlog_flush_pending_rows_event(true); error= mysql_bin_log.write(thd, trans_log, end_ev); + } + else + { + thd->binlog_delete_pending_rows_event(); + } + + /* + We need to step the table map version both after writing the + entire transaction to the log file and after rolling back the + transaction. + + We need to step the table map version after writing the + transaction cache to disk. 
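+
+    (A hedged illustration, not part of the original comment:
+
+       BEGIN;
+       INSERT INTO t1 VALUES (1);  -- Rows event refers to t1's table map
+       ROLLBACK;                   -- cached events are thrown away
+       INSERT INTO t1 VALUES (2);  -- needs a freshly written table map
+    )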
In addition, we need to step the table + map version on a rollback to ensure that a new table map event is + generated instead of the one that was written to the thrown-away + transaction cache. + */ + ++mysql_bin_log.m_table_map_version; statistic_increment(binlog_cache_use, &LOCK_status); if (trans_log->disk_writes != 0) @@ -121,32 +169,36 @@ static int binlog_prepare(THD *thd, bool all) static int binlog_commit(THD *thd, bool all) { - IO_CACHE *trans_log= (IO_CACHE*)thd->ha_data[binlog_hton.slot]; DBUG_ENTER("binlog_commit"); + binlog_trx_data *const trx_data= + (binlog_trx_data*) thd->ha_data[binlog_hton.slot]; + IO_CACHE *trans_log= &trx_data->trans_log; DBUG_ASSERT(mysql_bin_log.is_open() && (all || !(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)))); - if (!my_b_tell(trans_log)) + if (trx_data->empty()) { // we're here because trans_log was flushed in MYSQL_LOG::log() DBUG_RETURN(0); } Query_log_event qev(thd, STRING_WITH_LEN("COMMIT"), TRUE, FALSE); - DBUG_RETURN(binlog_end_trans(thd, trans_log, &qev)); + DBUG_RETURN(binlog_end_trans(thd, trx_data, &qev)); } static int binlog_rollback(THD *thd, bool all) { - int error=0; - IO_CACHE *trans_log= (IO_CACHE*)thd->ha_data[binlog_hton.slot]; DBUG_ENTER("binlog_rollback"); + int error=0; + binlog_trx_data *const trx_data= + (binlog_trx_data*) thd->ha_data[binlog_hton.slot]; + IO_CACHE *trans_log= &trx_data->trans_log; /* First assert is guaranteed - see trans_register_ha() call below. The second must be true. If it is not, we're registering unnecessary, doing extra work. The cause should be found and eliminated */ DBUG_ASSERT(all || !(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))); - DBUG_ASSERT(mysql_bin_log.is_open() && my_b_tell(trans_log)); + DBUG_ASSERT(mysql_bin_log.is_open() && !trx_data->empty()); /* Update the binary log with a BEGIN/ROLLBACK block if we have cached some queries and we updated some non-transactional @@ -156,10 +208,10 @@ static int binlog_rollback(THD *thd, bool all) if (unlikely(thd->options & OPTION_STATUS_NO_TRANS_UPDATE)) { Query_log_event qev(thd, STRING_WITH_LEN("ROLLBACK"), TRUE, FALSE); - error= binlog_end_trans(thd, trans_log, &qev); + error= binlog_end_trans(thd, trx_data, &qev); } else - error= binlog_end_trans(thd, trans_log, 0); + error= binlog_end_trans(thd, trx_data, 0); DBUG_RETURN(error); } @@ -186,20 +238,26 @@ static int binlog_rollback(THD *thd, bool all) static int binlog_savepoint_set(THD *thd, void *sv) { - IO_CACHE *trans_log= (IO_CACHE*)thd->ha_data[binlog_hton.slot]; DBUG_ENTER("binlog_savepoint_set"); - DBUG_ASSERT(mysql_bin_log.is_open() && my_b_tell(trans_log)); + binlog_trx_data *const trx_data= + (binlog_trx_data*) thd->ha_data[binlog_hton.slot]; + DBUG_ASSERT(mysql_bin_log.is_open() && my_b_tell(&trx_data->trans_log)); - *(my_off_t *)sv= my_b_tell(trans_log); + *(my_off_t *)sv= my_b_tell(&trx_data->trans_log); /* Write it to the binary log */ - Query_log_event qinfo(thd, thd->query, thd->query_length, TRUE, FALSE); - DBUG_RETURN(mysql_bin_log.write(&qinfo)); + + int const error= + thd->binlog_query(THD::STMT_QUERY_TYPE, + thd->query, thd->query_length, TRUE, FALSE); + DBUG_RETURN(error); } static int binlog_savepoint_rollback(THD *thd, void *sv) { - IO_CACHE *trans_log= (IO_CACHE*)thd->ha_data[binlog_hton.slot]; DBUG_ENTER("binlog_savepoint_rollback"); + binlog_trx_data *const trx_data= + (binlog_trx_data*) thd->ha_data[binlog_hton.slot]; + IO_CACHE *trans_log= &trx_data->trans_log; DBUG_ASSERT(mysql_bin_log.is_open() && my_b_tell(trans_log)); /* @@ -209,8 +267,10 
@@ static int binlog_savepoint_rollback(THD *thd, void *sv) */ if (unlikely(thd->options & OPTION_STATUS_NO_TRANS_UPDATE)) { - Query_log_event qinfo(thd, thd->query, thd->query_length, TRUE, FALSE); - DBUG_RETURN(mysql_bin_log.write(&qinfo)); + int const error= + thd->binlog_query(THD::STMT_QUERY_TYPE, + thd->query, thd->query_length, TRUE, FALSE); + DBUG_RETURN(error); } reinit_io_cache(trans_log, WRITE_CACHE, *(my_off_t *)sv, 0, 0); DBUG_RETURN(0); @@ -358,6 +418,7 @@ MYSQL_LOG::MYSQL_LOG() :bytes_written(0), last_time(0), query_start(0), name(0), prepared_xids(0), log_type(LOG_CLOSED), file_id(1), open_count(1), write_error(FALSE), inited(FALSE), need_start_event(TRUE), + m_table_map_version(0), description_event_for_exec(0), description_event_for_queue(0) { /* @@ -1354,7 +1415,7 @@ void MYSQL_LOG::new_file(bool need_lock) to change base names at some point. */ THD *thd = current_thd; /* may be 0 if we are reacting to SIGHUP */ - Rotate_log_event r(thd,new_name+dirname_length(new_name), + Rotate_log_event r(new_name+dirname_length(new_name), 0, LOG_EVENT_OFFSET, 0); r.write(&log_file); bytes_written += r.data_written; @@ -1580,6 +1641,162 @@ bool MYSQL_LOG::is_query_in_union(THD *thd, query_id_t query_id_param) query_id_param >= thd->binlog_evt_union.first_query_id); } + +/* + These functions are placed in this file since they need access to + binlog_hton, which has internal linkage. +*/ + +int THD::binlog_setup_trx_data() +{ + DBUG_ENTER("THD::binlog_setup_trx_data"); + binlog_trx_data *trx_data= + (binlog_trx_data*) ha_data[binlog_hton.slot]; + + if (trx_data) + DBUG_RETURN(0); // Already set up + + ha_data[binlog_hton.slot]= trx_data= + (binlog_trx_data*) my_malloc(sizeof(binlog_trx_data), MYF(MY_ZEROFILL)); + if (!trx_data || + open_cached_file(&trx_data->trans_log, mysql_tmpdir, + LOG_PREFIX, binlog_cache_size, MYF(MY_WME))) + { + my_free((gptr)trx_data, MYF(MY_ALLOW_ZERO_PTR)); + ha_data[binlog_hton.slot]= 0; + DBUG_RETURN(1); // Didn't manage to set it up + } + trx_data->trans_log.end_of_file= max_binlog_cache_size; + DBUG_RETURN(0); +} + +Rows_log_event* +THD::binlog_get_pending_rows_event() const +{ + binlog_trx_data *const trx_data= + (binlog_trx_data*) ha_data[binlog_hton.slot]; + /* + This is less than ideal, but here's the story: If there is no + trx_data, prepare_pending_rows_event() has never been called + (since the trx_data is set up there). In that case, we just return + NULL. + */ + return trx_data ? trx_data->pending : NULL; +} + +void +THD::binlog_set_pending_rows_event(Rows_log_event* ev) +{ + binlog_trx_data *const trx_data= + (binlog_trx_data*) ha_data[binlog_hton.slot]; + DBUG_ASSERT(trx_data); + trx_data->pending= ev; +} + + +/* + Moves the last bunch of rows from the pending Rows event to the binlog + (either cached binlog if transaction, or disk binlog). Sets a new pending + event. +*/ +int MYSQL_LOG::flush_and_set_pending_rows_event(THD *thd, Rows_log_event* event) +{ + DBUG_ENTER("MYSQL_LOG::flush_and_set_pending_rows_event(event)"); + DBUG_ASSERT(binlog_row_based && mysql_bin_log.is_open()); + DBUG_PRINT("enter", ("event=%p", event)); + + int error= 0; + + binlog_trx_data *const trx_data= + (binlog_trx_data*) thd->ha_data[binlog_hton.slot]; + + DBUG_ASSERT(trx_data); + + if (Rows_log_event* pending= trx_data->pending) + { + IO_CACHE *file= &log_file; + + /* + Decide if we should write to the log file directly or to the + transaction log. 
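+
+      A hedged illustration, not part of the original comment: rows
+      changing a transactional table are cached per connection, while a
+      stand-alone change to a non-transactional table can go straight
+      to the log file:
+
+        INSERT INTO innodb_t1 VALUES (1);  -- cached until COMMIT
+        INSERT INTO myisam_t1 VALUES (1);  -- may be written directly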
+    */
+    if (pending->get_cache_stmt() || my_b_tell(&trx_data->trans_log))
+      file= &trx_data->trans_log;
+
+    /*
+      If we are writing to the log file directly, we could avoid
+      locking the log. This does not work since we need to step the
+      m_table_map_version below, and that change has to be protected
+      by the LOCK_log mutex.
+    */
+    pthread_mutex_lock(&LOCK_log);
+
+    /*
+      Write a table map if necessary
+    */
+    if (pending->maybe_write_table_map(thd, file, this))
+    {
+      pthread_mutex_unlock(&LOCK_log);
+      DBUG_RETURN(2);
+    }
+
+    /*
+      Write pending event to log file or transaction cache
+    */
+    if (pending->write(file))
+    {
+      pthread_mutex_unlock(&LOCK_log);
+      DBUG_RETURN(1);
+    }
+
+    /*
+      We step the table map version if we are writing an event
+      representing the end of a statement. We do this regardless of
+      whether we write to the transaction cache or directly to the
+      file.
+
+      In an ideal world, we could avoid stepping the table map version
+      if we were writing to a transaction cache, since we could then
+      reuse the table map that was written earlier in the transaction
+      cache. This does not work since STMT_END_F implies closing all
+      table mappings on the slave side.
+
+      TODO: Find a solution so that table maps do not have to be
+      written several times within a transaction.
+    */
+    if (pending->get_flags(Rows_log_event::STMT_END_F))
+      ++m_table_map_version;
+
+    delete pending;
+
+    if (file == &log_file)
+    {
+      error= flush_and_sync();
+      if (!error)
+      {
+        signal_update();
+        rotate_and_purge(RP_LOCK_LOG_IS_ALREADY_LOCKED);
+      }
+    }
+
+    pthread_mutex_unlock(&LOCK_log);
+  }
+  else if (event && event->get_cache_stmt()) /* && pending == 0 */
+  {
+    /*
+      If we are setting a non-null event for a table that is
+      transactional, we start a transaction here as well.
+    */
+    trans_register_ha(thd,
+                      thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN),
+                      &binlog_hton);
+  }
+
+  trx_data->pending= event;
+
+  DBUG_RETURN(error);
+}
+
 /*
   Write an event to the binary log
 */
@@ -1600,7 +1817,29 @@ bool MYSQL_LOG::write(Log_event *event_info)
     thd->binlog_evt_union.unioned_events_trans |= event_info->cache_stmt;
     DBUG_RETURN(0);
   }
-
+
+  /*
+    Flush the pending rows event to the transaction cache or to the
+    log file. Since this function potentially acquires the LOCK_log
+    mutex, we do this before acquiring the LOCK_log mutex in this
+    function.
+
+    This is not optimal, but necessary in the current implementation
+    since there is code that writes rows to system tables without
+    using some way to flush the pending event (e.g., binlog_query()).
+
+    TODO: There shall be no writes to any system table after calling
+    binlog_query(), so these writes have to be moved to before the call
+    of binlog_query() for correct functioning.
+
+    This is necessary not only for RBR, but the master might crash
+    after binlogging the query but before changing the system tables.
+    This means that the slave and the master are not in the same state
+    (after the master has restarted), so we have to eliminate this
+    problem.
+  */
+  thd->binlog_flush_pending_rows_event(true);
+
   pthread_mutex_lock(&LOCK_log);

   /*
@@ -1619,10 +1858,11 @@ bool MYSQL_LOG::write(Log_event *event_info)
      binlog_[wild_]{do|ignore}_table?"
(WL#1049)" */ if ((thd && !(thd->options & OPTION_BIN_LOG)) || - (!db_ok(local_db, binlog_do_db, binlog_ignore_db))) + (!binlog_filter->db_ok(local_db))) { VOID(pthread_mutex_unlock(&LOCK_log)); - DBUG_PRINT("error",("!db_ok('%s')", local_db)); + DBUG_PRINT("info",("db_ok('%s')==%d", local_db, + binlog_filter->db_ok(local_db))); DBUG_RETURN(0); } #endif /* HAVE_REPLICATION */ @@ -1639,37 +1879,26 @@ bool MYSQL_LOG::write(Log_event *event_info) */ if (opt_using_transactions && thd) { - IO_CACHE *trans_log= (IO_CACHE*)thd->ha_data[binlog_hton.slot]; + if (thd->binlog_setup_trx_data()) + goto err; - if (event_info->get_cache_stmt()) - { - if (!trans_log) - { - thd->ha_data[binlog_hton.slot]= trans_log= (IO_CACHE *) - my_malloc(sizeof(IO_CACHE), MYF(MY_ZEROFILL)); - if (!trans_log || open_cached_file(trans_log, mysql_tmpdir, - LOG_PREFIX, - binlog_cache_size, MYF(MY_WME))) - { - my_free((gptr)trans_log, MYF(MY_ALLOW_ZERO_PTR)); - thd->ha_data[binlog_hton.slot]= trans_log= 0; - goto err; - } - trans_log->end_of_file= max_binlog_cache_size; - trans_register_ha(thd, - thd->options & (OPTION_NOT_AUTOCOMMIT | - OPTION_BEGIN), - &binlog_hton); - } - else if (!my_b_tell(trans_log)) - trans_register_ha(thd, - thd->options & (OPTION_NOT_AUTOCOMMIT | - OPTION_BEGIN), - &binlog_hton); - file= trans_log; - } - else if (trans_log && my_b_tell(trans_log)) + binlog_trx_data *const trx_data= + (binlog_trx_data*) thd->ha_data[binlog_hton.slot]; + IO_CACHE *trans_log= &trx_data->trans_log; + + if (event_info->get_cache_stmt() && !my_b_tell(trans_log)) + trans_register_ha(thd, + thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN), + &binlog_hton); + + if (event_info->get_cache_stmt() || my_b_tell(trans_log)) file= trans_log; + /* + Note: as Mats suggested, for all the cases above where we write to + trans_log, it sounds unnecessary to lock LOCK_log. We should rather + test first if we want to write to trans_log, and if not, lock + LOCK_log. TODO. + */ } #endif DBUG_PRINT("info",("event type=%d",event_info->get_type_code())); @@ -1684,42 +1913,49 @@ bool MYSQL_LOG::write(Log_event *event_info) of the SQL command */ + /* + If row-based binlogging, Insert_id, Rand and other kind of "setting + context" events are not needed. 
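+
+    A hedged illustration, not part of the original comment: under
+    statement-based logging
+
+      INSERT INTO t1 VALUES (RAND());
+
+    requires a Rand_log_event so the slave generates the same value,
+    whereas under row-based logging the resulting row itself is
+    logged and no such context event is needed.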
+ */ if (thd) { - if (thd->last_insert_id_used) - { - Intvar_log_event e(thd,(uchar) LAST_INSERT_ID_EVENT, - thd->current_insert_id); - if (e.write(file)) - goto err; - } - if (thd->insert_id_used) - { - Intvar_log_event e(thd,(uchar) INSERT_ID_EVENT,thd->last_insert_id); - if (e.write(file)) - goto err; - } - if (thd->rand_used) + if (!binlog_row_based) { - Rand_log_event e(thd,thd->rand_saved_seed1,thd->rand_saved_seed2); - if (e.write(file)) - goto err; - } - if (thd->user_var_events.elements) - { - for (uint i= 0; i < thd->user_var_events.elements; i++) - { - BINLOG_USER_VAR_EVENT *user_var_event; - get_dynamic(&thd->user_var_events,(gptr) &user_var_event, i); - User_var_log_event e(thd, user_var_event->user_var_event->name.str, - user_var_event->user_var_event->name.length, - user_var_event->value, - user_var_event->length, - user_var_event->type, - user_var_event->charset_number); - if (e.write(file)) - goto err; - } + if (thd->last_insert_id_used) + { + Intvar_log_event e(thd,(uchar) LAST_INSERT_ID_EVENT, + thd->current_insert_id); + if (e.write(file)) + goto err; + } + if (thd->insert_id_used) + { + Intvar_log_event e(thd,(uchar) INSERT_ID_EVENT,thd->last_insert_id); + if (e.write(file)) + goto err; + } + if (thd->rand_used) + { + Rand_log_event e(thd,thd->rand_saved_seed1,thd->rand_saved_seed2); + if (e.write(file)) + goto err; + } + if (thd->user_var_events.elements) + { + for (uint i= 0; i < thd->user_var_events.elements; i++) + { + BINLOG_USER_VAR_EVENT *user_var_event; + get_dynamic(&thd->user_var_events,(gptr) &user_var_event, i); + User_var_log_event e(thd, user_var_event->user_var_event->name.str, + user_var_event->user_var_event->name.length, + user_var_event->value, + user_var_event->length, + user_var_event->type, + user_var_event->charset_number); + if (e.write(file)) + goto err; + } + } } } @@ -1750,6 +1986,9 @@ err: } } + if (event_info->flags & LOG_EVENT_UPDATE_TABLE_MAP_VERSION_F) + ++m_table_map_version; + pthread_mutex_unlock(&LOCK_log); DBUG_RETURN(error); } @@ -2297,6 +2536,44 @@ void MYSQL_LOG::signal_update() DBUG_VOID_RETURN; } +#ifndef MYSQL_CLIENT +bool MYSQL_LOG::write_table_map(THD *thd, IO_CACHE *file, TABLE* table, + bool is_transactional) +{ + DBUG_ENTER("MYSQL_LOG::write_table_map()"); + DBUG_PRINT("enter", ("table=%p (%s: %u)", + table, table->s->table_name, table->s->table_map_id)); + + /* Pre-conditions */ + DBUG_ASSERT(binlog_row_based && is_open()); + DBUG_ASSERT(table->s->table_map_id != ULONG_MAX); + +#ifndef DBUG_OFF + /* + We only need to execute under the LOCK_log mutex if we are writing + to the log file; otherwise, we are writing to a thread-specific + transaction cache and there is no need to serialize this event + with events in other threads. 
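+
+    A hedged illustration, not part of the original comment:
+
+      -- session 1          -- session 2
+      BEGIN;                BEGIN;
+      UPDATE t1 SET a= 1;   UPDATE t2 SET b= 2;
+      COMMIT;               COMMIT;
+
+    Each session buffers its Table_map and Rows events in its own
+    transaction cache; only the flush at COMMIT takes LOCK_log.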
+ */ + if (file == &log_file) + safe_mutex_assert_owner(&LOCK_log); +#endif + + Table_map_log_event::flag_set const + flags= Table_map_log_event::NO_FLAGS; + + Table_map_log_event + the_event(thd, table, table->s->table_map_id, is_transactional, flags); + + if (the_event.write(file)) + DBUG_RETURN(1); + + table->s->table_map_version= m_table_map_version; + DBUG_RETURN(0); +} +#endif /* !defined(MYSQL_CLIENT) */ + + #ifdef __NT__ void print_buffer_to_nt_eventlog(enum loglevel level, char *buff, uint length, int buffLen) @@ -3003,9 +3280,11 @@ void TC_LOG_BINLOG::close() */ int TC_LOG_BINLOG::log(THD *thd, my_xid xid) { + DBUG_ENTER("TC_LOG_BINLOG::log"); Xid_log_event xle(thd, xid); - IO_CACHE *trans_log= (IO_CACHE*)thd->ha_data[binlog_hton.slot]; - return !binlog_end_trans(thd, trans_log, &xle); // invert return value + binlog_trx_data *trx_data= + (binlog_trx_data*) thd->ha_data[binlog_hton.slot]; + DBUG_RETURN(!binlog_end_trans(thd, trx_data, &xle)); // invert return value } void TC_LOG_BINLOG::unlog(ulong cookie, my_xid xid) diff --git a/sql/log.h b/sql/log.h new file mode 100644 index 00000000000..ea2946c2d86 --- /dev/null +++ b/sql/log.h @@ -0,0 +1,332 @@ +/* Copyright (C) 2005 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#ifndef LOG_H +#define LOG_H + +struct st_relay_log_info; + +class Format_description_log_event; + +/* + Transaction Coordinator log - a base abstract class + for two different implementations +*/ +class TC_LOG +{ + public: + int using_heuristic_recover(); + TC_LOG() {} + virtual ~TC_LOG() {} + + virtual int open(const char *opt_name)=0; + virtual void close()=0; + virtual int log(THD *thd, my_xid xid)=0; + virtual void unlog(ulong cookie, my_xid xid)=0; +}; + +class TC_LOG_DUMMY: public TC_LOG // use it to disable the logging +{ + public: + int open(const char *opt_name) { return 0; } + void close() { } + int log(THD *thd, my_xid xid) { return 1; } + void unlog(ulong cookie, my_xid xid) { } +}; + +#ifdef HAVE_MMAP +class TC_LOG_MMAP: public TC_LOG +{ + public: // only to keep Sun Forte on sol9x86 happy + typedef enum { + POOL, // page is in pool + ERROR, // last sync failed + DIRTY // new xids added since last sync + } PAGE_STATE; + + private: + typedef struct st_page { + struct st_page *next; // page a linked in a fifo queue + my_xid *start, *end; // usable area of a page + my_xid *ptr; // next xid will be written here + int size, free; // max and current number of free xid slots on the page + int waiters; // number of waiters on condition + PAGE_STATE state; // see above + pthread_mutex_t lock; // to access page data or control structure + pthread_cond_t cond; // to wait for a sync + } PAGE; + + char logname[FN_REFLEN]; + File fd; + my_off_t file_length; + uint npages, inited; + uchar *data; + struct st_page *pages, *syncing, *active, *pool, *pool_last; + /* + note that, e.g. 
LOCK_active is only used to protect + 'active' pointer, to protect the content of the active page + one has to use active->lock. + Same for LOCK_pool and LOCK_sync + */ + pthread_mutex_t LOCK_active, LOCK_pool, LOCK_sync; + pthread_cond_t COND_pool, COND_active; + + public: + TC_LOG_MMAP(): inited(0) {} + int open(const char *opt_name); + void close(); + int log(THD *thd, my_xid xid); + void unlog(ulong cookie, my_xid xid); + int recover(); + + private: + void get_active_from_pool(); + int sync(); + int overflow(); +}; +#else +#define TC_LOG_MMAP TC_LOG_DUMMY +#endif + +extern TC_LOG *tc_log; +extern TC_LOG_MMAP tc_log_mmap; +extern TC_LOG_DUMMY tc_log_dummy; + +/* log info errors */ +#define LOG_INFO_EOF -1 +#define LOG_INFO_IO -2 +#define LOG_INFO_INVALID -3 +#define LOG_INFO_SEEK -4 +#define LOG_INFO_MEM -6 +#define LOG_INFO_FATAL -7 +#define LOG_INFO_IN_USE -8 + +/* bitmap to SQL_LOG::close() */ +#define LOG_CLOSE_INDEX 1 +#define LOG_CLOSE_TO_BE_OPENED 2 +#define LOG_CLOSE_STOP_EVENT 4 + +struct st_relay_log_info; + +typedef struct st_log_info +{ + char log_file_name[FN_REFLEN]; + my_off_t index_file_offset, index_file_start_offset; + my_off_t pos; + bool fatal; // if the purge happens to give us a negative offset + pthread_mutex_t lock; + st_log_info():fatal(0) { pthread_mutex_init(&lock, MY_MUTEX_INIT_FAST);} + ~st_log_info() { pthread_mutex_destroy(&lock);} +} LOG_INFO; + +class Log_event; +class Rows_log_event; + +enum enum_log_type { LOG_CLOSED, LOG_TO_BE_OPENED, LOG_NORMAL, LOG_NEW, LOG_BIN}; + +/* + TODO split MYSQL_LOG into base MYSQL_LOG and + MYSQL_QUERY_LOG, MYSQL_SLOW_LOG, MYSQL_BIN_LOG + most of the code from MYSQL_LOG should be in the MYSQL_BIN_LOG + only (TC_LOG included) + + TODO use mmap instead of IO_CACHE for binlog + (mmap+fsync is two times faster than write+fsync) +*/ + +class MYSQL_LOG: public TC_LOG +{ + private: + /* LOCK_log and LOCK_index are inited by init_pthread_objects() */ + pthread_mutex_t LOCK_log, LOCK_index; + pthread_mutex_t LOCK_prep_xids; + pthread_cond_t COND_prep_xids; + pthread_cond_t update_cond; + ulonglong bytes_written; + time_t last_time,query_start; + IO_CACHE log_file; + IO_CACHE index_file; + char *name; + char time_buff[20],db[NAME_LEN+1]; + char log_file_name[FN_REFLEN],index_file_name[FN_REFLEN]; + /* + The max size before rotation (usable only if log_type == LOG_BIN: binary + logs and relay logs). + For a binlog, max_size should be max_binlog_size. + For a relay log, it should be max_relay_log_size if this is non-zero, + max_binlog_size otherwise. + max_size is set in init(), and dynamically changed (when one does SET + GLOBAL MAX_BINLOG_SIZE|MAX_RELAY_LOG_SIZE) by fix_max_binlog_size and + fix_max_relay_log_size). + */ + ulong max_size; + ulong prepared_xids; /* for tc log - number of xids to remember */ + volatile enum_log_type log_type; + enum cache_type io_cache_type; + // current file sequence number for load data infile binary logging + uint file_id; + uint open_count; // For replication + int readers_count; + bool write_error, inited; + bool need_start_event; + /* + no_auto_events means we don't want any of these automatic events : + Start/Rotate/Stop. That is, in 4.x when we rotate a relay log, we don't + want a Rotate_log event to be written to the relay log. When we start a + relay log etc. So in 4.x this is 1 for relay logs, 0 for binlogs. + In 5.0 it's 0 for relay logs too! + */ + bool no_auto_events; + friend class Log_event; + +public: + ulonglong m_table_map_version; + + /* + These describe the log's format. 
This is used only for relay logs. + _for_exec is used by the SQL thread, _for_queue by the I/O thread. It's + necessary to have 2 distinct objects, because the I/O thread may be reading + events in a different format from what the SQL thread is reading (consider + the case of a master which has been upgraded from 5.0 to 5.1 without doing + RESET MASTER, or from 4.x to 5.0). + */ + Format_description_log_event *description_event_for_exec, + *description_event_for_queue; + + MYSQL_LOG(); + /* + note that there's no destructor ~MYSQL_LOG() ! + The reason is that we don't want it to be automatically called + on exit() - but only during the correct shutdown process + */ + + int open(const char *opt_name); + void close(); + int log(THD *thd, my_xid xid); + void unlog(ulong cookie, my_xid xid); + int recover(IO_CACHE *log, Format_description_log_event *fdle); +#if !defined(MYSQL_CLIENT) + bool is_table_mapped(TABLE *table) const + { + return table->s->table_map_version == m_table_map_version; + } + + int flush_and_set_pending_rows_event(THD *thd, Rows_log_event* event); + +#endif /* !defined(MYSQL_CLIENT) */ + void reset_bytes_written() + { + bytes_written = 0; + } + void harvest_bytes_written(ulonglong* counter) + { +#ifndef DBUG_OFF + char buf1[22],buf2[22]; +#endif + DBUG_ENTER("harvest_bytes_written"); + (*counter)+=bytes_written; + DBUG_PRINT("info",("counter: %s bytes_written: %s", llstr(*counter,buf1), + llstr(bytes_written,buf2))); + bytes_written=0; + DBUG_VOID_RETURN; + } + void set_max_size(ulong max_size_arg); + void signal_update(); + void wait_for_update(THD* thd, bool master_or_slave); + void set_need_start_event() { need_start_event = 1; } + void init(enum_log_type log_type_arg, + enum cache_type io_cache_type_arg, + bool no_auto_events_arg, ulong max_size); + void init_pthread_objects(); + void cleanup(); + bool open(const char *log_name, + enum_log_type log_type, + const char *new_name, + enum cache_type io_cache_type_arg, + bool no_auto_events_arg, ulong max_size, + bool null_created); + const char *generate_name(const char *log_name, const char *suffix, + bool strip_ext, char *buff); + /* simplified open_xxx wrappers for the gigantic open above */ + bool open_query_log(const char *log_name) + { + char buf[FN_REFLEN]; + return open(generate_name(log_name, ".log", 0, buf), + LOG_NORMAL, 0, WRITE_CACHE, 0, 0, 0); + } + bool open_slow_log(const char *log_name) + { + char buf[FN_REFLEN]; + return open(generate_name(log_name, "-slow.log", 0, buf), + LOG_NORMAL, 0, WRITE_CACHE, 0, 0, 0); + } + bool open_index_file(const char *index_file_name_arg, + const char *log_name); + void new_file(bool need_lock); + bool write(THD *thd, enum enum_server_command command, + const char *format,...); + bool write(THD *thd, const char *query, uint query_length, + time_t query_start=0); + bool write(Log_event* event_info); // binary log write + bool write(THD *thd, IO_CACHE *cache, Log_event *commit_event); + + bool write_table_map(THD *thd, IO_CACHE *cache, TABLE *table, bool is_trans); + + void start_union_events(THD *thd); + void stop_union_events(THD *thd); + bool is_query_in_union(THD *thd, query_id_t query_id_param); + + /* + v stands for vector + invoked as appendv(buf1,len1,buf2,len2,...,bufn,lenn,0) + */ + bool appendv(const char* buf,uint len,...); + bool append(Log_event* ev); + + int generate_new_name(char *new_name,const char *old_name); + void make_log_name(char* buf, const char* log_ident); + bool is_active(const char* log_file_name); + int update_log_index(LOG_INFO* linfo, bool 
need_update_threads); + void rotate_and_purge(uint flags); + bool flush_and_sync(); + int purge_logs(const char *to_log, bool included, + bool need_mutex, bool need_update_threads, + ulonglong *decrease_log_space); + int purge_logs_before_date(time_t purge_time); + int purge_first_log(struct st_relay_log_info* rli, bool included); + bool reset_logs(THD* thd); + void close(uint exiting); + + // iterating through the log index file + int find_log_pos(LOG_INFO* linfo, const char* log_name, + bool need_mutex); + int find_next_log(LOG_INFO* linfo, bool need_mutex); + int get_current_log(LOG_INFO* linfo); + uint next_file_id(); + inline bool is_open() { return log_type != LOG_CLOSED; } + inline char* get_index_fname() { return index_file_name;} + inline char* get_log_fname() { return log_file_name; } + inline char* get_name() { return name; } + inline pthread_mutex_t* get_log_lock() { return &LOCK_log; } + inline IO_CACHE* get_log_file() { return &log_file; } + + inline void lock_index() { pthread_mutex_lock(&LOCK_index);} + inline void unlock_index() { pthread_mutex_unlock(&LOCK_index);} + inline IO_CACHE *get_index_file() { return &index_file;} + inline uint32 get_open_count() { return open_count; } +}; + +#endif /* LOG_H */ diff --git a/sql/log_event.cc b/sql/log_event.cc index 519b077b17b..eb61cd1f407 100644 --- a/sql/log_event.cc +++ b/sql/log_event.cc @@ -21,10 +21,14 @@ #pragma implementation // gcc: Class implementation #endif -#include "mysql_priv.h" +#include "mysql_priv.h" #include "slave.h" +#include "rpl_filter.h" #include <my_dir.h> #endif /* MYSQL_CLIENT */ +#include <base64.h> +#include <my_bitmap.h> +#include <my_vle.h> #define log_cs &my_charset_latin1 @@ -244,6 +248,7 @@ char *str_to_hex(char *to, const char *from, uint len) commands just before it prints a query. 
*/ +#ifdef MYSQL_CLIENT static void print_set_option(FILE* file, uint32 bits_changed, uint32 option, uint32 flags, const char* name, bool* need_comma) { @@ -255,6 +260,7 @@ static void print_set_option(FILE* file, uint32 bits_changed, uint32 option, *need_comma= 1; } } +#endif /************************************************************************** Log_event methods (= the parent class of all events) @@ -283,6 +289,10 @@ const char* Log_event::get_type_str() case XID_EVENT: return "Xid"; case USER_VAR_EVENT: return "User var"; case FORMAT_DESCRIPTION_EVENT: return "Format_desc"; + case TABLE_MAP_EVENT: return "Table_map"; + case WRITE_ROWS_EVENT: return "Write_rows"; + case UPDATE_ROWS_EVENT: return "Update_rows"; + case DELETE_ROWS_EVENT: return "Delete_rows"; case BEGIN_LOAD_QUERY_EVENT: return "Begin_load_query"; case EXECUTE_LOAD_QUERY_EVENT: return "Execute_load_query"; default: return "Unknown"; /* impossible */ @@ -790,6 +800,9 @@ Log_event* Log_event::read_log_event(const char* buf, uint event_len, DBUG_RETURN(NULL); // general sanity check - will fail on a partial read } + /* To check the integrity of the Log_event_type enumeration */ + DBUG_ASSERT(buf[EVENT_TYPE_OFFSET] < ENUM_END_EVENT); + switch(buf[EVENT_TYPE_OFFSET]) { case QUERY_EVENT: ev = new Query_log_event(buf, event_len, description_event, QUERY_EVENT); @@ -841,6 +854,20 @@ Log_event* Log_event::read_log_event(const char* buf, uint event_len, case FORMAT_DESCRIPTION_EVENT: ev = new Format_description_log_event(buf, event_len, description_event); break; +#if defined(HAVE_REPLICATION) && defined(HAVE_ROW_BASED_REPLICATION) + case WRITE_ROWS_EVENT: + ev = new Write_rows_log_event(buf, event_len, description_event); + break; + case UPDATE_ROWS_EVENT: + ev = new Update_rows_log_event(buf, event_len, description_event); + break; + case DELETE_ROWS_EVENT: + ev = new Delete_rows_log_event(buf, event_len, description_event); + break; + case TABLE_MAP_EVENT: + ev = new Table_map_log_event(buf, event_len, description_event); + break; +#endif case BEGIN_LOAD_QUERY_EVENT: ev = new Begin_load_query_log_event(buf, event_len, description_event); break; @@ -960,6 +987,24 @@ void Log_event::print_header(FILE* file, PRINT_EVENT_INFO* print_event_info) } +void Log_event::print_base64(FILE* file, PRINT_EVENT_INFO* print_event_info) +{ + uchar *ptr= (uchar*)temp_buf; + my_off_t size= uint4korr(ptr + EVENT_LEN_OFFSET); + + char *tmp_str= + (char *) my_malloc(base64_needed_encoded_length(size), MYF(MY_WME)); + if (!tmp_str) { + fprintf(stderr, "\nError: Out of memory. " + "Could not print correct binlog event.\n"); + return; + } + int res= base64_encode(ptr, size, tmp_str); + fprintf(file, "\nBINLOG '\n%s\n';\n", tmp_str); + my_free(tmp_str, MYF(0)); +} + + /* Log_event::print_timestamp() */ @@ -1604,7 +1649,7 @@ int Query_log_event::exec_event(struct st_relay_log_info* rli, const char *query */ thd->catalog= catalog_len ? (char *) catalog : (char *)""; thd->db_length= db_len; - thd->db= (char*) rewrite_db(db, &thd->db_length); + thd->db= (char *) rpl_filter->get_rewrite_db(db, &thd->db_length); thd->variables.auto_increment_increment= auto_increment_increment; thd->variables.auto_increment_offset= auto_increment_offset; @@ -1633,7 +1678,7 @@ int Query_log_event::exec_event(struct st_relay_log_info* rli, const char *query ::exec_event(), then the companion SET also have so we don't need to reset_one_shot_variables(). 
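
     A hedged illustration, not part of the original comment: with
     --replicate-do-db=db1 configured on the slave, filtering keys off
     the thread's current database, so

       USE db1; INSERT INTO db2.t1 VALUES (1);  -- executed on the slave
       USE db2; INSERT INTO db1.t1 VALUES (1);  -- skipped on the slave

     which is why cross-database statements need care with these rules.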
*/ - if (db_ok(thd->db, replicate_do_db, replicate_ignore_db)) + if (rpl_filter->db_ok(thd->db)) { thd->set_time((time_t)when); thd->query_length= q_len_arg; @@ -1722,7 +1767,7 @@ int Query_log_event::exec_event(struct st_relay_log_info* rli, const char *query clear_all_errors(thd, rli); /* Can ignore query */ else { - slave_print_error(rli,expected_error, + slave_print_msg(ERROR_LEVEL, rli, expected_error, "\ Query partially completed on the master (error on master: %d) \ and was aborted. There is a chance that your master is inconsistent at this \ @@ -1751,16 +1796,16 @@ compare_errors: !ignored_error_code(actual_error) && !ignored_error_code(expected_error)) { - slave_print_error(rli, 0, - "\ -Query caused different errors on master and slave. \ + slave_print_msg(ERROR_LEVEL, rli, 0, + "\ +Query caused different errors on master and slave. \ Error on master: '%s' (%d), Error on slave: '%s' (%d). \ Default database: '%s'. Query: '%s'", - ER_SAFE(expected_error), - expected_error, - actual_error ? thd->net.last_error: "no error", - actual_error, - print_slave_db_safe(db), query_arg); + ER_SAFE(expected_error), + expected_error, + actual_error ? thd->net.last_error: "no error", + actual_error, + print_slave_db_safe(db), query_arg); thd->query_error= 1; } /* @@ -1777,11 +1822,11 @@ Default database: '%s'. Query: '%s'", */ else if (thd->query_error || thd->is_fatal_error) { - slave_print_error(rli,actual_error, - "Error '%s' on query. Default database: '%s'. Query: '%s'", - (actual_error ? thd->net.last_error : - "unexpected success or fatal error"), - print_slave_db_safe(thd->db), query_arg); + slave_print_msg(ERROR_LEVEL, rli, actual_error, + "Error '%s' on query. Default database: '%s'. Query: '%s'", + (actual_error ? thd->net.last_error : + "unexpected success or fatal error"), + print_slave_db_safe(thd->db), query_arg); thd->query_error= 1; } @@ -2063,6 +2108,25 @@ Format_description_log_event(uint8 binlog_ver, const char* server_ver) post_header_len[DELETE_FILE_EVENT-1]= DELETE_FILE_HEADER_LEN; post_header_len[NEW_LOAD_EVENT-1]= post_header_len[LOAD_EVENT-1]; post_header_len[FORMAT_DESCRIPTION_EVENT-1]= FORMAT_DESCRIPTION_HEADER_LEN; + post_header_len[TABLE_MAP_EVENT-1]= TABLE_MAP_HEADER_LEN; + post_header_len[WRITE_ROWS_EVENT-1]= ROWS_HEADER_LEN; + post_header_len[UPDATE_ROWS_EVENT-1]= ROWS_HEADER_LEN; + post_header_len[DELETE_ROWS_EVENT-1]= ROWS_HEADER_LEN; + /* + Here we can simulate a master from before we changed the table map id + to be stored in 6 bytes: when it was stored in 4 bytes, post_header_len + was 6. This is used to test backward compatibility. + This code can be removed after a few months (today is Dec 21st 2005), + when we know that the 4-byte masters are not deployed anymore (check + with Tomas Ulin first!), and the accompanying test (rpl_row_4_bytes) + too. + */ + DBUG_EXECUTE_IF("old_row_based_repl_4_byte_map_id_master", + post_header_len[TABLE_MAP_EVENT-1]= + post_header_len[WRITE_ROWS_EVENT-1]= + post_header_len[UPDATE_ROWS_EVENT-1]= + post_header_len[DELETE_ROWS_EVENT-1]= 6;); post_header_len[BEGIN_LOAD_QUERY_EVENT-1]= post_header_len[APPEND_BLOCK_EVENT-1]; post_header_len[EXECUTE_LOAD_QUERY_EVENT-1]= EXECUTE_LOAD_QUERY_HEADER_LEN; } @@ -2197,10 +2261,8 @@ int Format_description_log_event::exec_event(struct st_relay_log_info* rli) As a transaction NEVER spans on 2 or more binlogs: if we have an active transaction at this point, the master died while writing the transaction to the binary log, i.e.
while - flushing the binlog cache to the binlog. As the write was started, - the transaction had been committed on the master, so we lack of - information to replay this transaction on the slave; all we can do - is stop with error. + flushing the binlog cache to the binlog. XA guarantees that the master + has rolled back, so we roll back too. Note: this event could be sent by the master to inform us of the format of its binlog; in other words maybe it is not at its original place when it comes to us; we'll know this by checking @@ -2208,11 +2270,13 @@ int Format_description_log_event::exec_event(struct st_relay_log_info* rli) */ if (!artificial_event && created && thd->transaction.all.nht) { - slave_print_error(rli, 0, "Rolling back unfinished transaction (no " - "COMMIT or ROLLBACK) from relay log. A probable cause " - "is that the master died while writing the transaction " - "to its binary log."); - end_trans(thd, ROLLBACK); + /* This is not an error (XA is safe), just informational */ + slave_print_msg(INFORMATION_LEVEL, rli, 0, + "Rolling back unfinished transaction (no COMMIT " + "or ROLLBACK in relay log). A probable cause is that " + "the master died while writing the transaction to " + "its binary log, so it was rolled back there too."); + rli->cleanup_context(thd, 1); } #endif /* @@ -2754,11 +2818,14 @@ int Load_log_event::exec_event(NET* net, struct st_relay_log_info* rli, bool use_rli_only_for_errors) { thd->db_length= db_len; - thd->db= (char*) rewrite_db(db, &thd->db_length); + thd->db= (char *) rpl_filter->get_rewrite_db(db, &thd->db_length); DBUG_ASSERT(thd->query == 0); thd->query_length= 0; // Should not be needed thd->query_error= 0; clear_all_errors(thd, rli); + + /* see Query_log_event::exec_event() and BUG#13360 */ + DBUG_ASSERT(!rli->m_table_map.count()); /* Usually mysql_init_query() is called by mysql_parse(), but we need it here as the present method does not call mysql_parse(). */ — wait, continuing diff: @@ -2793,7 +2860,7 @@ int Load_log_event::exec_event(NET* net, struct st_relay_log_info* rli, ::exec_event(), then the companion SET also have so we don't need to reset_one_shot_variables(). */ - if (db_ok(thd->db, replicate_do_db, replicate_ignore_db)) + if (rpl_filter->db_ok(thd->db)) { thd->set_time((time_t)when); VOID(pthread_mutex_lock(&LOCK_thread_count)); @@ -2815,7 +2882,7 @@ int Load_log_event::exec_event(NET* net, struct st_relay_log_info* rli, tables.updating= 1; // the table will be opened in mysql_load - if (table_rules_on && !tables_ok(thd, &tables)) + if (rpl_filter->is_on() && !rpl_filter->tables_ok(thd->db, &tables)) { // TODO: this is a bug - this needs to be moved to the I/O thread if (net) @@ -2970,9 +3037,9 @@ error: sql_errno=ER_UNKNOWN_ERROR; err=ER(sql_errno); } - slave_print_error(rli,sql_errno,"\ + slave_print_msg(ERROR_LEVEL, rli, sql_errno,"\ Error '%s' running LOAD DATA INFILE on table '%s'. Default database: '%s'", - err, (char*)table_name, print_slave_db_safe(save_db)); + err, (char*)table_name, print_slave_db_safe(save_db)); free_root(thd->mem_root,MYF(MY_KEEP_PREALLOC)); return 1; } @@ -2980,9 +3047,9 @@ Error '%s' running LOAD DATA INFILE on table '%s'. Default database: '%s'", if (thd->is_fatal_error) { - slave_print_error(rli,ER_UNKNOWN_ERROR, "\ + slave_print_msg(ERROR_LEVEL, rli, ER_UNKNOWN_ERROR, "\ Fatal error running LOAD DATA INFILE on table '%s'.
Default database: '%s'", - (char*)table_name, print_slave_db_safe(save_db)); + (char*)table_name, print_slave_db_safe(save_db)); return 1; } @@ -3043,8 +3110,7 @@ void Rotate_log_event::print(FILE* file, PRINT_EVENT_INFO* print_event_info) #ifndef MYSQL_CLIENT -Rotate_log_event::Rotate_log_event(THD* thd_arg, - const char* new_log_ident_arg, +Rotate_log_event::Rotate_log_event(const char* new_log_ident_arg, uint ident_len_arg, ulonglong pos_arg, uint flags_arg) :Log_event(), new_log_ident(new_log_ident_arg), @@ -3053,7 +3119,7 @@ Rotate_log_event::Rotate_log_event(THD* thd_arg, { #ifndef DBUG_OFF char buff[22]; - DBUG_ENTER("Rotate_log_event::Rotate_log_event(THD*,...)"); + DBUG_ENTER("Rotate_log_event::Rotate_log_event(...,flags)"); DBUG_PRINT("enter",("new_log_ident %s pos %s flags %lu", new_log_ident_arg, llstr(pos_arg, buff), flags)); #endif @@ -3361,12 +3427,24 @@ int Rand_log_event::exec_event(struct st_relay_log_info* rli) Xid_log_event methods **************************************************************************/ +#if !defined(DBUG_OFF) && !defined(MYSQL_CLIENT) +/* + This static class member could be removed when mysqltest is made to support + a --replace-regex command: then tests which have XIDs in their output can + use this command to suppress non-deterministic XID values. +*/ +my_bool Xid_log_event::show_xid; +#endif + #if defined(HAVE_REPLICATION) && !defined(MYSQL_CLIENT) void Xid_log_event::pack_info(Protocol *protocol) { char buf[128], *pos; pos= strmov(buf, "COMMIT /* xid="); - pos= longlong10_to_str(xid, pos, 10); +#if !defined(DBUG_OFF) && !defined(MYSQL_CLIENT) + if (show_xid) +#endif + pos= longlong10_to_str(xid, pos, 10); pos= strmov(pos, " */"); protocol->store(buf, (uint) (pos-buf), &my_charset_bin); } @@ -4186,9 +4264,8 @@ int Create_file_log_event::exec_event(struct st_relay_log_info* rli) init_io_cache(&file, fd, IO_SIZE, WRITE_CACHE, (my_off_t)0, 0, MYF(MY_WME|MY_NABP))) { - slave_print_error(rli,my_errno, - "Error in Create_file event: could not open file '%s'", - fname_buf); + slave_print_msg(ERROR_LEVEL, rli, my_errno, "Error in Create_file event: " + "could not open file '%s'", fname_buf); goto err; } @@ -4198,10 +4275,9 @@ int Create_file_log_event::exec_event(struct st_relay_log_info* rli) if (write_base(&file)) { strmov(ext, ".info"); // to have it right in the error message - slave_print_error(rli,my_errno, - "Error in Create_file event: could not write to file " - "'%s'", - fname_buf); + slave_print_msg(ERROR_LEVEL, rli, my_errno, + "Error in Create_file event: could not write to file '%s'", + fname_buf); goto err; } end_io_cache(&file); @@ -4213,16 +4289,14 @@ int Create_file_log_event::exec_event(struct st_relay_log_info* rli) O_WRONLY | O_BINARY | O_EXCL | O_NOFOLLOW, MYF(MY_WME))) < 0) { - slave_print_error(rli,my_errno, - "Error in Create_file event: could not open file '%s'", - fname_buf); + slave_print_msg(ERROR_LEVEL, rli, my_errno, "Error in Create_file event: " + "could not open file '%s'", fname_buf); goto err; } if (my_write(fd, (byte*) block, block_len, MYF(MY_WME+MY_NABP))) { - slave_print_error(rli,my_errno, - "Error in Create_file event: write to '%s' failed", - fname_buf); + slave_print_msg(ERROR_LEVEL, rli, my_errno, "Error in Create_file event: " + "write to '%s' failed", fname_buf); goto err; } error=0; // Everything is ok @@ -4360,25 +4434,25 @@ int Append_block_log_event::exec_event(struct st_relay_log_info* rli) O_WRONLY | O_BINARY | O_EXCL | O_NOFOLLOW, MYF(MY_WME))) < 0) { - slave_print_error(rli, my_errno, - "Error in %s 
event: could not create file '%s'", - get_type_str(), fname); + slave_print_msg(ERROR_LEVEL, rli, my_errno, + "Error in %s event: could not create file '%s'", + get_type_str(), fname); goto err; } } else if ((fd = my_open(fname, O_WRONLY | O_APPEND | O_BINARY | O_NOFOLLOW, MYF(MY_WME))) < 0) { - slave_print_error(rli, my_errno, - "Error in %s event: could not open file '%s'", - get_type_str(), fname); + slave_print_msg(ERROR_LEVEL, rli, my_errno, + "Error in %s event: could not open file '%s'", + get_type_str(), fname); goto err; } if (my_write(fd, (byte*) block, block_len, MYF(MY_WME+MY_NABP))) { - slave_print_error(rli, my_errno, - "Error in %s event: write to '%s' failed", - get_type_str(), fname); + slave_print_msg(ERROR_LEVEL, rli, my_errno, + "Error in %s event: write to '%s' failed", + get_type_str(), fname); goto err; } error=0; @@ -4584,9 +4658,8 @@ int Execute_load_log_event::exec_event(struct st_relay_log_info* rli) init_io_cache(&file, fd, IO_SIZE, READ_CACHE, (my_off_t)0, 0, MYF(MY_WME|MY_NABP))) { - slave_print_error(rli,my_errno, - "Error in Exec_load event: could not open file '%s'", - fname); + slave_print_msg(ERROR_LEVEL, rli, my_errno, "Error in Exec_load event: " + "could not open file '%s'", fname); goto err; } if (!(lev = (Load_log_event*)Log_event::read_log_event(&file, @@ -4594,9 +4667,8 @@ int Execute_load_log_event::exec_event(struct st_relay_log_info* rli) rli->relay_log.description_event_for_exec)) || lev->get_type_code() != NEW_LOAD_EVENT) { - slave_print_error(rli,0, - "Error in Exec_load event: file '%s' appears corrupted", - fname); + slave_print_msg(ERROR_LEVEL, rli, 0, "Error in Exec_load event: " + "file '%s' appears corrupted", fname); goto err; } @@ -4622,10 +4694,10 @@ int Execute_load_log_event::exec_event(struct st_relay_log_info* rli) char *tmp= my_strdup(rli->last_slave_error,MYF(MY_WME)); if (tmp) { - slave_print_error(rli, - rli->last_slave_errno, /* ok to re-use error code */ - "%s. Failed executing load from '%s'", - tmp, fname); + slave_print_msg(ERROR_LEVEL, rli, + rli->last_slave_errno, /* ok to re-use error code */ + "%s. 
Failed executing load from '%s'", + tmp, fname); my_free(tmp,MYF(0)); } goto err; } @@ -4831,7 +4903,7 @@ Execute_load_query_log_event::exec_event(struct st_relay_log_info* rli) if (!(buf = my_malloc(q_len + 1 - (fn_pos_end - fn_pos_start) + (FN_REFLEN + 10) + 10 + 8 + 5, MYF(MY_WME)))) { - slave_print_error(rli, my_errno, "Not enough memory"); + slave_print_msg(ERROR_LEVEL, rli, my_errno, "Not enough memory"); return 1; } @@ -4956,3 +5028,1727 @@ char* sql_ex_info::init(char* buf,char* buf_end,bool use_new_format) } return buf; } + + +#ifdef HAVE_ROW_BASED_REPLICATION + +/************************************************************************** + Rows_log_event member functions +**************************************************************************/ + +#ifndef MYSQL_CLIENT +Rows_log_event::Rows_log_event(THD *thd_arg, TABLE *tbl_arg, ulong tid, + MY_BITMAP const *cols, bool is_transactional) + : Log_event(thd_arg, 0, is_transactional), + m_table(tbl_arg), + m_table_id(tid), + m_width(tbl_arg->s->fields), + m_rows_buf(my_malloc(opt_binlog_rows_event_max_size * sizeof(*m_rows_buf), MYF(MY_WME))), + m_rows_cur(m_rows_buf), + m_rows_end(m_rows_buf + opt_binlog_rows_event_max_size), + m_flags(0) +{ + DBUG_ASSERT(m_table && m_table->s); + DBUG_ASSERT(m_table_id != ULONG_MAX); + + if (thd_arg->options & OPTION_NO_FOREIGN_KEY_CHECKS) + set_flags(NO_FOREIGN_KEY_CHECKS_F); + if (thd_arg->options & OPTION_RELAXED_UNIQUE_CHECKS) + set_flags(RELAXED_UNIQUE_CHECKS_F); + /* if bitmap_init fails, it is caught in is_valid() */ + if (likely(!bitmap_init(&m_cols, + m_width <= sizeof(m_bitbuf)*8 ? m_bitbuf : NULL, + (m_width + 7) & ~7UL, + false))) + memcpy(m_cols.bitmap, cols->bitmap, no_bytes_in_map(cols)); + else + m_cols.bitmap= 0; // so it will not be freed +} +#endif + +Rows_log_event::Rows_log_event(const char *buf, uint event_len, + Log_event_type event_type, + const Format_description_log_event + *description_event) + : Log_event(buf, description_event), + m_rows_buf(0), m_rows_cur(0), m_rows_end(0) +{ + DBUG_ENTER("Rows_log_event::Rows_log_event(const char*,...)"); + uint8 const common_header_len= description_event->common_header_len; + uint8 const post_header_len= description_event->post_header_len[event_type-1]; + + DBUG_PRINT("enter",("event_len=%ld, common_header_len=%d, " + "post_header_len=%d", + event_len, common_header_len, + post_header_len)); + + const char *post_start= buf + common_header_len; + post_start+= RW_MAPID_OFFSET; + if (post_header_len == 6) + { + /* Master is of an intermediate source tree before 5.1.4. Id is 4 bytes */ + m_table_id= uint4korr(post_start); + post_start+= 4; + } + else + { + m_table_id= uint6korr(post_start); + post_start+= RW_FLAGS_OFFSET; + } + + DBUG_ASSERT(m_table_id != ULONG_MAX); + + m_flags= uint2korr(post_start); + + byte const *const var_start= buf + common_header_len + post_header_len; + byte const *const ptr_width= var_start; + byte const *const ptr_after_width= my_vle_decode(&m_width, ptr_width); + + const uint byte_count= (m_width + 7) / 8; + const char* const ptr_rows_data= var_start + byte_count + 1; + + my_size_t const data_size= event_len - (ptr_rows_data - buf); + DBUG_PRINT("info",("m_table_id=%lu, m_flags=%d, m_width=%u, data_size=%lu", + m_table_id, m_flags, m_width, data_size)); + + m_rows_buf= my_malloc(data_size, MYF(MY_WME)); + if (likely((bool)m_rows_buf)) + { + /* if bitmap_init fails, it is caught in is_valid() */ + if (likely(!bitmap_init(&m_cols, + m_width <= sizeof(m_bitbuf)*8 ?
m_bitbuf : NULL, + (m_width + 7) & ~7UL, + false))) + memcpy(m_cols.bitmap, ptr_after_width, byte_count); + m_rows_end= m_rows_buf + data_size; + m_rows_cur= m_rows_end; + memcpy(m_rows_buf, ptr_rows_data, data_size); + } + else + m_cols.bitmap= 0; // so it will not be freed + + DBUG_VOID_RETURN; +} + +Rows_log_event::~Rows_log_event() +{ + if (m_cols.bitmap == m_bitbuf) // no my_malloc happened + m_cols.bitmap= 0; // so no my_free in bitmap_free + bitmap_free(&m_cols); // To pair with bitmap_init(). + my_free(m_rows_buf, MYF(MY_ALLOW_ZERO_PTR)); +} + +#ifndef MYSQL_CLIENT +int Rows_log_event::do_add_row_data(byte *const row_data, + my_size_t const length) +{ + /* + When the table has a primary key, we would probably want, by default, to + log only the primary key value instead of the entire "before image". This + would save binlog space. TODO + */ + DBUG_ENTER("Rows_log_event::do_add_row_data(byte *data, my_size_t length)"); + DBUG_PRINT("enter", ("row_data= %p, length= %lu", row_data, length)); + DBUG_DUMP("row_data", row_data, min(length, 32)); + + DBUG_ASSERT(m_rows_buf <= m_rows_cur); + DBUG_ASSERT(m_rows_buf < m_rows_end); + DBUG_ASSERT(m_rows_cur <= m_rows_end); + + /* The cast will always work since m_rows_cur <= m_rows_end */ + if (static_cast<my_size_t>(m_rows_end - m_rows_cur) < length) + { + my_size_t const block_size= 1024; + my_ptrdiff_t const old_alloc= m_rows_end - m_rows_buf; + /* Round the needed growth up to a whole number of blocks */ + my_ptrdiff_t const new_alloc= + old_alloc + block_size * ((length + block_size - 1) / block_size); + my_ptrdiff_t const cur_size= m_rows_cur - m_rows_buf; + + byte* const new_buf= my_realloc(m_rows_buf, new_alloc, MYF(MY_WME)); + if (unlikely(!new_buf)) + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + + /* If the memory moved, we need to move the pointers */ + if (new_buf != m_rows_buf) + { + m_rows_buf= new_buf; + m_rows_cur= m_rows_buf + cur_size; + } + + /* + The end pointer should always be changed to point to the end of + the allocated memory. + */ + m_rows_end= m_rows_buf + new_alloc; + } + + DBUG_ASSERT(m_rows_cur + length <= m_rows_end); + memcpy(m_rows_cur, row_data, length); + m_rows_cur+= length; + DBUG_RETURN(0); +} +#endif + +#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) +/* + Unpack a row into a record. The row is assumed to contain only the fields + for which the corresponding bit in 'cols' is set; the other parts of the + record are left alone. + */ +static char const *unpack_row(TABLE *table, + char *record, char const *row, + MY_BITMAP const *cols) +{ + DBUG_ASSERT(record && row); + + MY_BITMAP *write_set= table->file->write_set; + my_size_t const n_null_bytes= table->s->null_bytes; + my_ptrdiff_t const offset= record - (byte*) table->record[0]; + + memcpy(record, row, n_null_bytes); + char const *ptr= row + n_null_bytes; + + bitmap_set_all(write_set); + Field **const begin_ptr = table->field; + for (Field **field_ptr= begin_ptr ; *field_ptr ; ++field_ptr) + { + Field *const f= *field_ptr; + + if (bitmap_is_set(cols, field_ptr - begin_ptr)) + { + /* Field...::unpack() cannot return 0 */ + ptr= f->unpack(f->ptr + offset, ptr); + } + else + bitmap_clear_bit(write_set, (field_ptr - begin_ptr) + 1); + } + return ptr; +} + +int Rows_log_event::exec_event(st_relay_log_info *rli) +{ + DBUG_ENTER("Rows_log_event::exec_event(st_relay_log_info*)"); + DBUG_ASSERT(m_table_id != ULONG_MAX); + int error= 0; + char const *row_start= m_rows_buf; + TABLE* table= rli->m_table_map.get_table(m_table_id); + + /* + 'thd' has been set by exec_relay_log_event(), just before calling + exec_event().
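+ The assertion below is compiled away in non-debug builds, so the check + costs nothing in production.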
We still check here to prevent future coding errors. + */ + DBUG_ASSERT(rli->sql_thd == thd); + + /* + lock_tables() reads the contents of thd->lex, so they must be + initialized, so we should call lex_start(); to be even safer, we call + mysql_init_query() which does a more complete set of inits. + */ + mysql_init_query(thd, NULL, 0); + + if (table) + { + /* + A NULL table pointer would mean that this table should not be + replicated (this was set up by Table_map_log_event::exec_event(), + which tested the replicate-* rules). + */ + TABLE_LIST table_list; + bool need_reopen; + uint count= 1; + bzero(&table_list, sizeof(table_list)); + table_list.lock_type= TL_WRITE; + table_list.next_global= table_list.next_local= 0; + table_list.table= table; + + for ( ; ; ) + { + table_list.db= const_cast<char*>(table->s->db.str); + table_list.alias= table_list.table_name= + const_cast<char*>(table->s->table_name.str); + + if ((error= lock_tables(thd, &table_list, count, &need_reopen)) == 0) + break; + if (!need_reopen) + { + slave_print_msg(ERROR_LEVEL, rli, error, + "Error in %s event: error during table %s.%s lock", + get_type_str(), table->s->db.str, + table->s->table_name.str); + DBUG_RETURN(error); + } + /* + We need to store a local copy of the table names since the table object + will become invalid after close_tables_for_reopen + */ + char *db= my_strdup(table->s->db.str, MYF(MY_WME)); + char *table_name= my_strdup(table->s->table_name.str, MYF(MY_WME)); + + if (db == 0 || table_name == 0) + { + /* + Since lock_tables() failed, the table is not locked, so + we don't need to unlock it. + */ + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + } + + /* + We also need to flush the pending RBR event, since it keeps a + pointer to an open table. + + ALTERNATIVE SOLUTION: Extract a pointer to the pending RBR + event and reset the table pointer after the tables have been + reopened. + */ + thd->binlog_flush_pending_rows_event(false); + + close_tables_for_reopen(thd, &table_list); + + /* open the table again, same as in Table_map_log_event::exec_event */ + table_list.db= const_cast<char*>(db); + table_list.alias= table_list.table_name= const_cast<char*>(table_name); + table_list.updating= 1; + TABLE_LIST *tables= &table_list; + if ((error= open_tables(thd, &tables, &count, 0)) == 0) + { + /* reset some variables for the table list */ + table_list.updating= 0; + /* retrieve the new table reference and update the table map */ + table= table_list.table; + error= rli->m_table_map.set_table(m_table_id, table); + } + else /* error in open_tables */ + { + if (thd->query_error || thd->is_fatal_error) + { + /* + Error reporting borrowed from Query_log_event with many excessive + simplifications (we don't honour --slave-skip-errors) + */ + uint actual_error= thd->net.last_errno; + slave_print_msg(ERROR_LEVEL, rli, actual_error, + "Error '%s' on reopening table `%s`.`%s`", + (actual_error ? thd->net.last_error : + "unexpected success or fatal error"), + db, table_name); + thd->query_error= 1; + } + } + my_free((char*) db, MYF(MY_ALLOW_ZERO_PTR)); + my_free((char*) table_name, MYF(MY_ALLOW_ZERO_PTR)); + + if (error) + DBUG_RETURN(error); + } + + /* + It's not needed to set_time() but + 1) it continues the property that "Time" in SHOW PROCESSLIST shows how + much the slave is behind + 2) it will be needed when we allow replication from a table with no + TIMESTAMP column to a table with one. + So we call set_time(), like in SBR. Presently it changes nothing. + */ + thd->set_time((time_t)when); + /* + There are a few flags that are replicated with each row event.
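+ (Currently NO_FOREIGN_KEY_CHECKS_F and RELAXED_UNIQUE_CHECKS_F, which + mirror SET FOREIGN_KEY_CHECKS and SET UNIQUE_CHECKS on the master.)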
+ Make sure to set/clear them before executing the main body of + the event. + */ + if (get_flags(NO_FOREIGN_KEY_CHECKS_F)) + thd->options|= OPTION_NO_FOREIGN_KEY_CHECKS; + else + thd->options&= ~OPTION_NO_FOREIGN_KEY_CHECKS; + + if (get_flags(RELAXED_UNIQUE_CHECKS_F)) + thd->options|= OPTION_RELAXED_UNIQUE_CHECKS; + else + thd->options&= ~OPTION_RELAXED_UNIQUE_CHECKS; + /* A small test to verify that objects have consistent types */ + DBUG_ASSERT(sizeof(thd->options) == sizeof(OPTION_RELAXED_UNIQUE_CHECKS)); + + error= do_before_row_operations(table); + while (error == 0 && row_start < m_rows_end) { + char const *row_end= do_prepare_row(thd, table, row_start); + DBUG_ASSERT(row_end != NULL); // cannot happen + DBUG_ASSERT(row_end <= m_rows_end); + + /* in_use may have been set to NULL in close_tables_for_reopen */ + THD* old_thd= table->in_use; + if (!table->in_use) + table->in_use= thd; + error= do_exec_row(table); + table->in_use = old_thd; + switch (error) + { + /* Some recoverable errors */ + case HA_ERR_RECORD_CHANGED: + case HA_ERR_KEY_NOT_FOUND: /* Idempotency support: OK if + tuple does not exist */ + error= 0; + /* fall through */ + case 0: + break; + + default: + slave_print_msg(ERROR_LEVEL, rli, error, + "Error in %s event: row application failed", + get_type_str()); + thd->query_error= 1; + break; + } + + row_start= row_end; + } + DBUG_EXECUTE_IF("STOP_SLAVE_after_first_Rows_event", + rli->abort_slave=1;); + error= do_after_row_operations(table, error); + if (!cache_stmt) + thd->options|= OPTION_STATUS_NO_TRANS_UPDATE; + + } + + if (error) + { /* an error has occurred during the transaction */ + /* + If one day we honour --slave-skip-errors in row-based replication, and + the error should be skipped, then we would clear mappings, rollback, + close tables, but the slave SQL thread would not stop and then may + assume the mapping is still available, the tables are still open... + So then we should clear mappings/rollback/close here only if this is a + STMT_END_F. + For now we know the error is not skippable, so the slave SQL + thread is certainly going to stop. + */ + rli->cleanup_context(thd, 1); + thd->query_error= 1; + DBUG_RETURN(error); + } + + if (get_flags(STMT_END_F)) + { + /* + This is the end of a statement or transaction, so close (and + unlock) the tables we opened when processing the + Table_map_log_event starting the statement. + + OBSERVER. This will clear *all* mappings, not only those that + are open for the table. There is no good hook for on-close + actions on tables. + + NOTE. Even if we have no table ('table' == 0) we still need to be + here, so that we increase the group relay log position. If we didn't, we + could have a group relay log position which lags behind "forever" + (assume the last master's transaction is ignored by the slave because of + replicate-ignore rules). + */ + thd->binlog_flush_pending_rows_event(true); + /* + If this event is not in a transaction, the call below will, if some + transactional storage engines are involved, commit the statement into + them and flush the pending event to binlog. + If this event is in a transaction, the call will do nothing, but a + Xid_log_event will come next which will, if some transactional engines + are involved, commit the transaction and flush the pending event to the + binlog. + */ + error= ha_autocommit_or_rollback(thd, 0); + /* + Now what if this is not a transactional engine? We still need to + flush the pending event to the binlog; we did it with + thd->binlog_flush_pending_rows_event().
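+ A sketch of the non-transactional case (illustrative): + Table_map -> Write_rows -> ... -> Write_rows(STMT_END_F) + -> flush pending event -> ha_autocommit_or_rollback().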
Note that we imitate + what is done for real queries: a call to + ha_autocommit_or_rollback() (sometimes only if it involves a + transactional engine), and a call to be sure to have the pending + event flushed. + */ + + rli->cleanup_context(thd, 0); + rli->transaction_end(thd); + + if (error == 0) + { + /* + Clear any errors pushed in thd->net.last_err* if for example "no key + found" (as this is allowed). This is a safety measure; apparently + those errors (e.g. when executing a Delete_rows_log_event of a + non-existing row, like in rpl_row_mystery22.test, + thd->net.last_error = "Can't find record in 't1'" and last_errno=1032) + do not become visible. We still prefer to wipe them out. + */ + thd->clear_error(); + error= Log_event::exec_event(rli); + } + else + slave_print_msg(ERROR_LEVEL, rli, error, + "Error in %s event: commit of row events failed, " + "table `%s`.`%s`", + get_type_str(), table->s->db.str, + table->s->table_name.str); + DBUG_RETURN(error); + } + + if (table) + { + /* + As "table" is not NULL, we did a successful lock_tables(), without any + prior LOCK TABLES and are not in prelocked mode, so this assertion should + be true. + */ + DBUG_ASSERT(thd->lock); + /* + If we are here, there are more events to come which may use our mappings + and our table. So don't clear mappings or close tables, just unlock + tables. + Why don't we lock the table once and for all in + Table_map_log_event::exec_event() ? Because we could have in binlog: + BEGIN; + Table_map t1 -> 1 + Write_rows to id 1 + Table_map t2 -> 2 + Write_rows to id 2 + Xid_log_event + So we cannot lock t1 when executing the first Table_map, because at that + moment we don't know we'll also have to lock t2, and all tables must be + locked at once in MySQL. + */ + mysql_unlock_tables(thd, thd->lock); + thd->lock= 0; + if ((table->s->primary_key == MAX_KEY) && + !cache_stmt) + { + /* + ------------ Temporary fix until WL#2975 is implemented --------- + This event is not the last one (no STMT_END_F). If we stop now (in + case of terminate_slave_thread()), how will we restart? We have to + restart from Table_map_log_event, but as this table is not + transactional, the rows already inserted will still be present, and + idempotency is not guaranteed (no PK) so we risk that repeating leads + to double insert. So we desperately try to continue, hoping we'll + eventually leave this buggy situation (by executing the final + Rows_log_event). If we are in a hopeless wait (reached end of last + relay log and nothing gets appended there), we time out after one + minute and notify the DBA about the problem. + When WL#2975 is implemented, just remove the member + st_relay_log_info::unsafe_to_stop_at and all its occurrences.
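+ (Worked example of the risk: a large insert into a MyISAM table arrives + as a Table_map followed by several Write_rows events; restarting from + the Table_map would re-apply rows already inserted, and without a PK + the repeated rows are duplicated instead of replaced.)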
+ */ + rli->unsafe_to_stop_at= time(0); + } + } + + DBUG_ASSERT(error == 0); + thd->clear_error(); + rli->inc_event_relay_log_pos(); + + DBUG_RETURN(0); +} +#endif /* !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) */ + +#ifndef MYSQL_CLIENT +bool Rows_log_event::write_data_header(IO_CACHE *file) +{ + DBUG_ASSERT(m_table_id != ULONG_MAX); + byte buf[ROWS_HEADER_LEN]; // No need to init the buffer + DBUG_EXECUTE_IF("old_row_based_repl_4_byte_map_id_master", + { + int4store(buf + 0, m_table_id); + int2store(buf + 4, m_flags); + return (my_b_safe_write(file, buf, 6)); + }); + int6store(buf + RW_MAPID_OFFSET, (ulonglong)m_table_id); + int2store(buf + RW_FLAGS_OFFSET, m_flags); + return (my_b_safe_write(file, buf, ROWS_HEADER_LEN)); +} + +bool Rows_log_event::write_data_body(IO_CACHE *file) +{ + /* + Note that m_width is encoded as the number of *bits* in the column + bitmap, not the number of bytes. + */ + byte sbuf[my_vle_sizeof(m_width)]; + my_ptrdiff_t const data_size= m_rows_cur - m_rows_buf; + + char *const sbuf_end= my_vle_encode(sbuf, sizeof(sbuf), m_width); + DBUG_ASSERT(static_cast<my_size_t>(sbuf_end - sbuf) <= sizeof(sbuf)); + + return (my_b_safe_write(file, sbuf, sbuf_end - sbuf) || + my_b_safe_write(file, reinterpret_cast<byte*>(m_cols.bitmap), + no_bytes_in_map(&m_cols)) || + my_b_safe_write(file, m_rows_buf, data_size)); +} +#endif + +#if defined(HAVE_REPLICATION) && !defined(MYSQL_CLIENT) && defined(DBUG_RBR) +void Rows_log_event::pack_info(Protocol *protocol) +{ + char buf[256]; + char const *const flagstr= get_flags(STMT_END_F) ? "STMT_END_F" : ""; + char const *const dbnam= m_table->s->db.str; + char const *const tblnam= m_table->s->table_name.str; + my_size_t bytes= snprintf(buf, sizeof(buf), + "%s.%s - %s", dbnam, tblnam, flagstr); + protocol->store(buf, bytes, &my_charset_bin); +} +#endif + +/************************************************************************** + Table_map_log_event member functions +**************************************************************************/ + +/* + Constructor used to build an event for writing to the binary log. + Mats says tbl->s lives longer than this event so it's ok to copy pointers + (tbl->s->db etc) and not pointer content. + */ +#if !defined(MYSQL_CLIENT) +Table_map_log_event::Table_map_log_event(THD *thd, TABLE *tbl, ulong tid, + bool is_transactional, uint16 flags) + : Log_event(thd, 0, is_transactional), + m_table(tbl), + m_dbnam(tbl->s->db.str), + m_dblen(m_dbnam ? tbl->s->db.length : 0), + m_tblnam(tbl->s->table_name.str), + m_tbllen(tbl->s->table_name.length), + m_colcnt(tbl->s->fields), m_coltype(0), + m_table_id(tid), + m_flags(flags) +{ + DBUG_ASSERT(m_table_id != ULONG_MAX); + /* + In TABLE_SHARE, "db" and "table_name" are 0-terminated (see this comment in + table.cc / alloc_table_share(): + Use the fact the key is db/0/table_name/0 + As we rely on this, let's assert it.
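+ (The share key layout is db\0table_name\0, hence both strings are + NUL-terminated in place.)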
+ */ + DBUG_ASSERT((tbl->s->db.str == 0) || + (tbl->s->db.str[tbl->s->db.length] == 0)); + DBUG_ASSERT(tbl->s->table_name.str[tbl->s->table_name.length] == 0); + + + m_data_size= TABLE_MAP_HEADER_LEN; + DBUG_EXECUTE_IF("old_row_based_repl_4_byte_map_id_master", m_data_size= 6;); + m_data_size+= m_dblen + 2; // Include length and terminating \0 + m_data_size+= m_tbllen + 2; // Include length and terminating \0 + m_data_size+= 1 + m_colcnt; // COLCNT and column types + + /* If malloc fails, it is caught in is_valid() */ + if ((m_memory= my_malloc(m_colcnt, MYF(MY_WME)))) + { + m_coltype= reinterpret_cast<unsigned char*>(m_memory); + for (unsigned int i= 0 ; i < m_table->s->fields ; ++i) + m_coltype[i]= m_table->field[i]->type(); + } +} +#endif /* !defined(MYSQL_CLIENT) */ + +/* + Constructor used by slave to read the event from the binary log. + */ +#if defined(HAVE_REPLICATION) +Table_map_log_event::Table_map_log_event(const char *buf, uint event_len, + const Format_description_log_event + *description_event) + + : Log_event(buf, description_event), +#ifndef MYSQL_CLIENT + m_table(NULL), +#endif + m_memory(NULL) +{ + DBUG_ENTER("Table_map_log_event::Table_map_log_event(const char*,uint,...)"); + + uint8 common_header_len= description_event->common_header_len; + uint8 post_header_len= description_event->post_header_len[TABLE_MAP_EVENT-1]; + DBUG_PRINT("info",("event_len=%ld, common_header_len=%d, post_header_len=%d", + event_len, common_header_len, post_header_len)); + + DBUG_DUMP("event buffer", buf, event_len); + + /* Read the post-header */ + const char *post_start= buf + common_header_len; + + post_start+= TM_MAPID_OFFSET; + if (post_header_len == 6) + { + /* Master is of an intermediate source tree before 5.1.4. Id is 4 bytes */ + m_table_id= uint4korr(post_start); + post_start+= 4; + } + else + { + DBUG_ASSERT(post_header_len == TABLE_MAP_HEADER_LEN); + m_table_id= uint6korr(post_start); + post_start+= TM_FLAGS_OFFSET; + } + + DBUG_ASSERT(m_table_id != ULONG_MAX); + + m_flags= uint2korr(post_start); + + /* Read the variable part of the event */ + const char *const vpart= buf + common_header_len + post_header_len; + + /* Extract the length of the various parts from the buffer */ + byte const* const ptr_dblen= vpart + 0; + m_dblen= *(unsigned char*) ptr_dblen; + + /* Skip the length byte, the database name and its terminating null */ + byte const* const ptr_tbllen= ptr_dblen + m_dblen + 2; + m_tbllen= *(unsigned char*) ptr_tbllen; + + /* Skip the length byte, the table name and its terminating null */ + byte const* const ptr_colcnt= ptr_tbllen + m_tbllen + 2; + byte const* const ptr_after_colcnt= my_vle_decode(&m_colcnt, ptr_colcnt); + + DBUG_PRINT("info",("m_dblen=%d off=%d m_tbllen=%d off=%d m_colcnt=%d off=%d", + m_dblen, ptr_dblen-vpart, m_tbllen, ptr_tbllen-vpart, + m_colcnt, ptr_colcnt-vpart)); + + /* Allocate mem for all fields in one go. If it fails, caught in is_valid() */ + m_memory= my_multi_malloc(MYF(MY_WME), + &m_dbnam, m_dblen + 1, + &m_tblnam, m_tbllen + 1, + &m_coltype, m_colcnt, + NULL); + + if (m_memory) + { + /* Copy the different parts into their memory */ + strncpy(const_cast<char*>(m_dbnam), ptr_dblen + 1, m_dblen + 1); + strncpy(const_cast<char*>(m_tblnam), ptr_tbllen + 1, m_tbllen + 1); + memcpy(m_coltype, ptr_after_colcnt, m_colcnt); + } + + DBUG_VOID_RETURN; +} +#endif + +Table_map_log_event::~Table_map_log_event() +{ + my_free(m_memory, MYF(MY_ALLOW_ZERO_PTR)); +} + +/* + Find a table based on database name and table name.
+ + DESCRIPTION + + Currently, only the first table of the 'table_list' is located. If the + table is found in the list of open tables for the thread, the 'table' + field of 'table_list' is filled in. + + PARAMETERS + + thd Thread structure + table_list List of tables to locate in the thd->open_tables list. + count Pointer to a variable that will be set to the number of + tables found. If the pointer is NULL, nothing will be stored. + + RETURN VALUE + + The number of tables found. + + TO DO + + Replace the list of table searches with a hash based on the combined + database and table name. The handler_tables_hash is inappropriate since + it hashes on the table alias. At the same time, the function can be + extended to handle a full list of table names, in the same spirit as + open_tables() and lock_tables(). +*/ +#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) +static uint find_tables(THD *thd, TABLE_LIST *table_list, uint *count) +{ + uint result= 0; + + /* We verify that the caller knows our limitation */ + DBUG_ASSERT(table_list->next_global == 0); + for (TABLE *table= thd->open_tables; table ; table= table->next) + { + if (strcmp(table->s->db.str, table_list->db) == 0 + && strcmp(table->s->table_name.str, table_list->table_name) == 0) + { + /* Copy the table pointer into the table list. */ + table_list->table= table; + result= 1; + break; + } + } + + if (count) + *count= result; + return result; +} +#endif + +/* + Return value is an error code, one of: + + -1 Failure to open table [from open_tables()] + 0 Success + 1 No room for more tables [from set_table()] + 2 Out of memory [from set_table()] + 3 Wrong table definition + 4 Daisy-chaining RBR with SBR not possible + */ + +#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) +int Table_map_log_event::exec_event(st_relay_log_info *rli) +{ + DBUG_ENTER("Table_map_log_event::exec_event(st_relay_log_info*)"); + + DBUG_ASSERT(rli->sql_thd == thd); + + /* Step the query id to mark which columns are actually used. */ + pthread_mutex_lock(&LOCK_thread_count); + thd->query_id= next_query_id(); + pthread_mutex_unlock(&LOCK_thread_count); + + TABLE_LIST table_list; + uint32 dummy_len; + bzero(&table_list, sizeof(table_list)); + table_list.db= const_cast<char *> + (rpl_filter->get_rewrite_db(m_dbnam, &dummy_len)); + table_list.alias= table_list.table_name= const_cast<char*>(m_tblnam); + table_list.lock_type= TL_WRITE; + table_list.next_global= table_list.next_local= 0; + table_list.updating= 1; + + int error= 0; + + if (rpl_filter->db_ok(table_list.db) && + (!rpl_filter->is_on() || rpl_filter->tables_ok("", &table_list))) + { + /* + Check if the slave is set to use SBR. If so, the slave should + stop immediately since it is not possible to daisy-chain from + RBR to SBR. Once RBR is used, the rest of the chain has to use + RBR. + */ + if (mysql_bin_log.is_open() && (thd->options & OPTION_BIN_LOG) && + !binlog_row_based) + { + slave_print_msg(ERROR_LEVEL, rli, ER_BINLOG_ROW_RBR_TO_SBR, + "It is not possible to use statement-based binlogging " + "on a slave that replicates row-based. Please use " + "--binlog-format=row on the slave if you want to use " + "--log-slave-updates and read row-based binlog events."); + DBUG_RETURN(ERR_RBR_TO_SBR); + } + + /* + Open the table if it is not already open and add the table to the table + map. If the table should not be replicated, we don't bother to do + anything. The table map will return NULL and the row-level event will + effectively be a no-op.
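+ (Illustration: with replicate-ignore-table matching this table, no + mapping is stored, so Rows_log_event::exec_event() gets a NULL table + and merely advances the relay log position.)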
+ */ + uint count; + if (find_tables(thd, &table_list, &count) == 0) + { + /* + open_tables() reads the contents of thd->lex, so they must be + initialized, so we should call lex_start(); to be even safer, we call + mysql_init_query() which does a more complete set of inits. + */ + mysql_init_query(thd, NULL, 0); + TABLE_LIST *tables= &table_list; + if ((error= open_tables(thd, &tables, &count, 0))) + { + if (thd->query_error || thd->is_fatal_error) + { + /* + Error reporting borrowed from Query_log_event with many excessive + simplifications (we don't honour --slave-skip-errors) + */ + uint actual_error= thd->net.last_errno; + slave_print_msg(ERROR_LEVEL, rli, actual_error, + "Error '%s' on opening table `%s`.`%s`", + (actual_error ? thd->net.last_error : + "unexpected success or fatal error"), + table_list.db, table_list.table_name); + thd->query_error= 1; + } + DBUG_RETURN(error); + } + } + + m_table= table_list.table; + + /* + The 'in_use' field should be set to the current thread; things + will fail later otherwise. + */ + DBUG_ASSERT(m_table->in_use); + + /* + Check that the number of columns and the field types in the + event match the number of columns and field types in the opened + table. + */ + uint col= m_table->s->fields; + + if (col == m_colcnt) + { + while (col-- > 0) + if (m_table->field[col]->type() != m_coltype[col]) + break; + } + + TABLE_SHARE const *const tsh= m_table->s; + + /* + Check the following termination conditions: + + (col == m_table->s->fields) + ==> (m_table->s->fields != m_colcnt) + (0 <= col < m_table->s->fields) + ==> (m_table->field[col]->type() != m_coltype[col]) + + Logically, A ==> B is equivalent to !A || B + + Since col is unsigned, it suffices to check that col <= + tsh->fields. If col wrapped (by decreasing col when it is 0), + the number will be UINT_MAX, which is greater than tsh->fields. + */ + DBUG_ASSERT(!(col == tsh->fields) || tsh->fields != m_colcnt); + DBUG_ASSERT(!(col < tsh->fields) || + (m_table->field[col]->type() != m_coltype[col])); + + if (col <= tsh->fields) + { + /* + If we get here, the number of columns in the event didn't + match the number of columns in the table on the slave, *or* + there was a column in the table on the slave that did not + have the same type as given in the event. + + If 'col' still has the value it was initialized with, the + mismatch was in the number of columns on the master and the + slave. + */ + if (col == tsh->fields) + { + DBUG_ASSERT(tsh->db.str && tsh->table_name.str); + slave_print_msg(ERROR_LEVEL, rli, ER_BINLOG_ROW_WRONG_TABLE_DEF, + "Table width mismatch - " + "received %u columns, %s.%s has %u columns", + m_colcnt, tsh->db.str, tsh->table_name.str, tsh->fields); + } + else + { + DBUG_ASSERT(col < m_colcnt && col < tsh->fields); + DBUG_ASSERT(tsh->db.str && tsh->table_name.str); + slave_print_msg(ERROR_LEVEL, rli, ER_BINLOG_ROW_WRONG_TABLE_DEF, + "Column %d type mismatch - " + "received type %d, %s.%s has type %d", + col, m_coltype[col], tsh->db.str, tsh->table_name.str, + m_table->field[col]->type()); + } + + thd->query_error= 1; + DBUG_RETURN(ERR_BAD_TABLE_DEF); + } + + /* + We record in the slave's information that the number m_table_id is + mapped to the m_table object. + */ + if (!error) + error= rli->m_table_map.set_table(m_table_id, m_table); + + /* + Tell the RLI that we are touching a table. + + TODO: Maybe we can combine this with the previous operation?
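+ (Both calls receive the same table id, so a combined map-and-touch + member function would save a lookup; kept separate here for clarity.)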
+ */ + if (!error) + rli->touching_table(m_dbnam, m_tblnam, m_table_id); + } + + /* + We explicitly do not call Log_event::exec_event() here since we do not + want the relay log position to be flushed to disk. The flushing will be + done by the last Rows_log_event that either ends a statement (outside a + transaction) or a transaction. + + A table map event can *never* end a transaction or a statement, so we + just step the relay log position. + */ + + if (likely(!error)) + rli->inc_event_relay_log_pos(); + + DBUG_RETURN(error); +} +#endif /* !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) */ + +#ifndef MYSQL_CLIENT +bool Table_map_log_event::write_data_header(IO_CACHE *file) +{ + DBUG_ASSERT(m_table_id != ULONG_MAX); + byte buf[TABLE_MAP_HEADER_LEN]; + DBUG_EXECUTE_IF("old_row_based_repl_4_byte_map_id_master", + { + int4store(buf + 0, m_table_id); + int2store(buf + 4, m_flags); + return (my_b_safe_write(file, buf, 6)); + }); + int6store(buf + TM_MAPID_OFFSET, (ulonglong)m_table_id); + int2store(buf + TM_FLAGS_OFFSET, m_flags); + return (my_b_safe_write(file, buf, TABLE_MAP_HEADER_LEN)); +} + +bool Table_map_log_event::write_data_body(IO_CACHE *file) +{ + DBUG_ASSERT(m_dbnam != NULL); + DBUG_ASSERT(m_tblnam != NULL); + /* We use only one byte per length for storage in event: */ + DBUG_ASSERT(m_dblen < 128); + DBUG_ASSERT(m_tbllen < 128); + + byte const dbuf[]= { m_dblen }; + byte const tbuf[]= { m_tbllen }; + + byte cbuf[my_vle_sizeof(m_colcnt)]; + byte *const cbuf_end= my_vle_encode(cbuf, sizeof(cbuf), m_colcnt); + DBUG_ASSERT(static_cast<my_size_t>(cbuf_end - cbuf) <= sizeof(cbuf)); + + return (my_b_safe_write(file, dbuf, sizeof(dbuf)) || + my_b_safe_write(file, m_dbnam, m_dblen+1) || + my_b_safe_write(file, tbuf, sizeof(tbuf)) || + my_b_safe_write(file, m_tblnam, m_tbllen+1) || + my_b_safe_write(file, cbuf, cbuf_end - cbuf) || + my_b_safe_write(file, reinterpret_cast<char*>(m_coltype), m_colcnt)); +} +#endif + +#if defined(HAVE_REPLICATION) && !defined(MYSQL_CLIENT) + +/* + Print some useful information for the Info field of SHOW BINLOG + EVENTS. + */ + +void Table_map_log_event::pack_info(Protocol *protocol) +{ + char buf[256]; + my_size_t bytes= snprintf(buf, sizeof(buf), "%s.%s", m_dbnam, m_tblnam); + protocol->store(buf, bytes, &my_charset_bin); +} + +#endif + + +#ifdef MYSQL_CLIENT +void Table_map_log_event::print(FILE *file, PRINT_EVENT_INFO *print_event_info) +{ + if (!print_event_info->short_form) + { + print_header(file, print_event_info); + fprintf(file, "\tTable_map: `%s`.`%s` mapped to number %lu\n", + m_dbnam, m_tblnam, m_table_id); + print_base64(file, print_event_info); + } +} +#endif + +/************************************************************************** + Write_rows_log_event member functions +**************************************************************************/ + +/* + Constructor used to build an event for writing to the binary log. + */ +#if !defined(MYSQL_CLIENT) +Write_rows_log_event::Write_rows_log_event(THD *thd_arg, TABLE *tbl_arg, + ulong tid_arg, + MY_BITMAP const *cols, + bool is_transactional) + : Rows_log_event(thd_arg, tbl_arg, tid_arg, cols, is_transactional) +{ +} +#endif + +/* + Constructor used by slave to read the event from the binary log.
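+ All parsing is delegated to the Rows_log_event base constructor; only + the event type code (WRITE_ROWS_EVENT) differs.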
+ */ +#ifdef HAVE_REPLICATION +Write_rows_log_event::Write_rows_log_event(const char *buf, uint event_len, + const Format_description_log_event + *description_event) +: Rows_log_event(buf, event_len, WRITE_ROWS_EVENT, description_event) +{ +} +#endif + +#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) +int Write_rows_log_event::do_before_row_operations(TABLE *table) +{ + int error= 0; + + /* + We are using REPLACE semantics and not INSERT IGNORE semantics + when writing rows, that is: new rows replace old rows. We need to + inform the storage engine that it should use this behaviour. + */ + + /* Tell the storage engine that we are using REPLACE semantics. */ + thd->lex->duplicates= DUP_REPLACE; + + /* + Pretend we're executing a REPLACE command: this is needed for + InnoDB and NDB Cluster since they are not (properly) checking the + lex->duplicates flag. + */ + thd->lex->sql_command= SQLCOM_REPLACE; + + table->file->extra(HA_EXTRA_IGNORE_DUP_KEY); // needed for ndbcluster + /* + TODO: the cluster team (Tomas?) says that it's better if the engine knows + how many rows are going to be inserted; then it can allocate the needed + memory from the start. + */ + table->file->start_bulk_insert(0); + /* + We need TIMESTAMP_NO_AUTO_SET, otherwise ha_write_row() will not fill + any TIMESTAMP column with data from the row but will instead use + the event's current time. + As we replicate from TIMESTAMP to TIMESTAMP and the slave has no extra + columns, we know that all TIMESTAMP columns on the slave will receive + explicit data from the row, so TIMESTAMP_NO_AUTO_SET is ok. + When we allow a table without TIMESTAMP to be replicated to a table having + more columns including a TIMESTAMP column, or when we allow a TIMESTAMP + column to be replicated into a BIGINT column and the slave's table has a + TIMESTAMP column, then the slave's TIMESTAMP column will take its value + from set_time() which we called earlier (consistent with SBR). And then in + some cases we won't want TIMESTAMP_NO_AUTO_SET (will require some code to + analyze if explicit data is provided for slave's TIMESTAMP columns). + */ + table->timestamp_field_type= TIMESTAMP_NO_AUTO_SET; + return error; +} + +int Write_rows_log_event::do_after_row_operations(TABLE *table, int error) +{ + if (error == 0) + error= table->file->end_bulk_insert(); + return error; +} + +char const *Write_rows_log_event::do_prepare_row(THD *thd, TABLE *table, + char const *row_start) +{ + char const *ptr= row_start; + DBUG_ASSERT(table != NULL); + /* + This assertion actually checks that there are at least as many + columns on the slave as on the master. + */ + DBUG_ASSERT(table->s->fields >= m_width); + DBUG_ASSERT(ptr); + ptr= unpack_row(table, table->record[0], ptr, &m_cols); + return ptr; +} + +/* + Check if there are more UNIQUE keys after the given key. +*/ +static int +last_uniq_key(TABLE *table, uint keyno) +{ + while (++keyno < table->s->keys) + if (table->key_info[keyno].flags & HA_NOSAME) + return 0; + return 1; +} + +/* Anonymous namespace for template functions/classes */ +namespace { + + /* + Smart pointer that will automatically call my_afree (a macro) when + the pointer goes out of scope. This is used so that I do not have + to remember to call my_afree() before each return. There is no + overhead associated with this, since all functions are inline. + + I (Matz) would prefer to use the free function as a template + parameter, but that is not possible when the "function" is a + macro.
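+ + Usage sketch (mirrors replace_record() below): + + auto_afree_ptr<char> key(NULL); + key.assign(static_cast<char*>(my_alloca(len))); + ... use key.get() ... // my_afree() runs automatically at scope exit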
+ */ + template <class Obj> + class auto_afree_ptr + { + Obj* m_ptr; + public: + auto_afree_ptr(Obj* ptr) : m_ptr(ptr) { } + ~auto_afree_ptr() { if (m_ptr) my_afree(m_ptr); } + void assign(Obj* ptr) { + /* Only to be called if it hasn't been given a value before. */ + DBUG_ASSERT(m_ptr == NULL); + m_ptr= ptr; + } + Obj* get() { return m_ptr; } + }; + +} + + +/* + Replace the provided record in the database. + + Similar to how it is done in <code>mysql_insert()</code>, we first + try to do a <code>ha_write_row()</code> and if that fails due to + duplicate keys (or indexes), we do an <code>ha_update_row()</code> + or a <code>ha_delete_row()</code> instead. + + @param thd Thread context for writing the record. + @param table Table to which record should be written. + + @return Error code on failure, 0 on success. + */ +static int +replace_record(THD *thd, TABLE *table) +{ + DBUG_ASSERT(table != NULL && thd != NULL); + + int error; + int keynum; + auto_afree_ptr<char> key(NULL); + + while ((error= table->file->ha_write_row(table->record[0]))) + { + if ((keynum= table->file->get_dup_key(error)) < 0) + { + /* We failed to retrieve the duplicate key */ + return HA_ERR_FOUND_DUPP_KEY; + } + + /* + We need to retrieve the old row into record[1] to be able to + either update or delete the offending record. We either: + + - use rnd_pos() with a row-id (available in table->file->dupp_ref) to + the offending row, if that is possible (MyISAM and Blackhole), or else + + - use index_read_idx() with the key that is duplicated, to + retrieve the offending row. + */ + if (table->file->table_flags() & HA_DUPP_POS) + { + error= table->file->rnd_pos(table->record[1], table->file->dupp_ref); + if (error) + return error; + } + else + { + if (table->file->extra(HA_EXTRA_FLUSH_CACHE)) + { + return my_errno; + } + + if (key.get() == NULL) + { + key.assign(static_cast<char*>(my_alloca(table->s->max_unique_length))); + if (key.get() == NULL) + return ENOMEM; + } + + key_copy(key.get(), table->record[0], table->key_info + keynum, 0); + error= table->file->index_read_idx(table->record[1], keynum, key.get(), + table->key_info[keynum].key_length, + HA_READ_KEY_EXACT); + if (error) + return error; + } + + /* + Now, table->record[1] should contain the offending row. That + will enable us to update it or, alternatively, delete it (so + that we can insert the new row afterwards). + + REPLACE is defined as either INSERT or DELETE + INSERT. If + possible, we can replace it with an UPDATE, but that will not + work on InnoDB if FOREIGN KEY checks are necessary. + + I (Matz) am not sure of the reason for the last_uniq_key() + check, but I'm guessing that it's something along the + following lines. + + Suppose the duplicate key is a key that is not the last unique + key of the table and we perform an update: + then there might be another key for which the unique check will + fail, so we're better off just deleting the row and inserting + the correct row. + */ + if (last_uniq_key(table, keynum) && + !table->file->referenced_by_foreign_key()) + { + error= table->file->ha_update_row(table->record[1], + table->record[0]); + return error; + } + else + { + if ((error= table->file->ha_delete_row(table->record[1]))) + return error; + /* Will retry ha_write_row() with the offending row removed.
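+ The loop is bounded: each iteration either returns or deletes one + conflicting row, and a single new row can conflict at most once per + unique index.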
*/ + } + } + return error; +} + +int Write_rows_log_event::do_exec_row(TABLE *table) +{ + DBUG_ASSERT(table != NULL); + int error= replace_record(thd, table); + return error; +} +#endif /* !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) */ + +#ifdef MYSQL_CLIENT +void Write_rows_log_event::print(FILE *file, PRINT_EVENT_INFO* print_event_info) +{ + if (!print_event_info->short_form) + { + print_header(file, print_event_info); + fprintf(file, "\tWrite_rows: table id %lu", m_table_id); + print_base64(file, print_event_info); + } +} +#endif + +/************************************************************************** + Delete_rows_log_event member functions +**************************************************************************/ + +#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) +static int record_compare(TABLE *table, byte const *a, byte const *b) +{ + for (my_size_t i= 0 ; i < table->s->fields ; ++i) + { + uint const off= table->field[i]->offset(); + uint const res= table->field[i]->cmp_binary(a + off, b + off); + if (res != 0) { + return res; + } + } + return 0; +} + + +/* + Find the row given by 'key', if the table has keys, or else use a table scan + to find (and fetch) the row. If the engine allows random access of the + records, a combination of position() and rnd_pos() will be used. + + The 'record_buf' will be used as buffer for records while locating the + correct row. + */ +static int find_and_fetch_row(TABLE *table, byte *key, byte *record_buf) +{ + DBUG_ENTER("find_and_fetch_row(TABLE *table, byte *key, byte *record)"); + DBUG_PRINT("enter", ("table=%p, key=%p, record=%p", + table, key, record_buf)); + + DBUG_ASSERT(table->in_use != NULL); + + if ((table->file->table_flags() & HA_PRIMARY_KEY_ALLOW_RANDOM_ACCESS) + && table->s->primary_key < MAX_KEY) + { + /* + Use a more efficient method to fetch the record given by + table->record[0] if the engine allows it. We first compute a + row reference using the position() member function (it will be + stored in table->file->ref) and then use rnd_pos() to position + the "cursor" at the correct row. + */ + table->file->position(table->record[0]); + DBUG_RETURN(table->file->rnd_pos(table->record[0], table->file->ref)); + } + + DBUG_ASSERT(record_buf); + + if (table->s->keys > 0) + { + int error; + if ((error= table->file->index_read_idx(record_buf, 0, key, + table->key_info->key_length, + HA_READ_KEY_EXACT))) + { + table->file->print_error(error, MYF(0)); + DBUG_RETURN(error); + } + + /* + Below is a minor "optimization". If the key (i.e., key number + 0) has the HA_NOSAME flag set, we know that we have found the + correct record (since there can be no duplicates); otherwise, we + have to compare the record with the one found to see if it is + the correct one. + + CAVEAT! This behaviour is essential for the replication of, + e.g., the mysql.proc table since the correct record *shall* be + found using the primary key *only*. There shall be no + comparison of non-PK columns to decide if the correct record is + found. I can see no scenario where it would be incorrect to + choose the row to change using only a PK or a UNIQUE index.
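+ (When key 0 lacks HA_NOSAME we fall back to comparing all columns with + record_compare() for every candidate row.)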
+ */ + if (table->key_info->flags & HA_NOSAME) + DBUG_RETURN(0); + + while (record_compare(table, table->record[0], record_buf) != 0) + { + int error; + if ((error= table->file->index_next(record_buf))) + { + table->file->print_error(error, MYF(0)); + DBUG_RETURN(error); + } + } + } + else + { + /* Continue until we find the right record or have made a full loop */ + int restart_count= 0; // Number of times scanning has restarted from top + int error= 0; + do + { + error= table->file->rnd_next(record_buf); + switch (error) + { + case 0: + case HA_ERR_RECORD_DELETED: + break; + + case HA_ERR_END_OF_FILE: + if (++restart_count < 2) + table->file->ha_rnd_init(1); + break; + + default: + table->file->print_error(error, MYF(0)); + DBUG_RETURN(error); + } + } + while (restart_count < 2 && + record_compare(table, table->record[0], record_buf) != 0); + + DBUG_ASSERT(error == HA_ERR_END_OF_FILE || error == 0); + DBUG_RETURN(error); + } + + DBUG_RETURN(0); +} +#endif + +/* + Constructor used to build an event for writing to the binary log. + */ + +#ifndef MYSQL_CLIENT +Delete_rows_log_event::Delete_rows_log_event(THD *thd_arg, TABLE *tbl_arg, + ulong tid, MY_BITMAP const *cols, + bool is_transactional) + : Rows_log_event(thd_arg, tbl_arg, tid, cols, is_transactional) +#ifdef HAVE_REPLICATION + ,m_memory(NULL), m_key(NULL), m_search_record(NULL) +#endif +{ +} +#endif /* #if !defined(MYSQL_CLIENT) */ + +/* + Constructor used by slave to read the event from the binary log. + */ +#ifdef HAVE_REPLICATION +Delete_rows_log_event::Delete_rows_log_event(const char *buf, uint event_len, + const Format_description_log_event + *description_event) +#if defined(MYSQL_CLIENT) + : Rows_log_event(buf, event_len, DELETE_ROWS_EVENT, description_event) +#else + : Rows_log_event(buf, event_len, DELETE_ROWS_EVENT, description_event), + m_memory(NULL), m_key(NULL), m_search_record(NULL) +#endif +{ +} +#endif + +#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) +int Delete_rows_log_event::do_before_row_operations(TABLE *table) +{ + DBUG_ASSERT(m_memory == NULL); + + if ((table->file->table_flags() & HA_PRIMARY_KEY_ALLOW_RANDOM_ACCESS) && + table->s->primary_key < MAX_KEY) + { + /* + We don't need to allocate any memory for m_search_record and + m_key since they are not used. 
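+ (find_and_fetch_row() re-reads the row via position() + rnd_pos() + directly from record[0] in that case.)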
+ */ + return 0; + } + + int error= 0; + + if (table->s->keys > 0) + { + m_memory= + my_multi_malloc(MYF(MY_WME), + &m_search_record, table->s->reclength, + &m_key, table->key_info->key_length, + NULL); + } + else + { + m_memory= m_search_record= my_malloc(table->s->reclength, MYF(MY_WME)); + m_key= NULL; + } + if (!m_memory) + return HA_ERR_OUT_OF_MEM; + + if (table->s->keys > 0) + { + /* We have a key: search the table using the index */ + if (!table->file->inited) + error= table->file->ha_index_init(0, FALSE); + } + else + { + /* We don't have a key: search the table using rnd_next() */ + error= table->file->ha_rnd_init(1); + } + + return error; +} + +int Delete_rows_log_event::do_after_row_operations(TABLE *table, int error) +{ + /* error= ToDo: find out what this should really be; this triggers close_scan in NDB, returning an error? */ + table->file->ha_index_or_rnd_end(); + my_free(m_memory, MYF(MY_ALLOW_ZERO_PTR)); // Free for multi_malloc + m_memory= m_search_record= m_key= NULL; + + return error; +} + +char const *Delete_rows_log_event::do_prepare_row(THD *thd, TABLE *table, + char const *row_start) +{ + char const *ptr= row_start; + DBUG_ASSERT(ptr); + /* + This assertion actually checks that there are at least as many + columns on the slave as on the master. + */ + DBUG_ASSERT(table->s->fields >= m_width); + + DBUG_ASSERT(ptr != NULL); + ptr= unpack_row(table, table->record[0], ptr, &m_cols); + + /* + If we access rows using the random access method, m_key will + be set to NULL, so we do not need to make a key copy in that case. + */ + if (m_key) + { + KEY *const key_info= table->key_info; + + key_copy(m_key, table->record[0], key_info, 0); + } + + return ptr; +} + +int Delete_rows_log_event::do_exec_row(TABLE *table) +{ + DBUG_ASSERT(table != NULL); + + int error= find_and_fetch_row(table, m_key, m_search_record); + if (error) + return error; + + /* + Now we should have the right row to delete. We are using + record[0] since it is guaranteed to point to a record with the + correct value. + */ + error= table->file->ha_delete_row(table->record[0]); + + return error; +} + +#endif /* !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) */ + +#ifdef MYSQL_CLIENT +void Delete_rows_log_event::print(FILE *file, + PRINT_EVENT_INFO* print_event_info) +{ + if (!print_event_info->short_form) + { + print_header(file, print_event_info); + fprintf(file, "\tDelete_rows: table id %lu", m_table_id); + print_base64(file, print_event_info); + } +} +#endif + + +/************************************************************************** + Update_rows_log_event member functions +**************************************************************************/ + +/* + Constructor used to build an event for writing to the binary log. + */ +#if !defined(MYSQL_CLIENT) +Update_rows_log_event::Update_rows_log_event(THD *thd_arg, TABLE *tbl_arg, + ulong tid, MY_BITMAP const *cols, + bool is_transactional) +: Rows_log_event(thd_arg, tbl_arg, tid, cols, is_transactional) +#ifdef HAVE_REPLICATION + , m_memory(NULL), m_key(NULL) +#endif +{ +} +#endif /* !defined(MYSQL_CLIENT) */ + +/* + Constructor used by slave to read the event from the binary log.
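The my_multi_malloc() call in do_before_row_operations() is worth a note: it carves a single allocation into several independently usable buffers, so one my_free() releases them all. A minimal sketch of the idiom (sizes as in the patch, usage condensed):

    /*
      One block, two pointers: my_multi_malloc() lays out record_buf and
      key_buf inside the same allocation; the pointer/size list is
      NULL-terminated, and a single my_free() releases both buffers.
    */
    byte *record_buf, *key_buf;
    gptr memory= my_multi_malloc(MYF(MY_WME),
                                 &record_buf, table->s->reclength,
                                 &key_buf, table->key_info->key_length,
                                 NULL);
    if (!memory)
      return HA_ERR_OUT_OF_MEM;
    /* ... use record_buf and key_buf ... */
    my_free(memory, MYF(MY_ALLOW_ZERO_PTR));   // frees both buffers at once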
+ */ +#ifdef HAVE_REPLICATION +Update_rows_log_event::Update_rows_log_event(const char *buf, uint event_len, + const + Format_description_log_event + *description_event) +#if defined(MYSQL_CLIENT) + : Rows_log_event(buf, event_len, UPDATE_ROWS_EVENT, description_event) +#else + : Rows_log_event(buf, event_len, UPDATE_ROWS_EVENT, description_event), + m_memory(NULL), m_key(NULL) +#endif +{ +} +#endif + +#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) +int Update_rows_log_event::do_before_row_operations(TABLE *table) +{ + DBUG_ASSERT(m_memory == NULL); + + if ((table->file->table_flags() & HA_PRIMARY_KEY_ALLOW_RANDOM_ACCESS) && + table->s->primary_key < MAX_KEY) + { + /* + We don't need to allocate any memory for m_search_record and + m_key since they are not used. + */ + return 0; + } + + int error= 0; + + if (table->s->keys > 0) + { + m_memory= + my_multi_malloc(MYF(MY_WME), + &m_search_record, table->s->reclength, + &m_key, table->key_info->key_length, + NULL); + } + else + { + m_memory= m_search_record= my_malloc(table->s->reclength, MYF(MY_WME)); + m_key= NULL; + } + if (!m_memory) + return HA_ERR_OUT_OF_MEM; + + if (table->s->keys > 0) + { + /* We have a key: search the table using the index */ + if (!table->file->inited) + error= table->file->ha_index_init(0, FALSE); + } + else + { + /* We don't have a key: search the table using rnd_next() */ + error= table->file->ha_rnd_init(1); + } + table->timestamp_field_type= TIMESTAMP_NO_AUTO_SET; + + return error; +} + +int Update_rows_log_event::do_after_row_operations(TABLE *table, int error) +{ + /* error= ToDo: find out what this should really be; this triggers close_scan in NDB, returning an error? */ + table->file->ha_index_or_rnd_end(); + my_free(m_memory, MYF(MY_ALLOW_ZERO_PTR)); + m_memory= m_search_record= m_key= NULL; + + return error; +} + +char const *Update_rows_log_event::do_prepare_row(THD *thd, TABLE *table, + char const *row_start) +{ + char const *ptr= row_start; + DBUG_ASSERT(ptr); + /* + This assertion actually checks that there are at least as many + columns on the slave as on the master. + */ + DBUG_ASSERT(table->s->fields >= m_width); + + /* record[0] is the before image for the update */ + ptr= unpack_row(table, table->record[0], ptr, &m_cols); + DBUG_ASSERT(ptr != NULL); + /* record[1] is the after image for the update */ + ptr= unpack_row(table, table->record[1], ptr, &m_cols); + + /* + If we access rows using the random access method, m_key will + be set to NULL, so we do not need to make a key copy in that case. + */ + if (m_key) + { + KEY *const key_info= table->key_info; + + key_copy(m_key, table->record[0], key_info, 0); + } + + return ptr; +} + +int Update_rows_log_event::do_exec_row(TABLE *table) +{ + DBUG_ASSERT(table != NULL); + + int error= find_and_fetch_row(table, m_key, m_search_record); + if (error) + return error; + + /* + Now we should have the right row to update. The record that has + been fetched is guaranteed to be in record[0], so we use that.
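Condensed, the update path that do_prepare_row() and do_exec_row() implement is: unpack the before image into record[0] and the after image into record[1], locate the row from the before image, then update it. A recap sketch (error handling elided):

    ptr= unpack_row(table, table->record[0], ptr, &m_cols);  /* before image */
    ptr= unpack_row(table, table->record[1], ptr, &m_cols);  /* after image */
    if (!(error= find_and_fetch_row(table, m_key, m_search_record)))
      error= table->file->ha_update_row(table->record[0], table->record[1]);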
+ */ + error= table->file->ha_update_row(table->record[0], table->record[1]); + + return error; +} +#endif /* !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) */ + +#ifdef MYSQL_CLIENT +void Update_rows_log_event::print(FILE *file, + PRINT_EVENT_INFO* print_event_info) +{ + if (!print_event_info->short_form) + { + print_header(file, print_event_info); + fprintf(file, "\tUpdate_rows: table id %lu", m_table_id); + print_base64(file, print_event_info); + } +} +#endif + +#endif /* defined(HAVE_ROW_BASED_REPLICATION) */ diff --git a/sql/log_event.h b/sql/log_event.h index 7783a97f03f..5d58a204ec9 100644 --- a/sql/log_event.h +++ b/sql/log_event.h @@ -26,6 +26,16 @@ #pragma interface /* gcc class implementation */ #endif +#include <my_bitmap.h> + +#if !defined(MYSQL_CLIENT) +#ifdef HAVE_ROW_BASED_REPLICATION +extern my_bool binlog_row_based; +#else +extern const my_bool binlog_row_based; +#endif +#endif + #define LOG_READ_EOF -1 #define LOG_READ_BOGUS -2 #define LOG_READ_IO -3 @@ -196,6 +206,8 @@ struct sql_ex_info #define EXEC_LOAD_HEADER_LEN 4 #define DELETE_FILE_HEADER_LEN 4 #define FORMAT_DESCRIPTION_HEADER_LEN (START_V3_HEADER_LEN+1+LOG_EVENT_TYPES) +#define ROWS_HEADER_LEN 8 +#define TABLE_MAP_HEADER_LEN 8 #define EXECUTE_LOAD_QUERY_EXTRA_HEADER_LEN (4 + 4 + 4 + 1) #define EXECUTE_LOAD_QUERY_HEADER_LEN (QUERY_HEADER_LEN + EXECUTE_LOAD_QUERY_EXTRA_HEADER_LEN) @@ -302,6 +314,14 @@ struct sql_ex_info /* DF = "Delete File" */ #define DF_FILE_ID_OFFSET 0 +/* TM = "Table Map" */ +#define TM_MAPID_OFFSET 0 +#define TM_FLAGS_OFFSET 6 + +/* RW = "RoWs" */ +#define RW_MAPID_OFFSET 0 +#define RW_FLAGS_OFFSET 6 + /* ELQ = "Execute Load Query" */ #define ELQ_FILE_ID_OFFSET QUERY_HEADER_LEN #define ELQ_FN_POS_START_OFFSET ELQ_FILE_ID_OFFSET + 4 @@ -373,6 +393,12 @@ struct sql_ex_info #define LOG_EVENT_SUPPRESS_USE_F 0x8 /* + The table map version internal to the log should be increased after + the event has been written to the binary log. + */ +#define LOG_EVENT_UPDATE_TABLE_MAP_VERSION_F 0x10 + +/* OPTIONS_WRITTEN_TO_BIN_LOG are the bits of thd->options which must be written to the binlog. OPTIONS_WRITTEN_TO_BINLOG could be written into the Format_description_log_event, so that if later we don't want @@ -427,6 +453,10 @@ enum Log_event_type XID_EVENT= 16, BEGIN_LOAD_QUERY_EVENT= 17, EXECUTE_LOAD_QUERY_EVENT= 18, + TABLE_MAP_EVENT = 19, + WRITE_ROWS_EVENT = 20, + UPDATE_ROWS_EVENT = 21, + DELETE_ROWS_EVENT = 22, /* Add new events here - right above this comment! 
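Given ROWS_HEADER_LEN (8) and the RW_* offsets above, the post-header of a rows event is a 6-byte table id followed by a 2-byte flag set. A hedged decoding sketch (uint6korr()/uint2korr() are the usual little-endian readers in this code base; post_header is assumed to point at the event's post-header):

    ulong  table_id= (ulong) uint6korr(post_header + RW_MAPID_OFFSET); /* bytes 0..5 */
    uint16 flags=            uint2korr(post_header + RW_FLAGS_OFFSET); /* bytes 6..7 */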
@@ -504,6 +534,7 @@ typedef struct st_print_event_info /* Settings on how to print the events */ bool short_form; + bool base64_output; my_off_t hexdump_from; uint8 common_header_len; @@ -616,9 +647,10 @@ public: static Log_event* read_log_event(IO_CACHE* file, const Format_description_log_event *description_event); /* print*() functions are used by mysqlbinlog */ - virtual void print(FILE* file, PRINT_EVENT_INFO* print_event_info= 0) = 0; + virtual void print(FILE* file, PRINT_EVENT_INFO* print_event_info) = 0; void print_timestamp(FILE* file, time_t *ts = 0); - void print_header(FILE* file, PRINT_EVENT_INFO* print_event_info= 0); + void print_header(FILE* file, PRINT_EVENT_INFO* print_event_info); + void print_base64(FILE* file, PRINT_EVENT_INFO* print_event_info); #endif static void *operator new(size_t size) @@ -646,7 +678,7 @@ public: virtual Log_event_type get_type_code() = 0; virtual bool is_valid() const = 0; virtual bool is_artificial_event() { return 0; } - inline bool get_cache_stmt() { return cache_stmt; } + inline bool get_cache_stmt() const { return cache_stmt; } Log_event(const char* buf, const Format_description_log_event* description_event); virtual ~Log_event() { free_temp_buf();} void register_temp_buf(char* buf) { temp_buf = buf; } @@ -778,8 +810,8 @@ public: uint32 q_len_arg); #endif /* HAVE_REPLICATION */ #else - void print_query_header(FILE* file, PRINT_EVENT_INFO* print_event_info= 0); - void print(FILE* file, PRINT_EVENT_INFO* print_event_info= 0); + void print_query_header(FILE* file, PRINT_EVENT_INFO* print_event_info); + void print(FILE* file, PRINT_EVENT_INFO* print_event_info); #endif Query_log_event(const char* buf, uint event_len, @@ -833,7 +865,7 @@ public: void pack_info(Protocol* protocol); int exec_event(struct st_relay_log_info* rli); #else - void print(FILE* file, PRINT_EVENT_INFO* print_event_info= 0); + void print(FILE* file, PRINT_EVENT_INFO* print_event_info); #endif Slave_log_event(const char* buf, uint event_len); @@ -921,7 +953,7 @@ public: bool use_rli_only_for_errors); #endif /* HAVE_REPLICATION */ #else - void print(FILE* file, PRINT_EVENT_INFO* print_event_info = 0); + void print(FILE* file, PRINT_EVENT_INFO* print_event_info); void print(FILE* file, PRINT_EVENT_INFO* print_event_info, bool commented); #endif @@ -1011,7 +1043,7 @@ public: #endif /* HAVE_REPLICATION */ #else Start_log_event_v3() {} - void print(FILE* file, PRINT_EVENT_INFO* print_event_info= 0); + void print(FILE* file, PRINT_EVENT_INFO* print_event_info); #endif Start_log_event_v3(const char* buf, @@ -1106,7 +1138,7 @@ public: int exec_event(struct st_relay_log_info* rli); #endif /* HAVE_REPLICATION */ #else - void print(FILE* file, PRINT_EVENT_INFO* print_event_info= 0); + void print(FILE* file, PRINT_EVENT_INFO* print_event_info); #endif Intvar_log_event(const char* buf, const Format_description_log_event* description_event); @@ -1147,7 +1179,7 @@ class Rand_log_event: public Log_event int exec_event(struct st_relay_log_info* rli); #endif /* HAVE_REPLICATION */ #else - void print(FILE* file, PRINT_EVENT_INFO* print_event_info= 0); + void print(FILE* file, PRINT_EVENT_INFO* print_event_info); #endif Rand_log_event(const char* buf, const Format_description_log_event* description_event); @@ -1184,7 +1216,7 @@ class Xid_log_event: public Log_event int exec_event(struct st_relay_log_info* rli); #endif /* HAVE_REPLICATION */ #else - void print(FILE* file, PRINT_EVENT_INFO* print_event_info= 0); + void print(FILE* file, PRINT_EVENT_INFO* print_event_info); #endif 
Xid_log_event(const char* buf, const Format_description_log_event* description_event); @@ -1195,6 +1227,9 @@ class Xid_log_event: public Log_event bool write(IO_CACHE* file); #endif bool is_valid() const { return 1; } +#if !defined(DBUG_OFF) && !defined(MYSQL_CLIENT) + static my_bool show_xid; +#endif }; /***************************************************************************** @@ -1226,7 +1261,7 @@ public: void pack_info(Protocol* protocol); int exec_event(struct st_relay_log_info* rli); #else - void print(FILE* file, PRINT_EVENT_INFO* print_event_info= 0); + void print(FILE* file, PRINT_EVENT_INFO* print_event_info); #endif User_var_log_event(const char* buf, const Format_description_log_event* description_event); @@ -1252,7 +1287,7 @@ public: {} int exec_event(struct st_relay_log_info* rli); #else - void print(FILE* file, PRINT_EVENT_INFO* print_event_info= 0); + void print(FILE* file, PRINT_EVENT_INFO* print_event_info); #endif Stop_log_event(const char* buf, const Format_description_log_event* description_event): @@ -1282,7 +1317,7 @@ public: uint ident_len; uint flags; #ifndef MYSQL_CLIENT - Rotate_log_event(THD* thd_arg, const char* new_log_ident_arg, + Rotate_log_event(const char* new_log_ident_arg, uint ident_len_arg, ulonglong pos_arg, uint flags); #ifdef HAVE_REPLICATION @@ -1290,7 +1325,7 @@ public: int exec_event(struct st_relay_log_info* rli); #endif /* HAVE_REPLICATION */ #else - void print(FILE* file, PRINT_EVENT_INFO* print_event_info= 0); + void print(FILE* file, PRINT_EVENT_INFO* print_event_info); #endif Rotate_log_event(const char* buf, uint event_len, @@ -1343,7 +1378,7 @@ public: int exec_event(struct st_relay_log_info* rli); #endif /* HAVE_REPLICATION */ #else - void print(FILE* file, PRINT_EVENT_INFO* print_event_info= 0); + void print(FILE* file, PRINT_EVENT_INFO* print_event_info); void print(FILE* file, PRINT_EVENT_INFO* print_event_info, bool enable_local); #endif @@ -1411,7 +1446,7 @@ public: virtual int get_create_or_append() const; #endif /* HAVE_REPLICATION */ #else - void print(FILE* file, PRINT_EVENT_INFO* print_event_info= 0); + void print(FILE* file, PRINT_EVENT_INFO* print_event_info); #endif Append_block_log_event(const char* buf, uint event_len, @@ -1422,8 +1457,8 @@ public: bool is_valid() const { return block != 0; } #ifndef MYSQL_CLIENT bool write(IO_CACHE* file); -#endif const char* get_db() { return db; } +#endif }; @@ -1446,7 +1481,7 @@ public: int exec_event(struct st_relay_log_info* rli); #endif /* HAVE_REPLICATION */ #else - void print(FILE* file, PRINT_EVENT_INFO* print_event_info= 0); + void print(FILE* file, PRINT_EVENT_INFO* print_event_info); void print(FILE* file, PRINT_EVENT_INFO* print_event_info, bool enable_local); #endif @@ -1458,8 +1493,8 @@ public: bool is_valid() const { return file_id != 0; } #ifndef MYSQL_CLIENT bool write(IO_CACHE* file); -#endif const char* get_db() { return db; } +#endif }; @@ -1482,7 +1517,7 @@ public: int exec_event(struct st_relay_log_info* rli); #endif /* HAVE_REPLICATION */ #else - void print(FILE* file, PRINT_EVENT_INFO* print_event_info= 0); + void print(FILE* file, PRINT_EVENT_INFO* print_event_info); #endif Execute_load_log_event(const char* buf, uint event_len, @@ -1493,8 +1528,8 @@ public: bool is_valid() const { return file_id != 0; } #ifndef MYSQL_CLIENT bool write(IO_CACHE* file); -#endif const char* get_db() { return db; } +#endif }; @@ -1567,7 +1602,7 @@ public: int exec_event(struct st_relay_log_info* rli); #endif /* HAVE_REPLICATION */ #else - void print(FILE* file, PRINT_EVENT_INFO* 
print_event_info= 0); + void print(FILE* file, PRINT_EVENT_INFO* print_event_info); /* Prints the query as LOAD DATA LOCAL and with rewritten filename */ void print(FILE* file, PRINT_EVENT_INFO* print_event_info, const char *local_fname); @@ -1599,10 +1634,523 @@ public: Log_event(buf, description_event) {} ~Unknown_log_event() {} - void print(FILE* file, PRINT_EVENT_INFO* print_event_info= 0); + void print(FILE* file, PRINT_EVENT_INFO* print_event_info); Log_event_type get_type_code() { return UNKNOWN_EVENT;} bool is_valid() const { return 1; } }; #endif char *str_to_hex(char *to, const char *from, uint len); + +/***************************************************************************** + + Table map log event class + + Create a mapping from a (database name, table name) pair to a table + identifier (an integer number). + + ****************************************************************************/ + +class Table_map_log_event : public Log_event +{ +public: + /* Constants */ + enum + { + TYPE_CODE = TABLE_MAP_EVENT + }; + + enum enum_error + { + ERR_OPEN_FAILURE = -1, /* Failure to open table */ + ERR_OK = 0, /* No error */ + ERR_TABLE_LIMIT_EXCEEDED = 1, /* No more room for tables */ + ERR_OUT_OF_MEM = 2, /* Out of memory */ + ERR_BAD_TABLE_DEF = 3, /* Table definition does not match */ + ERR_RBR_TO_SBR = 4 /* daisy-chaining RBR to SBR not allowed */ + }; + + enum enum_flag + { + /* + Nothing here right now, but the flags support is there in + preparation for changes that are coming. + */ + }; + + typedef uint16 flag_set; + + /* Special constants representing sets of flags */ + enum + { + NO_FLAGS = 0U + }; + + void set_flags(flag_set flag) { m_flags |= flag; } + void clear_flags(flag_set flag) { m_flags &= ~flag; } + flag_set get_flags(flag_set flag) const { return m_flags & flag; } + +#ifndef MYSQL_CLIENT + Table_map_log_event(THD *thd, TABLE *tbl, ulong tid, + bool is_transactional, uint16 flags); +#endif +#ifdef HAVE_REPLICATION + Table_map_log_event(const char *buf, uint event_len, + const Format_description_log_event *description_event); +#endif + + ~Table_map_log_event(); + + virtual Log_event_type get_type_code() { return TABLE_MAP_EVENT; } + virtual bool is_valid() const { return m_memory; /* we check malloc */ } + + virtual int get_data_size() { return m_data_size; } +#ifndef MYSQL_CLIENT + virtual bool write_data_header(IO_CACHE *file); + virtual bool write_data_body(IO_CACHE *file); + virtual const char *get_db() { return m_dbnam; } +#endif + +#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) + virtual int exec_event(struct st_relay_log_info *rli); + virtual void pack_info(Protocol *protocol); +#endif + +#ifdef MYSQL_CLIENT + virtual void print(FILE *file, PRINT_EVENT_INFO *print_event_info); +#endif + + +private: +#ifndef MYSQL_CLIENT + TABLE *m_table; +#endif + char const *m_dbnam; + my_size_t m_dblen; + char const *m_tblnam; + my_size_t m_tbllen; + ulong m_colcnt; + unsigned char *m_coltype; + + gptr m_memory; + ulong m_table_id; + flag_set m_flags; + + my_size_t m_data_size; +}; + + +/***************************************************************************** + + Row level log event class. + + Common base class for all row-level log events. + + RESPONSIBILITIES + + Encode the common parts of all events containing rows, which are: + - Write data header and data body to an IO_CACHE. + - Provide an interface for adding an individual row to the event.
+ + ****************************************************************************/ + +class Rows_log_event : public Log_event +{ +public: + /* + These definitions allow you to combine the flags into an + appropriate flag set using the normal bitwise operators. The + implicit conversion from an enum constant to an integer is + accepted by the compiler, and the result is used to set the real + set of flags. + */ + + enum enum_flag + { + /* Last event of a statement */ + STMT_END_F = (1U << 0), + + /* Value of the OPTION_NO_FOREIGN_KEY_CHECKS flag in thd->options */ + NO_FOREIGN_KEY_CHECKS_F = (1U << 1), + + /* Value of the OPTION_RELAXED_UNIQUE_CHECKS flag in thd->options */ + RELAXED_UNIQUE_CHECKS_F = (1U << 2) + }; + + typedef uint16 flag_set; + + /* Special constants representing sets of flags */ + enum + { + NO_FLAGS = 0U + }; + + virtual ~Rows_log_event(); + + void set_flags(flag_set flags) { m_flags |= flags; } + void clear_flags(flag_set flags) { m_flags &= ~flags; } + flag_set get_flags(flag_set flags) const { return m_flags & flags; } + +#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) + virtual int exec_event(struct st_relay_log_info *rli); +#ifdef DBUG_RBR + virtual void pack_info(Protocol *protocol); +#endif +#endif + +#ifdef MYSQL_CLIENT + /* Not for direct call; each derived class has its own ::print() */ + virtual void print(FILE *file, PRINT_EVENT_INFO *print_event_info)= 0; +#endif + +#ifndef MYSQL_CLIENT + int add_row_data(byte *data, my_size_t length) + { + return do_add_row_data(data,length); + } +#endif + + /* Member functions to implement superclass interface */ + virtual int get_data_size() + { + DBUG_EXECUTE_IF("old_row_based_repl_4_byte_map_id_master", + return 6 + 1 + no_bytes_in_map(&m_cols) + + (m_rows_cur - m_rows_buf);); + return ROWS_HEADER_LEN + 1 + no_bytes_in_map(&m_cols) + + (m_rows_cur - m_rows_buf); + } + + MY_BITMAP const *get_cols() const { return &m_cols; } + my_size_t get_width() const { return m_width; } + ulong get_table_id() const { return m_table_id; } + +#ifndef MYSQL_CLIENT + virtual bool write_data_header(IO_CACHE *file); + virtual bool write_data_body(IO_CACHE *file); + virtual const char *get_db() { return m_table->s->db.str; } +#endif + virtual bool is_valid() const + { + /* that's how we check malloc() succeeded */ + return m_rows_buf && m_cols.bitmap; + } + + /* + If there is no table map active for the event, write one to the + binary log. + + LOCK_log has to be acquired before calling this function. + + PARAMETERS + thd - Thread to use when writing the table map + + RETURN VALUE + Error code, or zero if write succeeded. + */ +#if !defined(MYSQL_CLIENT) && defined(HAVE_ROW_BASED_REPLICATION) + int maybe_write_table_map(THD *thd, IO_CACHE *file, MYSQL_LOG *log) const + { + /* + N.B., get_cache_stmt() returns the value of 'using_trans' that + was provided to the constructor, i.e., get_cache_stmt() == true + if and only if the table is transactional. + */ + + int result= 0; + if (!log->is_table_mapped(m_table)) + result= log->write_table_map(thd, file, m_table, get_cache_stmt()); + return result; + } +#endif + +protected: + /* + The constructors are protected since you're supposed to inherit + this class, not create instances of this class.
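A sketch of how a writer might use maybe_write_table_map() declared above, so that the first rows event for a table is preceded by its table map (the surrounding function is hypothetical; LOCK_log is assumed to be held, as the comment requires):

    int write_rows_event(THD *thd, Rows_log_event *ev,
                         IO_CACHE *file, MYSQL_LOG *log)
    {
      int error= ev->maybe_write_table_map(thd, file, log); // no-op if already mapped
      if (!error && ev->write(file))                        // Log_event::write() returns bool
        error= 1;
      return error;
    }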
+ */ +#ifndef MYSQL_CLIENT + Rows_log_event(THD*, TABLE*, ulong table_id, + MY_BITMAP const *cols, bool is_transactional); +#endif + Rows_log_event(const char *row_data, uint event_len, + Log_event_type event_type, + const Format_description_log_event *description_event); + +#ifndef MYSQL_CLIENT + virtual int do_add_row_data(byte *data, my_size_t length); +#endif + +#ifndef MYSQL_CLIENT + TABLE *m_table; /* The table the rows belong to */ +#endif + ulong m_table_id; /* Table ID */ + MY_BITMAP m_cols; /* Bitmap denoting columns available */ + ulong m_width; /* The width of the columns bitmap */ + + /* Bit buffer in the same memory as the class */ + uint32 m_bitbuf[128/(sizeof(uint32)*8)]; + + byte *m_rows_buf; /* The rows in packed format */ + byte *m_rows_cur; /* One-after the end of the data */ + byte *m_rows_end; /* One-after the end of the allocated space */ + + flag_set m_flags; /* Flags for row-level events */ + +private: + +#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) + /* + Primitive to prepare for a sequence of row executions. + + DESCRIPTION + + Before doing a sequence of do_prepare_row() and do_exec_row() + calls, this member function should be called to prepare for the + entire sequence. Typically, this member function will allocate + space for any buffers that are needed for the two member + functions mentioned above. + + RETURN VALUE + + The member function will return 0 if all went OK, or a non-zero + error code otherwise. + */ + virtual int do_before_row_operations(TABLE *table) = 0; + + /* + Primitive to clean up after a sequence of row executions. + + DESCRIPTION + + After doing a sequence of do_prepare_row() and do_exec_row(), + this member function should be called to clean up and release + any allocated buffers. + */ + virtual int do_after_row_operations(TABLE *table, int error) = 0; + + /* + Primitive to prepare for handling one row in a row-level event. + + DESCRIPTION + + The member function prepares for execution of operations needed for one + row in a row-level event by reading data from the buffer containing + the row. No specific interpretation of the data is normally done here, + since SQL thread specific data is not available: that data is made + available to the do_exec_row() function. + + RETURN VALUE + A pointer to the start of the next row, or NULL if the preparation + failed. Currently, preparation cannot fail, but don't rely on this + behavior. + */ + virtual char const *do_prepare_row(THD*, TABLE*, char const *row_start) = 0; + + /* + Primitive to do the actual execution necessary for a row. + + DESCRIPTION + The member function will do the actual execution needed to handle a row. + + RETURN VALUE + 0 if execution succeeded, 1 if execution failed. + + */ + virtual int do_exec_row(TABLE *table) = 0; +#endif /* !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) */ +}; + + +/***************************************************************************** + + Write row log event class + + Log row insertions and updates. The event contains several + insert/update rows for a table. Note that each event contains only + rows for one table.
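The four pure virtuals documented above form a classic template method; a condensed sketch of the loop that exec_event() plausibly drives (rows_begin/rows_end stand for the bounds of the packed row buffer; real code adds error handling and relay-log bookkeeping):

    int error= do_before_row_operations(table);
    for (char const *row= rows_begin; !error && row < rows_end; )
    {
      row= do_prepare_row(thd, table, row);  // unpack one row from the buffer
      error= do_exec_row(table);             // apply it to the table
    }
    error= do_after_row_operations(table, error);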
+ + ****************************************************************************/ +class Write_rows_log_event : public Rows_log_event +{ +public: + enum + { + /* Support interface to THD::binlog_prepare_pending_rows_event */ + TYPE_CODE = WRITE_ROWS_EVENT + }; + +#if !defined(MYSQL_CLIENT) + Write_rows_log_event(THD*, TABLE*, ulong table_id, + MY_BITMAP const *cols, bool is_transactional); +#endif +#ifdef HAVE_REPLICATION + Write_rows_log_event(const char *buf, uint event_len, + const Format_description_log_event *description_event); +#endif +#if !defined(MYSQL_CLIENT) && defined(HAVE_ROW_BASED_REPLICATION) + static bool binlog_row_logging_function(THD *thd, TABLE *table, + bool is_transactional, + MY_BITMAP *cols, + uint fields, + const byte *before_record + __attribute__((unused)), + const byte *after_record) + { + return thd->binlog_write_row(table, is_transactional, + cols, fields, after_record); + } +#endif + +private: + virtual Log_event_type get_type_code() { return (Log_event_type)TYPE_CODE; } + +#ifdef MYSQL_CLIENT + void print(FILE *file, PRINT_EVENT_INFO *print_event_info); +#endif + +#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) + gptr m_memory; + byte *m_search_record; + + virtual int do_before_row_operations(TABLE *table); + virtual int do_after_row_operations(TABLE *table, int error); + virtual char const *do_prepare_row(THD*, TABLE*, char const *row_start); + virtual int do_exec_row(TABLE *table); +#endif +}; + + +/***************************************************************************** + + Update rows log event class + + Log row updates with a before image. The event contains several + update rows for a table. Note that each event contains only rows for + one table. + + Also note that the row data consists of pairs of rows: one row + with the old data and one row with the new data. + + ****************************************************************************/ +class Update_rows_log_event : public Rows_log_event +{ +public: + enum + { + /* Support interface to THD::binlog_prepare_pending_rows_event */ + TYPE_CODE = UPDATE_ROWS_EVENT + }; + +#ifndef MYSQL_CLIENT + Update_rows_log_event(THD*, TABLE*, ulong table_id, + MY_BITMAP const *cols, bool is_transactional); +#endif + +#ifdef HAVE_REPLICATION + Update_rows_log_event(const char *buf, uint event_len, + const Format_description_log_event *description_event); +#endif + +#if !defined(MYSQL_CLIENT) && defined(HAVE_ROW_BASED_REPLICATION) + static bool binlog_row_logging_function(THD *thd, TABLE *table, + bool is_transactional, + MY_BITMAP *cols, + uint fields, + const byte *before_record, + const byte *after_record) + { + return thd->binlog_update_row(table, is_transactional, + cols, fields, before_record, after_record); + } +#endif + +private: + virtual Log_event_type get_type_code() { return (Log_event_type)TYPE_CODE; } + +#ifdef MYSQL_CLIENT + void print(FILE *file, PRINT_EVENT_INFO *print_event_info); +#endif + +#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) + gptr m_memory; + byte *m_key; + byte *m_search_record; + + virtual int do_before_row_operations(TABLE *table); + virtual int do_after_row_operations(TABLE *table, int error); + virtual char const *do_prepare_row(THD*, TABLE*, char const *row_start); + virtual int do_exec_row(TABLE *table); +#endif /* !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) */ +}; + +/***************************************************************************** + + Delete rows log event class. + + Log row deletions.
The event contains several delete rows for a + table. Note that each event contains only rows for one table. + + RESPONSIBILITIES + + - Act as a container for rows that have been deleted on the master + and should be deleted on the slave. + + COLLABORATION + + Row_writer + Create the event and add rows to the event. + Row_reader + Extract the rows from the event. + + ****************************************************************************/ +class Delete_rows_log_event : public Rows_log_event +{ +public: + enum + { + /* Support interface to THD::binlog_prepare_pending_rows_event */ + TYPE_CODE = DELETE_ROWS_EVENT + }; + +#ifndef MYSQL_CLIENT + Delete_rows_log_event(THD*, TABLE*, ulong, + MY_BITMAP const *cols, bool is_transactional); +#endif +#ifdef HAVE_REPLICATION + Delete_rows_log_event(const char *buf, uint event_len, + const Format_description_log_event *description_event); +#endif +#if !defined(MYSQL_CLIENT) && defined(HAVE_ROW_BASED_REPLICATION) + static bool binlog_row_logging_function(THD *thd, TABLE *table, + bool is_transactional, + MY_BITMAP *cols, + uint fields, + const byte *before_record, + const byte *after_record + __attribute__((unused))) + { + return thd->binlog_delete_row(table, is_transactional, + cols, fields, before_record); + } +#endif + +private: + virtual Log_event_type get_type_code() { return (Log_event_type)TYPE_CODE; } + +#ifdef MYSQL_CLIENT + void print(FILE *file, PRINT_EVENT_INFO *print_event_info); +#endif + +#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) + gptr m_memory; + byte *m_key; + byte *m_search_record; + + virtual int do_before_row_operations(TABLE *table); + virtual int do_after_row_operations(TABLE *table, int error); + virtual char const *do_prepare_row(THD*, TABLE*, char const *row_start); + virtual int do_exec_row(TABLE *table); +#endif +}; + + #endif /* _log_event_h */ diff --git a/sql/my_decimal.cc b/sql/my_decimal.cc index 1bd16940b47..89607129026 100644 --- a/sql/my_decimal.cc +++ b/sql/my_decimal.cc @@ -193,16 +193,23 @@ int str2my_decimal(uint mask, const char *from, uint length, #ifndef DBUG_OFF /* routines for debugging print */ +#define DIG_PER_DEC1 9 +#define ROUND_UP(X) (((X)+DIG_PER_DEC1-1)/DIG_PER_DEC1) + /* print decimal */ void print_decimal(const my_decimal *dec) { - fprintf(DBUG_FILE, - "\nDecimal: sign: %d intg: %d frac: %d \n\ -%09d,%09d,%09d,%09d,%09d,%09d,%09d,%09d\n", - dec->sign(), dec->intg, dec->frac, - dec->buf[0], dec->buf[1], dec->buf[2], dec->buf[3], - dec->buf[4], dec->buf[5], dec->buf[6], dec->buf[7]); + int i, end; + char buff[512], *pos; + pos= buff; + pos+= my_sprintf(buff, (buff, "Decimal: sign: %d intg: %d frac: %d { ", + dec->sign(), dec->intg, dec->frac)); + end= ROUND_UP(dec->frac)+ROUND_UP(dec->intg)-1; + for (i=0; i < end; i++) + pos+= my_sprintf(pos, (pos, "%09d, ", dec->buf[i])); + pos+= my_sprintf(pos, (pos, "%09d }\n", dec->buf[i])); + fputs(buff, DBUG_FILE); } diff --git a/sql/mysql_priv.h b/sql/mysql_priv.h index ca2cda0f7c6..b6be03003c6 100644 --- a/sql/mysql_priv.h +++ b/sql/mysql_priv.h @@ -191,11 +191,6 @@ extern CHARSET_INFO *national_charset_info, *table_alias_charset; #define FLUSH_TIME 0 /* Don't flush tables */ #define MAX_CONNECT_ERRORS 10 // errors before disabling host -#ifdef HAVE_INNOBASE_DB -#define IF_INNOBASE_DB(A, B) (A) -#else -#define IF_INNOBASE_DB(A, B) (B) -#endif #ifdef __NETWARE__ #define IF_NETWARE(A,B) (A) #else @@ -253,50 +248,50 @@ extern CHARSET_INFO *national_charset_info, *table_alias_charset; TODO: separate three contexts above, move them to separate
bitfields. */ -#define SELECT_DISTINCT (1L << 0) // SELECT, user -#define SELECT_STRAIGHT_JOIN (1L << 1) // SELECT, user -#define SELECT_DESCRIBE (1L << 2) // SELECT, user -#define SELECT_SMALL_RESULT (1L << 3) // SELECT, user -#define SELECT_BIG_RESULT (1L << 4) // SELECT, user -#define OPTION_FOUND_ROWS (1L << 5) // SELECT, user -#define OPTION_TO_QUERY_CACHE (1L << 6) // SELECT, user -#define SELECT_NO_JOIN_CACHE (1L << 7) // intern -#define OPTION_BIG_TABLES (1L << 8) // THD, user -#define OPTION_BIG_SELECTS (1L << 9) // THD, user -#define OPTION_LOG_OFF (1L << 10) // THD, user -#define OPTION_UPDATE_LOG (1L << 11) // THD, user, unused -#define TMP_TABLE_ALL_COLUMNS (1L << 12) // SELECT, intern -#define OPTION_WARNINGS (1L << 13) // THD, user -#define OPTION_AUTO_IS_NULL (1L << 14) // THD, user, binlog -#define OPTION_FOUND_COMMENT (1L << 15) // SELECT, intern, parser -#define OPTION_SAFE_UPDATES (1L << 16) // THD, user -#define OPTION_BUFFER_RESULT (1L << 17) // SELECT, user -#define OPTION_BIN_LOG (1L << 18) // THD, user -#define OPTION_NOT_AUTOCOMMIT (1L << 19) // THD, user -#define OPTION_BEGIN (1L << 20) // THD, intern -#define OPTION_TABLE_LOCK (1L << 21) // THD, intern -#define OPTION_QUICK (1L << 22) // SELECT (for DELETE) -#define OPTION_QUOTE_SHOW_CREATE (1L << 23) // THD, user +#define SELECT_DISTINCT (LL(1) << 0) // SELECT, user +#define SELECT_STRAIGHT_JOIN (LL(1) << 1) // SELECT, user +#define SELECT_DESCRIBE (LL(1) << 2) // SELECT, user +#define SELECT_SMALL_RESULT (LL(1) << 3) // SELECT, user +#define SELECT_BIG_RESULT (LL(1) << 4) // SELECT, user +#define OPTION_FOUND_ROWS (LL(1) << 5) // SELECT, user +#define OPTION_TO_QUERY_CACHE (LL(1) << 6) // SELECT, user +#define SELECT_NO_JOIN_CACHE (LL(1) << 7) // intern +#define OPTION_BIG_TABLES (LL(1) << 8) // THD, user +#define OPTION_BIG_SELECTS (LL(1) << 9) // THD, user +#define OPTION_LOG_OFF (LL(1) << 10) // THD, user +#define OPTION_UPDATE_LOG (LL(1) << 11) // THD, user, unused +#define TMP_TABLE_ALL_COLUMNS (LL(1) << 12) // SELECT, intern +#define OPTION_WARNINGS (LL(1) << 13) // THD, user +#define OPTION_AUTO_IS_NULL (LL(1) << 14) // THD, user, binlog +#define OPTION_FOUND_COMMENT (LL(1) << 15) // SELECT, intern, parser +#define OPTION_SAFE_UPDATES (LL(1) << 16) // THD, user +#define OPTION_BUFFER_RESULT (LL(1) << 17) // SELECT, user +#define OPTION_BIN_LOG (LL(1) << 18) // THD, user +#define OPTION_NOT_AUTOCOMMIT (LL(1) << 19) // THD, user +#define OPTION_BEGIN (LL(1) << 20) // THD, intern +#define OPTION_TABLE_LOCK (LL(1) << 21) // THD, intern +#define OPTION_QUICK (LL(1) << 22) // SELECT (for DELETE) +#define OPTION_QUOTE_SHOW_CREATE (LL(1) << 23) // THD, user /* The following is used to detect a conflict with a DISTINCT that the user query has requested */ -#define SELECT_ALL (1L << 24) // SELECT, user, parser +#define SELECT_ALL (LL(1) << 24) // SELECT, user, parser /* Set if we are updating a non-transaction safe table */ -#define OPTION_STATUS_NO_TRANS_UPDATE (1L << 25) // THD, intern +#define OPTION_STATUS_NO_TRANS_UPDATE (LL(1) << 25) // THD, intern /* The following can be set when importing tables in a 'wrong order' to suppress foreign key checks */ -#define OPTION_NO_FOREIGN_KEY_CHECKS (1L << 26) // THD, user, binlog +#define OPTION_NO_FOREIGN_KEY_CHECKS (LL(1) << 26) // THD, user, binlog /* The following speeds up inserts to InnoDB tables by suppressing unique key checks in some cases */ -#define OPTION_RELAXED_UNIQUE_CHECKS (1L << 27) // THD, user, binlog -#define SELECT_NO_UNLOCK (1L << 28) // SELECT,
intern -#define OPTION_SCHEMA_TABLE (1L << 29) // SELECT, intern +#define OPTION_RELAXED_UNIQUE_CHECKS (LL(1) << 27) // THD, user, binlog +#define SELECT_NO_UNLOCK (LL(1) << 28) // SELECT, intern +#define OPTION_SCHEMA_TABLE (LL(1) << 29) // SELECT, intern /* Flag set if setup_tables already done */ -#define OPTION_SETUP_TABLES_DONE (1L << 30) // intern +#define OPTION_SETUP_TABLES_DONE (LL(1) << 30) // intern /* If not set then the thread will ignore all warnings with level notes. */ -#define OPTION_SQL_NOTES (1UL << 31) // THD, user +#define OPTION_SQL_NOTES (LL(1) << 31) // THD, user /* Force the used temporary table to be a MyISAM table (because we will use fulltext functions when reading from it. @@ -414,6 +409,13 @@ void view_store_options(THD *thd, st_table_list *table, String *buff); #define STRING_BUFFER_USUAL_SIZE 80 +/* + Some defines for exit codes for ::is_equal class functions. +*/ +#define IS_EQUAL_NO 0 +#define IS_EQUAL_YES 1 +#define IS_EQUAL_PACK_LENGTH 2 + enum enum_parsing_place { NO_MATTER, @@ -494,6 +496,7 @@ typedef my_bool (*qc_engine_callback)(THD *thd, char *table_key, #include "sql_error.h" #include "field.h" /* Field definitions */ #include "protocol.h" +#include "sql_plugin.h" #include "sql_udf.h" class user_var_entry; class Security_context; @@ -597,6 +600,7 @@ bool mysql_create_db(THD *thd, char *db, HA_CREATE_INFO *create, bool silent); bool mysql_alter_db(THD *thd, const char *db, HA_CREATE_INFO *create); bool mysql_rm_db(THD *thd,char *db,bool if_exists, bool silent); void mysql_binlog_send(THD* thd, char* log_ident, my_off_t pos, ushort flags); +void mysql_client_binlog_statement(THD *thd); bool mysql_rm_table(THD *thd,TABLE_LIST *tables, my_bool if_exists, my_bool drop_temporary); int mysql_rm_table_part2(THD *thd, TABLE_LIST *tables, bool if_exists, @@ -604,8 +608,8 @@ int mysql_rm_table_part2(THD *thd, TABLE_LIST *tables, bool if_exists, int mysql_rm_table_part2_with_lock(THD *thd, TABLE_LIST *tables, bool if_exists, bool drop_temporary, bool log_query); -int quick_rm_table(enum db_type base,const char *db, - const char *table_name); +bool quick_rm_table(handlerton *base,const char *db, + const char *table_name); void close_cached_table(THD *thd, TABLE *table); bool mysql_rename_tables(THD *thd, TABLE_LIST *table_list); bool mysql_change_db(THD *thd,const char *name,bool no_access_check); @@ -636,7 +640,10 @@ bool check_dup(const char *db, const char *name, TABLE_LIST *tables); bool table_cache_init(void); void table_cache_free(void); -uint cached_tables(void); +bool table_def_init(void); +void table_def_free(void); +uint cached_open_tables(void); +uint cached_table_definitions(void); void kill_mysql(void); void close_connection(THD *thd, uint errcode, bool lock); bool reload_acl_and_cache(THD *thd, ulong options, TABLE_LIST *tables, @@ -647,6 +654,22 @@ bool check_table_access(THD *thd, ulong want_access, TABLE_LIST *tables, bool no_errors); bool check_global_access(THD *thd, ulong want_access); +/* + Support routine for SQL parser on partitioning syntax +*/ +my_bool is_partition_management(LEX *lex); +/* + General routine to change field->ptr of a NULL-terminated array of Field + objects. Useful when one needs to call val_int(), val_str() or similar and the + field data is not in table->record[0] but in some other structure. + set_key_field_ptr changes all fields of an index using a key_info object. + All methods presume that there is at least one field to change.
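To make the contract above concrete, a hedged usage sketch of set_field_ptr() (fields and other_buf are illustrative; fields must be a NULL-terminated Field* array with at least one element):

    set_field_ptr(fields, other_buf, table->record[0]); // point fields into other_buf
    longlong v= fields[0]->val_int();                   // value is read from other_buf
    set_field_ptr(fields, table->record[0], other_buf); // restore the original pointers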
+*/ + +void set_field_ptr(Field **ptr, const byte *new_buf, const byte *old_buf); +void set_key_field_ptr(KEY *key_info, const byte *new_buf, + const byte *old_buf); + bool mysql_backup_table(THD* thd, TABLE_LIST* table_list); bool mysql_restore_table(THD* thd, TABLE_LIST* table_list); @@ -732,7 +755,7 @@ bool mysql_recreate_table(THD *thd, TABLE_LIST *table_list, bool do_send_ok); bool mysql_create_like_table(THD *thd, TABLE_LIST *table, HA_CREATE_INFO *create_info, Table_ident *src_table); -bool mysql_rename_table(enum db_type base, +bool mysql_rename_table(handlerton *base, const char *old_db, const char * old_name, const char *new_db, @@ -768,15 +791,18 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds, bool reset_auto_increment); bool mysql_truncate(THD *thd, TABLE_LIST *table_list, bool dont_send_ok); bool mysql_create_or_drop_trigger(THD *thd, TABLE_LIST *tables, bool create); +uint create_table_def_key(THD *thd, char *key, TABLE_LIST *table_list, + bool tmp_table); +TABLE_SHARE *get_table_share(THD *thd, TABLE_LIST *table_list, char *key, + uint key_length, uint db_flags, int *error); +void release_table_share(TABLE_SHARE *share, enum release_type type); +TABLE_SHARE *get_cached_table_share(const char *db, const char *table_name); TABLE *open_ltable(THD *thd, TABLE_LIST *table_list, thr_lock_type update); TABLE *open_table(THD *thd, TABLE_LIST *table_list, MEM_ROOT* mem, bool *refresh, uint flags); bool reopen_name_locked_table(THD* thd, TABLE_LIST* table); TABLE *find_locked_table(THD *thd, const char *db,const char *table_name); -bool reopen_table(TABLE *table,bool locked); bool reopen_tables(THD *thd,bool get_locks,bool in_refresh); -void close_old_data_files(THD *thd, TABLE *table, bool abort_locks, - bool send_refresh); bool close_data_tables(THD *thd,const char *db, const char *table_name); bool wait_for_tables(THD *thd); bool table_is_used(TABLE *table, bool wait_for_name_lock); @@ -806,6 +832,8 @@ find_field_in_table_ref(THD *thd, TABLE_LIST *table_list, Field * find_field_in_table(THD *thd, TABLE *table, const char *name, uint length, bool allow_rowid, uint *cached_field_index_ptr); +Field * +find_field_in_table_sef(TABLE *table, const char *name); #ifdef HAVE_OPENSSL #include <openssl/des.h> @@ -847,6 +875,7 @@ int mysqld_show_variables(THD *thd,const char *wild); int mysql_find_files(THD *thd,List<char> *files, const char *db, const char *path, const char *wild, bool dir); bool mysqld_show_storage_engines(THD *thd); +bool mysqld_show_authors(THD *thd); bool mysqld_show_privileges(THD *thd); bool mysqld_show_column_types(THD *thd); bool mysqld_help (THD *thd, const char *text); @@ -950,10 +979,10 @@ bool setup_tables(THD *thd, Name_resolution_context *context, int setup_wild(THD *thd, TABLE_LIST *tables, List<Item> &fields, List<Item> *sum_func_list, uint wild_num); bool setup_fields(THD *thd, Item** ref_pointer_array, - List<Item> &item, bool set_query_id, + List<Item> &item, ulong set_query_id, List<Item> *sum_func_list, bool allow_sum_func); inline bool setup_fields_with_no_wrap(THD *thd, Item **ref_pointer_array, - List<Item> &item, bool set_query_id, + List<Item> &item, ulong set_query_id, List<Item> *sum_func_list, bool allow_sum_func) { @@ -968,7 +997,8 @@ int setup_conds(THD *thd, TABLE_LIST *tables, TABLE_LIST *leaves, COND **conds); int setup_ftfuncs(SELECT_LEX* select); int init_ftfuncs(THD *thd, SELECT_LEX* select, bool no_order); -void wait_for_refresh(THD *thd); +void wait_for_condition(THD *thd, pthread_mutex_t *mutex, + pthread_cond_t 
*cond); int open_tables(THD *thd, TABLE_LIST **tables, uint *counter, uint flags); int simple_open_n_lock_tables(THD *thd,TABLE_LIST *tables); bool open_and_lock_tables(THD *thd,TABLE_LIST *tables); @@ -976,7 +1006,7 @@ bool open_normal_and_derived_tables(THD *thd, TABLE_LIST *tables, uint flags); int lock_tables(THD *thd, TABLE_LIST *tables, uint counter, bool *need_reopen); TABLE *open_temporary_table(THD *thd, const char *path, const char *db, const char *table_name, bool link_in_list); -bool rm_temporary_table(enum db_type base, char *path); +bool rm_temporary_table(handlerton *base, char *path); void free_io_cache(TABLE *entry); void intern_close_table(TABLE *entry); bool close_thread_table(THD *thd, TABLE **table_ptr); @@ -987,9 +1017,12 @@ TABLE_LIST *find_table_in_list(TABLE_LIST *table, const char *db_name, const char *table_name); TABLE_LIST *unique_table(THD *thd, TABLE_LIST *table, TABLE_LIST *table_list); -TABLE **find_temporary_table(THD *thd, const char *db, const char *table_name); -bool close_temporary_table(THD *thd, const char *db, const char *table_name); -void close_temporary(TABLE *table, bool delete_table); +TABLE *find_temporary_table(THD *thd, const char *db, const char *table_name); +TABLE *find_temporary_table(THD *thd, TABLE_LIST *table_list); +bool close_temporary_table(THD *thd, TABLE_LIST *table_list); +void close_temporary_table(THD *thd, TABLE *table, bool free_share, + bool delete_table); +void close_temporary(TABLE *table, bool free_share, bool delete_table); bool rename_temporary_table(THD* thd, TABLE *table, const char *new_db, const char *table_name); void remove_db_from_cache(const char *db); @@ -1055,6 +1088,8 @@ extern ulong volatile manager_status; extern bool volatile manager_thread_in_use, mqh_used; extern pthread_t manager_thread; pthread_handler_t handle_manager(void *arg); +bool mysql_manager_submit(void (*action)()); + /* sql_test.cc */ #ifndef DBUG_OFF @@ -1066,7 +1101,7 @@ void print_plan(JOIN* join, double read_time, double record_count, #endif void mysql_print_status(); /* key.cc */ -int find_ref_key(TABLE *form,Field *field, uint *offset); +int find_ref_key(KEY *key, uint key_count, Field *field, uint *key_length); void key_copy(byte *to_key, byte *from_record, KEY *key_info, uint key_length); void key_restore(byte *to_record, byte *from_key, KEY *key_info, uint key_length); @@ -1074,6 +1109,7 @@ bool key_cmp_if_same(TABLE *form,const byte *key,uint index,uint key_length); void key_unpack(String *to,TABLE *form,uint index); bool check_if_key_used(TABLE *table, uint idx, List<Item> &fields); int key_cmp(KEY_PART_INFO *key_part, const byte *key, uint key_length); +int key_rec_cmp(void *key_info, byte *a, byte *b); bool init_errmessage(void); void sql_perror(const char *message); @@ -1138,6 +1174,7 @@ extern Lt_creator lt_creator; extern Ge_creator ge_creator; extern Le_creator le_creator; extern char language[FN_REFLEN], reg_ext[FN_EXTLEN]; +extern uint reg_ext_length; extern char glob_hostname[FN_REFLEN], mysql_home[FN_REFLEN]; extern char pidfile_name[FN_REFLEN], system_time_zone[30], *opt_init_file; extern char log_error_file[FN_REFLEN], *opt_tc_log_file; @@ -1155,7 +1192,7 @@ extern ulong delayed_rows_in_use,delayed_insert_errors; extern ulong slave_open_temp_tables; extern ulong query_cache_size, query_cache_min_res_unit; extern ulong slow_launch_threads, slow_launch_time; -extern ulong table_cache_size; +extern ulong table_cache_size, table_def_size; extern ulong max_connections,max_connect_errors, connect_timeout; extern ulong 
slave_net_timeout, slave_trans_retries; extern uint max_user_connections; @@ -1163,6 +1200,13 @@ extern ulong what_to_log,flush_time; extern ulong query_buff_size, thread_stack; extern ulong binlog_cache_size, max_binlog_cache_size, open_files_limit; extern ulong max_binlog_size, max_relay_log_size; +extern const char *opt_binlog_format; +#ifdef HAVE_ROW_BASED_REPLICATION +extern my_bool binlog_row_based; +extern ulong opt_binlog_rows_event_max_size; +#else +extern const my_bool binlog_row_based; +#endif extern ulong rpl_recovery_rank, thread_cache_size; extern ulong back_log; extern ulong specialflag, current_pid; @@ -1184,7 +1228,7 @@ extern bool volatile abort_loop, shutdown_in_progress, grant_option; extern bool mysql_proc_table_exists; extern uint volatile thread_count, thread_running, global_read_lock; extern my_bool opt_sql_bin_update, opt_safe_user_create, opt_no_mix_types; -extern my_bool opt_safe_show_db, opt_local_infile; +extern my_bool opt_safe_show_db, opt_local_infile, opt_myisam_use_mmap; extern my_bool opt_slave_compressed_protocol, use_temp_pool; extern my_bool opt_readonly, lower_case_file_system; extern my_bool opt_enable_named_pipe, opt_sync_frm, opt_allow_suspicious_udfs; @@ -1214,6 +1258,9 @@ extern pthread_mutex_t LOCK_mysql_create_db,LOCK_Acl,LOCK_open, #ifdef HAVE_OPENSSL extern pthread_mutex_t LOCK_des_key_file; #endif +extern pthread_mutex_t LOCK_server_started; +extern pthread_cond_t COND_server_started; +extern int mysqld_server_started; extern rw_lock_t LOCK_grant, LOCK_sys_init_connect, LOCK_sys_init_slave; extern pthread_cond_t COND_refresh, COND_thread_count, COND_manager; extern pthread_attr_t connection_attrib; @@ -1234,18 +1281,73 @@ extern KNOWN_DATE_TIME_FORMAT known_date_time_formats[]; extern String null_string; extern HASH open_cache; extern TABLE *unused_tables; -extern I_List<i_string> binlog_do_db, binlog_ignore_db; extern const char* any_db; extern struct my_option my_long_options[]; extern const LEX_STRING view_type; /* optional things, have_* variables */ -extern SHOW_COMP_OPTION have_isam, have_innodb, have_berkeley_db; -extern SHOW_COMP_OPTION have_example_db, have_archive_db, have_csv_db; +#ifdef WITH_INNOBASE_STORAGE_ENGINE +extern handlerton innobase_hton; +#define have_innodb innobase_hton.state +#else +extern SHOW_COMP_OPTION have_innodb; +#endif +#ifdef WITH_BERKELEY_STORAGE_ENGINE +extern handlerton berkeley_hton; +#define have_berkeley_db berkeley_hton.state +#else +extern SHOW_COMP_OPTION have_berkeley_db; +#endif +#ifdef WITH_EXAMPLE_STORAGE_ENGINE +extern handlerton example_hton; +#define have_example_db example_hton.state +#else +extern SHOW_COMP_OPTION have_example_db; +#endif +#ifdef WITH_ARCHIVE_STORAGE_ENGINE +extern handlerton archive_hton; +#define have_archive_db archive_hton.state +#else +extern SHOW_COMP_OPTION have_archive_db; +#endif +#ifdef WITH_CSV_STORAGE_ENGINE +extern handlerton tina_hton; +#define have_csv_db tina_hton.state +#else +extern SHOW_COMP_OPTION have_csv_db; +#endif +#ifdef WITH_FEDERATED_STORAGE_ENGINE +extern handlerton federated_hton; +#define have_federated_db federated_hton.state +#else extern SHOW_COMP_OPTION have_federated_db; +#endif +#ifdef WITH_BLACKHOLE_STORAGE_ENGINE +extern handlerton blackhole_hton; +#define have_blackhole_db blackhole_hton.state +#else extern SHOW_COMP_OPTION have_blackhole_db; +#endif +#ifdef WITH_NDBCLUSTER_STORAGE_ENGINE +extern handlerton ndbcluster_hton; +#define have_ndbcluster ndbcluster_hton.state +#else extern SHOW_COMP_OPTION have_ndbcluster; +#endif +#ifdef 
WITH_PARTITION_STORAGE_ENGINE +extern handlerton partition_hton; +#define have_partition_db partition_hton.state +#else +extern SHOW_COMP_OPTION have_partition_db; +#endif + +extern handlerton myisam_hton; +extern handlerton myisammrg_hton; +extern handlerton heap_hton; + +extern SHOW_COMP_OPTION have_isam; +extern SHOW_COMP_OPTION have_row_based_replication; extern SHOW_COMP_OPTION have_raid, have_openssl, have_symlink; extern SHOW_COMP_OPTION have_query_cache; extern SHOW_COMP_OPTION have_geometry, have_rtree_keys; @@ -1299,23 +1401,36 @@ void unlock_table_names(THD *thd, TABLE_LIST *table_list, void unireg_init(ulong options); void unireg_end(void); -bool mysql_create_frm(THD *thd, my_string file_name, +bool mysql_create_frm(THD *thd, const char *file_name, const char *db, const char *table, HA_CREATE_INFO *create_info, List<create_field> &create_field, uint key_count,KEY *key_info,handler *db_type); -int rea_create_table(THD *thd, my_string file_name, - const char *db, const char *table, +int rea_create_table(THD *thd, const char *path, + const char *db, const char *table_name, HA_CREATE_INFO *create_info, - List<create_field> &create_field, - uint key_count,KEY *key_info); + List<create_field> &create_field, + uint key_count,KEY *key_info, + handler *file); int format_number(uint inputflag,uint max_length,my_string pos,uint length, my_string *errpos); + +/* table.cc */ +TABLE_SHARE *alloc_table_share(TABLE_LIST *table_list, char *key, + uint key_length); +void init_tmp_table_share(TABLE_SHARE *share, const char *key, uint key_length, + const char *table_name, const char *path); +void free_table_share(TABLE_SHARE *share); +int open_table_def(THD *thd, TABLE_SHARE *share, uint db_flags); +void open_table_error(TABLE_SHARE *share, int error, int db_errno, int errarg); +int open_table_from_share(THD *thd, TABLE_SHARE *share, const char *alias, + uint db_stat, uint prgflag, uint ha_open_flags, + TABLE *outparam); int openfrm(THD *thd, const char *name,const char *alias,uint filestat, uint prgflag, uint ha_open_flags, TABLE *outparam); int readfrm(const char *name, const void** data, uint* length); int writefrm(const char* name, const void* data, uint len); -int closefrm(TABLE *table); +int closefrm(TABLE *table, bool free_share); int read_string(File file, gptr *to, uint length); void free_blobs(TABLE *table); int set_zone(int nr,int min_zone,int max_zone); @@ -1374,8 +1489,8 @@ ulong make_new_entry(File file,uchar *fileinfo,TYPELIB *formnames, const char *newname); ulong next_io_size(ulong pos); void append_unescaped(String *res, const char *pos, uint length); -int create_frm(THD *thd, char *name, const char *db, const char *table, - uint reclength,uchar *fileinfo, +int create_frm(THD *thd, const char *name, const char *db, const char *table, + uint reclength, uchar *fileinfo, HA_CREATE_INFO *create_info, uint keys); void update_create_info_from_table(HA_CREATE_INFO *info, TABLE *form); int rename_file_ext(const char * from,const char * to,const char * ext); @@ -1385,7 +1500,15 @@ bool check_table_name(const char *name, uint length); char *get_field(MEM_ROOT *mem, Field *field); bool get_field(MEM_ROOT *mem, Field *field, class String *res); int wild_case_compare(CHARSET_INFO *cs, const char *str,const char *wildstr); - +char *fn_rext(char *name); + +/* Conversion functions */ +uint strconvert(CHARSET_INFO *from_cs, const char *from, + CHARSET_INFO *to_cs, char *to, uint to_length, uint *errors); +uint filename_to_tablename(const char *from, char *to, uint to_length); +uint 
tablename_to_filename(const char *from, char *to, uint to_length); +uint build_table_filename(char *buff, size_t bufflen, const char *db, + const char *table, const char *ext); /* from hostname.cc */ struct in_addr; my_string ip_to_hostname(struct in_addr *in,uint *errors); diff --git a/sql/mysqld.cc b/sql/mysqld.cc index 1f32a8285d1..4a6dddcad11 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -19,28 +19,21 @@ #include <my_dir.h> #include "slave.h" #include "sql_repl.h" +#include "rpl_filter.h" #include "repl_failsafe.h" #include "stacktrace.h" #include "mysqld_suffix.h" #include "mysys_err.h" -#ifdef HAVE_BERKELEY_DB -#include "ha_berkeley.h" -#endif -#ifdef HAVE_INNOBASE_DB -#include "ha_innodb.h" -#endif + #include "ha_myisam.h" -#ifdef HAVE_NDBCLUSTER_DB -#include "ha_ndbcluster.h" -#endif -#ifdef HAVE_INNOBASE_DB +#ifdef WITH_INNOBASE_STORAGE_ENGINE #define OPT_INNODB_DEFAULT 1 #else #define OPT_INNODB_DEFAULT 0 #endif #define OPT_BDB_DEFAULT 0 -#ifdef HAVE_NDBCLUSTER_DB +#ifdef WITH_NDBCLUSTER_STORAGE_ENGINE #define OPT_NDBCLUSTER_DEFAULT 0 #if defined(NOT_ENOUGH_TESTED) \ && defined(NDB_SHM_TRANSPORTER) && MYSQL_VERSION_ID >= 50000 @@ -329,7 +322,7 @@ static I_List<THD> thread_cache; static pthread_cond_t COND_thread_cache, COND_flush_thread_cache; -#ifdef HAVE_BERKELEY_DB +#ifdef WITH_BERKELEY_STORAGE_ENGINE static my_bool opt_sync_bdb_logs; #endif @@ -354,7 +347,67 @@ my_bool opt_safe_user_create = 0, opt_no_mix_types = 0; my_bool opt_show_slave_auth_info, opt_sql_bin_update = 0; my_bool opt_log_slave_updates= 0; my_bool opt_innodb; -#ifdef HAVE_NDBCLUSTER_DB +#ifdef WITH_INNOBASE_STORAGE_ENGINE +extern struct show_var_st innodb_status_variables[]; +extern uint innobase_init_flags, innobase_lock_type; +extern uint innobase_flush_log_at_trx_commit; +extern ulong innobase_cache_size, innobase_fast_shutdown; +extern ulong innobase_large_page_size; +extern char *innobase_home, *innobase_tmpdir, *innobase_logdir; +extern long innobase_lock_scan_time; +extern long innobase_mirrored_log_groups, innobase_log_files_in_group; +extern longlong innobase_log_file_size; +extern long innobase_log_buffer_size; +extern longlong innobase_buffer_pool_size; +extern long innobase_additional_mem_pool_size; +extern long innobase_buffer_pool_awe_mem_mb; +extern long innobase_file_io_threads, innobase_lock_wait_timeout; +extern long innobase_force_recovery; +extern long innobase_open_files; +extern char *innobase_data_home_dir, *innobase_data_file_path; +extern char *innobase_log_group_home_dir, *innobase_log_arch_dir; +extern char *innobase_unix_file_flush_method; +/* The following variables have to be my_bool for SHOW VARIABLES to work */ +extern my_bool innobase_log_archive, + innobase_use_doublewrite, + innobase_use_checksums, + innobase_use_large_pages, + innobase_use_native_aio, + innobase_file_per_table, innobase_locks_unsafe_for_binlog, + innobase_create_status_file; +extern my_bool innobase_very_fast_shutdown; /* set this to 1 just before + calling innobase_end() if you want + InnoDB to shut down without + flushing the buffer pool: this + is equivalent to a 'crash' */ +extern "C" { +extern ulong srv_max_buf_pool_modified_pct; +extern ulong srv_max_purge_lag; +extern ulong srv_auto_extend_increment; +extern ulong srv_n_spin_wait_rounds; +extern ulong srv_n_free_tickets_to_enter; +extern ulong srv_thread_sleep_delay; +extern ulong srv_thread_concurrency; +extern ulong srv_commit_concurrency; +} +#endif +#ifdef WITH_BERKELEY_STORAGE_ENGINE +#ifndef HAVE_U_INT32_T +typedef unsigned int u_int32_t; 
+#endif +extern const u_int32_t bdb_DB_TXN_NOSYNC, bdb_DB_RECOVER, bdb_DB_PRIVATE, + bdb_DB_DIRECT_DB, bdb_DB_DIRECT_LOG; +extern bool berkeley_shared_data; +extern u_int32_t berkeley_init_flags,berkeley_env_flags, berkeley_lock_type, + berkeley_lock_types[]; +extern ulong berkeley_max_lock, berkeley_log_buffer_size; +extern ulonglong berkeley_cache_size; +extern ulong berkeley_region_size, berkeley_cache_parts; +extern char *berkeley_home, *berkeley_tmpdir, *berkeley_logdir; +extern long berkeley_lock_scan_time; +extern TYPELIB berkeley_lock_typelib; +#endif +#ifdef WITH_NDBCLUSTER_STORAGE_ENGINE const char *opt_ndbcluster_connectstring= 0; const char *opt_ndb_connectstring= 0; char opt_ndb_constrbuf[1024]; @@ -363,6 +416,12 @@ my_bool opt_ndb_shm, opt_ndb_optimized_node_selection; ulong opt_ndb_cache_check_time; const char *opt_ndb_mgmd; ulong opt_ndb_nodeid; + +extern struct show_var_st ndb_status_variables[]; +extern const char *ndb_distribution_names[]; +extern TYPELIB ndb_distribution_typelib; +extern const char *opt_ndb_distribution; +extern enum ndb_distribution opt_ndb_distribution_id; #endif my_bool opt_readonly, use_temp_pool, relay_log_purge; my_bool opt_sync_frm, opt_allow_suspicious_udfs; @@ -370,6 +429,7 @@ my_bool opt_secure_auth= 0; my_bool opt_log_slow_admin_statements= 0; my_bool lower_case_file_system= 0; my_bool opt_large_pages= 0; +my_bool opt_myisam_use_mmap= 0; uint opt_large_page_size= 0; my_bool opt_old_style_user_limits= 0, trust_function_creators= 0; /* @@ -381,6 +441,33 @@ volatile bool mqh_used = 0; my_bool opt_noacl; my_bool sp_automatic_privileges= 1; +#ifdef HAVE_ROW_BASED_REPLICATION +/* + This variable below serves as an optimization for (opt_binlog_format == + BF_ROW) as we need to do this test for every row. Stmt-based is default. +*/ +my_bool binlog_row_based= FALSE; +ulong opt_binlog_rows_event_max_size; +const char *binlog_format_names[]= {"STATEMENT", "ROW", NullS}; +/* + Note that BF_UNSPECIFIED is last, after the end of binlog_format_names: it + has no corresponding cell in this array. We use this value to be able to + know if the user has explicitely specified a binlog format (then we require + also --log-bin) or not (then we fall back to statement-based). +*/ +enum binlog_format { BF_STMT= 0, BF_ROW= 1, BF_UNSPECIFIED= 2 }; +#else +const my_bool binlog_row_based= FALSE; +const char *binlog_format_names[]= {"STATEMENT", NullS}; +enum binlog_format { BF_STMT= 0, BF_UNSPECIFIED= 2 }; +#endif + +TYPELIB binlog_format_typelib= + { array_elements(binlog_format_names)-1,"", + binlog_format_names, NULL }; +const char *opt_binlog_format= 0; +enum binlog_format opt_binlog_format_id= BF_UNSPECIFIED; + #ifdef HAVE_INITGROUPS static bool calling_initgroups= FALSE; /* Used in SIGSEGV handler. 
*/ #endif @@ -392,7 +479,8 @@ uint tc_heuristic_recover= 0; uint volatile thread_count, thread_running; ulonglong thd_startup_options; ulong back_log, connect_timeout, concurrency, server_id; -ulong table_cache_size, thread_stack, what_to_log; +ulong table_cache_size, table_def_size; +ulong thread_stack, what_to_log; ulong query_buff_size, slow_launch_time, slave_open_temp_tables; ulong open_files_limit, max_binlog_size, max_relay_log_size; ulong slave_net_timeout, slave_trans_retries; @@ -423,7 +511,7 @@ char mysql_real_data_home[FN_REFLEN], language[FN_REFLEN], reg_ext[FN_EXTLEN], mysql_charsets_dir[FN_REFLEN], *opt_init_file, *opt_tc_log_file, def_ft_boolean_syntax[sizeof(ft_boolean_syntax)]; - +uint reg_ext_length; const key_map key_map_empty(0); key_map key_map_full(0); // Will be initialized later @@ -453,12 +541,10 @@ FILE *bootstrap_file; int bootstrap_error; FILE *stderror_file=0; -I_List<i_string_pair> replicate_rewrite_db; -I_List<i_string> replicate_do_db, replicate_ignore_db; -// allow the user to tell us which db to replicate and which to ignore -I_List<i_string> binlog_do_db, binlog_ignore_db; I_List<THD> threads; I_List<NAMED_LIST> key_caches; +Rpl_filter* rpl_filter; +Rpl_filter* binlog_filter; struct system_variables global_system_variables; struct system_variables max_system_variables; @@ -470,13 +556,10 @@ MY_BITMAP temp_pool; CHARSET_INFO *system_charset_info, *files_charset_info ; CHARSET_INFO *national_charset_info, *table_alias_charset; -SHOW_COMP_OPTION have_berkeley_db, have_innodb, have_isam, have_ndbcluster, - have_example_db, have_archive_db, have_csv_db; -SHOW_COMP_OPTION have_federated_db; +SHOW_COMP_OPTION have_row_based_replication; SHOW_COMP_OPTION have_raid, have_openssl, have_symlink, have_query_cache; SHOW_COMP_OPTION have_geometry, have_rtree_keys; SHOW_COMP_OPTION have_crypt, have_compress; -SHOW_COMP_OPTION have_blackhole_db; /* Thread specific variables */ @@ -496,6 +579,10 @@ rw_lock_t LOCK_grant, LOCK_sys_init_connect, LOCK_sys_init_slave; pthread_cond_t COND_refresh,COND_thread_count; pthread_t signal_thread; pthread_attr_t connection_attrib; +pthread_mutex_t LOCK_server_started; +pthread_cond_t COND_server_started; + +int mysqld_server_started= 0; File_parser_dummy_hook file_parser_dummy_hook; @@ -1060,16 +1147,20 @@ void clean_up(bool print_message) #endif query_cache_destroy(); table_cache_free(); + table_def_free(); hostname_cache_free(); item_user_lock_free(); lex_free(); /* Free some memory */ set_var_free(); free_charsets(); -#ifdef HAVE_DLOPEN + (void) ha_panic(HA_PANIC_CLOSE); /* close all tables and logs */ if (!opt_noacl) + { +#ifdef HAVE_DLOPEN udf_free(); #endif - (void) ha_panic(HA_PANIC_CLOSE); /* close all tables and logs */ + plugin_free(); + } if (tc_log) tc_log->close(); xid_cache_free(); @@ -1100,12 +1191,9 @@ void clean_up(bool print_message) free_max_user_conn(); #ifdef HAVE_REPLICATION end_slave_list(); - free_list(&replicate_do_db); - free_list(&replicate_ignore_db); - free_list(&binlog_do_db); - free_list(&binlog_ignore_db); - free_list(&replicate_rewrite_db); #endif + delete binlog_filter; + delete rpl_filter; #ifdef HAVE_OPENSSL if (ssl_acceptor_fd) my_free((gptr) ssl_acceptor_fd, MYF(MY_ALLOW_ZERO_PTR)); @@ -1362,7 +1450,7 @@ static void network_init(void) uint waited; uint this_wait; uint retry; - DBUG_ENTER("server_init"); + DBUG_ENTER("network_init"); LINT_INIT(ret); set_ports(); @@ -2482,7 +2570,7 @@ pthread_handler_t handle_shutdown(void *arg) static const char *load_default_groups[]= { -#ifdef HAVE_NDBCLUSTER_DB 
+#ifdef WITH_NDBCLUSTER_STORAGE_ENGINE "mysql_cluster", #endif "mysqld","server", MYSQL_BASE_VERSION, 0, 0}; @@ -2588,6 +2676,18 @@ static int init_common_variables(const char *conf_file_name, int argc, strmake(pidfile_name, glob_hostname, sizeof(pidfile_name)-5); strmov(fn_ext(pidfile_name),".pid"); // Add proper extension + if (plugin_init()) + { + sql_print_error("Failed to init plugins."); + return 1; + } + + if (ha_register_builtin_plugins()) + { + sql_print_error("Failed to register built-in storage engines."); + return 1; + } + load_defaults(conf_file_name, groups, &argc, &argv); defaults_argv=argv; get_options(argc,argv); @@ -2602,7 +2702,7 @@ static int init_common_variables(const char *conf_file_name, int argc, { my_use_large_pages= 1; my_large_page_size= opt_large_page_size; -#ifdef HAVE_INNOBASE_DB +#ifdef WITH_INNOBASE_STORAGE_ENGINE innobase_use_large_pages= 1; innobase_large_page_size= opt_large_page_size; #endif @@ -2746,7 +2846,7 @@ static int init_thread_environment() { (void) pthread_mutex_init(&LOCK_mysql_create_db,MY_MUTEX_INIT_SLOW); (void) pthread_mutex_init(&LOCK_Acl,MY_MUTEX_INIT_SLOW); - (void) pthread_mutex_init(&LOCK_open,MY_MUTEX_INIT_FAST); + (void) pthread_mutex_init(&LOCK_open, NULL); (void) pthread_mutex_init(&LOCK_thread_count,MY_MUTEX_INIT_FAST); (void) pthread_mutex_init(&LOCK_mapped_file,MY_MUTEX_INIT_SLOW); (void) pthread_mutex_init(&LOCK_status,MY_MUTEX_INIT_FAST); @@ -2789,6 +2889,8 @@ static int init_thread_environment() (void) pthread_mutex_init(&LOCK_rpl_status, MY_MUTEX_INIT_FAST); (void) pthread_cond_init(&COND_rpl_status, NULL); #endif + (void) pthread_mutex_init(&LOCK_server_started, MY_MUTEX_INIT_FAST); + (void) pthread_cond_init(&COND_server_started,NULL); sp_cache_init(); /* Parameter for threads created for connections */ (void) pthread_attr_init(&connection_attrib); @@ -2906,7 +3008,11 @@ static void init_ssl() static int init_server_components() { DBUG_ENTER("init_server_components"); - if (table_cache_init() || hostname_cache_init()) + /* + We need to call each of these following functions to ensure that + all things are initialized so that unireg_abort() doesn't fail + */ + if (table_cache_init() | table_def_init() | hostname_cache_init()) unireg_abort(1); query_cache_result_size_limit(query_cache_limit); @@ -2978,8 +3084,44 @@ with --log-bin instead."); { sql_print_warning("You need to use --log-bin to make " "--log-slave-updates work."); - unireg_abort(1); + unireg_abort(1); + } + + if (!opt_bin_log && (opt_binlog_format_id != BF_UNSPECIFIED)) + { + sql_print_warning("You need to use --log-bin to make " + "--binlog-format work."); + unireg_abort(1); + } + if (opt_binlog_format_id == BF_UNSPECIFIED) + { + /* + We use statement-based by default, but could change this to be row-based + if this is a cluster build (i.e. have_ndbcluster is true)... + */ + opt_binlog_format_id= BF_STMT; + } +#ifdef HAVE_ROW_BASED_REPLICATION + if (opt_binlog_format_id == BF_ROW) + { + binlog_row_based= TRUE; + /* + Row-based binlogging turns on InnoDB unsafe locking, because the locks + are not needed when using row-based binlogging. In fact + innodb-locks-unsafe-for-binlog is unsafe only for stmt-based, it's + safe for row-based. 
+ */ +#ifdef HAVE_INNOBASE_DB + innobase_locks_unsafe_for_binlog= TRUE; +#endif + /* Trust stored function creators because they can do no harm */ + trust_function_creators= 1; } +#endif + /* Check that we have not let the format to unspecified at this point */ + DBUG_ASSERT((uint)opt_binlog_format_id <= + array_elements(binlog_format_names)-1); + opt_binlog_format= binlog_format_names[opt_binlog_format_id]; if (opt_slow_log) mysql_slow_log.open_slow_log(opt_slow_logname); @@ -3065,17 +3207,15 @@ server."); /* Check that the default storage engine is actually available. */ - if (!ha_storage_engine_is_enabled((enum db_type) - global_system_variables.table_type)) + if (!ha_storage_engine_is_enabled(global_system_variables.table_type)) { if (!opt_bootstrap) { sql_print_error("Default storage engine (%s) is not available", - ha_get_storage_engine((enum db_type) - global_system_variables.table_type)); + global_system_variables.table_type->name); unireg_abort(1); } - global_system_variables.table_type= DB_TYPE_MYISAM; + global_system_variables.table_type= &myisam_hton; } tc_log= (total_ha_2pc > 1 ? (opt_bin_log ? @@ -3145,7 +3285,7 @@ server."); static void create_maintenance_thread() { if ( -#ifdef HAVE_BERKELEY_DB +#ifdef WITH_BERKELEY_STORAGE_ENGINE (have_berkeley_db == SHOW_OPTION_YES) || #endif (flush_time && flush_time != ~(ulong) 0L)) @@ -3258,6 +3398,15 @@ int main(int argc, char **argv) #endif { DEBUGGER_OFF; + + rpl_filter= new Rpl_filter; + binlog_filter= new Rpl_filter; + if (!rpl_filter || !binlog_filter) + { + sql_perror("Could not allocate replication and binlog filters"); + exit(1); + } + MY_INIT(argv[0]); // init my_sys library & pthreads #ifdef _CUSTOMSTARTUPCONFIG_ @@ -3339,9 +3488,7 @@ int main(int argc, char **argv) */ check_data_home(mysql_real_data_home); if (my_setwd(mysql_real_data_home,MYF(MY_WME))) - { unireg_abort(1); /* purecov: inspected */ - } mysql_data_home= mysql_data_home_buff; mysql_data_home[0]=FN_CURLIB; // all paths are relative from here mysql_data_home[1]=0; @@ -3356,7 +3503,6 @@ int main(int argc, char **argv) set_user(mysqld_user, user_info); } - if (opt_bin_log && !server_id) { server_id= !master_host ? 1 : 2; @@ -3378,7 +3524,7 @@ we force server id to 2, but this MySQL server will not act as a slave."); } if (init_server_components()) - exit(1); + unireg_abort(1); network_init(); @@ -3422,10 +3568,13 @@ we force server id to 2, but this MySQL server will not act as a slave."); if (!opt_noacl) (void) grant_init(); -#ifdef HAVE_DLOPEN if (!opt_noacl) + { + plugin_load(); +#ifdef HAVE_DLOPEN udf_init(); #endif + } if (opt_bootstrap) /* If running with bootstrap, do not start replication. */ opt_skip_slave_start= 1; /* @@ -3465,6 +3614,10 @@ we force server id to 2, but this MySQL server will not act as a slave."); mysqld_port, MYSQL_COMPILATION_COMMENT); + // Signal threads waiting for server to be started + mysqld_server_started= 1; + pthread_cond_signal(&COND_server_started); + #if defined(__NT__) || defined(HAVE_SMEM) handle_connections_methods(); #else @@ -3512,6 +3665,7 @@ we force server id to 2, but this MySQL server will not act as a slave."); CloseHandle(hEventShutdown); } #endif + clean_up(1); wait_for_signal_thread_to_end(); clean_up_mutexes(); my_end(opt_endinfo ? 
MY_CHECK_ERROR | MY_GIVE_INFO : 0); @@ -3546,8 +3700,8 @@ static char *add_quoted_string(char *to, const char *from, char *to_end) uint length= (uint) (to_end-to); if (!strchr(from, ' ')) - return strnmov(to, from, length); - return strxnmov(to, length, "\"", from, "\"", NullS); + return strmake(to, from, length-1); + return strxnmov(to, length-1, "\"", from, "\"", NullS); } @@ -3613,7 +3767,6 @@ default_service_handling(char **argv, int main(int argc, char **argv) { - /* When several instances are running on the same machine, we need to have an unique named hEventShudown through the @@ -4427,7 +4580,8 @@ enum options_mysqld OPT_BDB_HOME, OPT_BDB_LOG, OPT_BDB_TMP, OPT_BDB_SYNC, OPT_BDB_LOCK, OPT_BDB, - OPT_BDB_NO_RECOVER, OPT_BDB_SHARED, + OPT_BDB_NO_RECOVER, OPT_BDB_SHARED, + OPT_BDB_DATA_DIRECT, OPT_BDB_LOG_DIRECT, OPT_MASTER_HOST, OPT_MASTER_USER, OPT_MASTER_PASSWORD, OPT_MASTER_PORT, OPT_MASTER_INFO_FILE, OPT_MASTER_CONNECT_RETRY, @@ -4438,6 +4592,13 @@ enum options_mysqld OPT_SQL_BIN_UPDATE_SAME, OPT_REPLICATE_DO_DB, OPT_REPLICATE_IGNORE_DB, OPT_LOG_SLAVE_UPDATES, OPT_BINLOG_DO_DB, OPT_BINLOG_IGNORE_DB, + OPT_BINLOG_FORMAT, +#ifndef DBUG_OFF + OPT_BINLOG_SHOW_XID, +#endif +#ifdef HAVE_ROW_BASED_REPLICATION + OPT_BINLOG_ROWS_EVENT_MAX_SIZE, +#endif OPT_WANT_CORE, OPT_CONCURRENT_INSERT, OPT_MEMLOCK, OPT_MYISAM_RECOVER, OPT_REPLICATE_REWRITE_DB, OPT_SERVER_ID, @@ -4467,6 +4628,9 @@ enum options_mysqld OPT_NDB_FORCE_SEND, OPT_NDB_AUTOINCREMENT_PREFETCH_SZ, OPT_NDB_SHM, OPT_NDB_OPTIMIZED_NODE_SELECTION, OPT_NDB_CACHE_CHECK_TIME, OPT_NDB_MGMD, OPT_NDB_NODEID, + OPT_NDB_DISTRIBUTION, + OPT_NDB_INDEX_STAT_ENABLE, + OPT_NDB_INDEX_STAT_CACHE_ENTRIES, OPT_NDB_INDEX_STAT_UPDATE_FREQ, OPT_SKIP_SAFEMALLOC, OPT_TEMP_POOL, OPT_TX_ISOLATION, OPT_COMPLETION_TYPE, OPT_SKIP_STACK_TRACE, OPT_SKIP_SYMLINKS, @@ -4502,6 +4666,7 @@ enum options_mysqld OPT_MAX_ERROR_COUNT, OPT_MULTI_RANGE_COUNT, OPT_MYISAM_DATA_POINTER_SIZE, OPT_MYISAM_BLOCK_SIZE, OPT_MYISAM_MAX_EXTRA_SORT_FILE_SIZE, OPT_MYISAM_MAX_SORT_FILE_SIZE, OPT_MYISAM_SORT_BUFFER_SIZE, + OPT_MYISAM_USE_MMAP, OPT_MYISAM_STATS_METHOD, OPT_NET_BUFFER_LENGTH, OPT_NET_RETRY_COUNT, OPT_NET_READ_TIMEOUT, OPT_NET_WRITE_TIMEOUT, @@ -4513,7 +4678,7 @@ enum options_mysqld OPT_RELAY_LOG_PURGE, OPT_SLAVE_NET_TIMEOUT, OPT_SLAVE_COMPRESSED_PROTOCOL, OPT_SLOW_LAUNCH_TIME, OPT_SLAVE_TRANS_RETRIES, OPT_READONLY, OPT_DEBUGGING, - OPT_SORT_BUFFER, OPT_TABLE_CACHE, + OPT_SORT_BUFFER, OPT_TABLE_OPEN_CACHE, OPT_TABLE_DEF_CACHE, OPT_THREAD_CONCURRENCY, OPT_THREAD_CACHE_SIZE, OPT_TMP_TABLE_SIZE, OPT_THREAD_STACK, OPT_WAIT_TIMEOUT, OPT_MYISAM_REPAIR_THREADS, @@ -4539,8 +4704,10 @@ enum options_mysqld OPT_INNODB_CONCURRENCY_TICKETS, OPT_INNODB_THREAD_SLEEP_DELAY, OPT_BDB_CACHE_SIZE, + OPT_BDB_CACHE_PARTS, OPT_BDB_LOG_BUFFER_SIZE, OPT_BDB_MAX_LOCK, + OPT_BDB_REGION_SIZE, OPT_ERROR_LOG_FILE, OPT_DEFAULT_WEEK_FORMAT, OPT_RANGE_ALLOC_BLOCK_SIZE, OPT_ALLOW_SUSPICIOUS_UDFS, @@ -4554,6 +4721,7 @@ enum options_mysqld OPT_ENABLE_SHARED_MEMORY, OPT_SHARED_MEMORY_BASE_NAME, OPT_OLD_PASSWORDS, + OPT_OLD_ALTER_TABLE, OPT_EXPIRE_LOGS_DAYS, OPT_GROUP_CONCAT_MAX_LEN, OPT_DEFAULT_COLLATION, @@ -4577,6 +4745,7 @@ enum options_mysqld OPT_OLD_STYLE_USER_LIMITS, OPT_LOG_SLOW_ADMIN_STATEMENTS, OPT_TABLE_LOCK_WAIT_TIMEOUT, + OPT_PLUGIN_DIR, OPT_PORT_OPEN_TIMEOUT }; @@ -4625,12 +4794,18 @@ struct my_option my_long_options[] = Disable with --skip-bdb (will save memory).", (gptr*) &opt_bdb, (gptr*) &opt_bdb, 0, GET_BOOL, NO_ARG, OPT_BDB_DEFAULT, 0, 0, 0, 0, 0}, -#ifdef HAVE_BERKELEY_DB +#ifdef 
WITH_BERKELEY_STORAGE_ENGINE + {"bdb-data-direct", OPT_BDB_DATA_DIRECT, + "Turn off system buffering of BDB database files to avoid double caching.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, {"bdb-home", OPT_BDB_HOME, "Berkeley home directory.", (gptr*) &berkeley_home, (gptr*) &berkeley_home, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, {"bdb-lock-detect", OPT_BDB_LOCK, "Berkeley lock detect (DEFAULT, OLDEST, RANDOM or YOUNGEST, # sec).", 0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + {"bdb-log-direct", OPT_BDB_LOG_DIRECT, + "Turn off system buffering of BDB log files to avoid double caching.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, {"bdb-logdir", OPT_BDB_LOG, "Berkeley DB log file directory.", (gptr*) &berkeley_logdir, (gptr*) &berkeley_logdir, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, @@ -4646,19 +4821,53 @@ Disable with --skip-bdb (will save memory).", {"bdb-tmpdir", OPT_BDB_TMP, "Berkeley DB tempfile name.", (gptr*) &berkeley_tmpdir, (gptr*) &berkeley_tmpdir, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, -#endif /* HAVE_BERKELEY_DB */ +#endif /* WITH_BERKELEY_STORAGE_ENGINE */ {"big-tables", OPT_BIG_TABLES, "Allow big result sets by saving all temporary sets on file (Solves most 'table full' errors).", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, {"bind-address", OPT_BIND_ADDRESS, "IP address to bind to.", (gptr*) &my_bind_addr_str, (gptr*) &my_bind_addr_str, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + {"binlog-format", OPT_BINLOG_FORMAT, +#ifdef HAVE_ROW_BASED_REPLICATION + "Tell the master the form of binary logging to use: either 'row' for " + "row-based binary logging (which automatically turns on " + "innodb_locks_unsafe_for_binlog as it is safe in this case), or " + "'statement' for statement-based logging. ", +#else + "Tell the master the form of binary logging to use: this release build " + "supports only statement-based binary logging, so only 'statement' is " + "a legal value; MySQL-Max release builds support row-based binary logging " + "in addition.", +#endif + 0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0 }, {"binlog-do-db", OPT_BINLOG_DO_DB, "Tells the master it should log updates for the specified database, and exclude all others not explicitly mentioned.", 0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, {"binlog-ignore-db", OPT_BINLOG_IGNORE_DB, "Tells the master that updates to the given database should not be logged tothe binary log.", 0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, +#if !defined(DBUG_OFF) && !defined(MYSQL_CLIENT) + {"binlog-show-xid", OPT_BINLOG_SHOW_XID, + "Option used by mysql-test for debugging and testing: " + "do not display the XID in SHOW BINLOG EVENTS; " + "may be removed in future versions", + (gptr*) &Xid_log_event::show_xid, (gptr*) &Xid_log_event::show_xid, + 0, GET_BOOL, NO_ARG, 1, 0, 0, 0, 0, 0}, +#endif +#ifdef HAVE_ROW_BASED_REPLICATION + {"binlog-row-event-max-size", OPT_BINLOG_ROWS_EVENT_MAX_SIZE, + "The maximum size of a row-based binary log event in bytes. Rows will be " + "grouped into events smaller than this size if possible. 
" + "The value has to be a multiple of 256.", + (gptr*) &opt_binlog_rows_event_max_size, + (gptr*) &opt_binlog_rows_event_max_size, 0, + GET_ULONG, REQUIRED_ARG, + /* def_value */ 1024, /* min_value */ 256, /* max_value */ ULONG_MAX, + /* sub_size */ 0, /* block_size */ 256, + /* app_type */ 0 + }, +#endif {"bootstrap", OPT_BOOTSTRAP, "Used by mysql installation scripts.", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, {"character-set-client-handshake", OPT_CHARACTER_SET_CLIENT_HANDSHAKE, @@ -4782,7 +4991,7 @@ Disable with --skip-large-pages.", Disable with --skip-innodb (will save memory).", (gptr*) &opt_innodb, (gptr*) &opt_innodb, 0, GET_BOOL, NO_ARG, OPT_INNODB_DEFAULT, 0, 0, 0, 0, 0}, -#ifdef HAVE_INNOBASE_DB +#ifdef WITH_INNOBASE_STORAGE_ENGINE {"innodb_checksums", OPT_INNODB_CHECKSUMS, "Enable InnoDB checksums validation (enabled by default). \ Disable with --skip-innodb-checksums.", (gptr*) &innobase_use_checksums, (gptr*) &innobase_use_checksums, 0, GET_BOOL, NO_ARG, 1, 0, 0, 0, 0, 0}, @@ -4790,7 +4999,7 @@ Disable with --skip-innodb-checksums.", (gptr*) &innobase_use_checksums, {"innodb_data_file_path", OPT_INNODB_DATA_FILE_PATH, "Path to individual files and their sizes.", 0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, -#ifdef HAVE_INNOBASE_DB +#ifdef WITH_INNOBASE_STORAGE_ENGINE {"innodb_data_home_dir", OPT_INNODB_DATA_HOME_DIR, "The common part for InnoDB table spaces.", (gptr*) &innobase_data_home_dir, (gptr*) &innobase_data_home_dir, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, @@ -4826,7 +5035,9 @@ Disable with --skip-innodb-doublewrite.", (gptr*) &innobase_use_doublewrite, (gptr*) &innobase_unix_file_flush_method, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, {"innodb_locks_unsafe_for_binlog", OPT_INNODB_LOCKS_UNSAFE_FOR_BINLOG, - "Force InnoDB not to use next-key locking. Instead use only row-level locking", + "Force InnoDB not to use next-key locking, to use only row-level locking." + " This is unsafe if you are using statement-based binary logging, and safe" + " if you are using row-based binary logging.", (gptr*) &innobase_locks_unsafe_for_binlog, (gptr*) &innobase_locks_unsafe_for_binlog, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, {"innodb_log_arch_dir", OPT_INNODB_LOG_ARCH_DIR, @@ -4861,7 +5072,7 @@ Disable with --skip-innodb-doublewrite.", (gptr*) &innobase_use_doublewrite, (gptr*) &global_system_variables.innodb_support_xa, (gptr*) &global_system_variables.innodb_support_xa, 0, GET_BOOL, OPT_ARG, 1, 0, 0, 0, 0, 0}, -#endif /* End HAVE_INNOBASE_DB */ +#endif /* End WITH_INNOBASE_STORAGE_ENGINE */ {"isam", OPT_ISAM, "Obsolete. ISAM storage engine is no longer supported.", (gptr*) &opt_isam, (gptr*) &opt_isam, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, @@ -4905,8 +5116,12 @@ Disable with --skip-innodb-doublewrite.", (gptr*) &innobase_use_doublewrite, {"log-bin-trust-function-creators", OPT_LOG_BIN_TRUST_FUNCTION_CREATORS, "If equal to 0 (the default), then when --log-bin is used, creation of " "a function is allowed only to users having the SUPER privilege and only " - "if this function may not break binary logging.", - (gptr*) &trust_function_creators, (gptr*) &trust_function_creators, 0, + "if this function may not break binary logging." +#ifdef HAVE_ROW_BASED_REPLICATION + " If using --binlog-format=row, the security issues do not exist and the " + "binary logging cannot break so this option is automatically set to 1." 
+#endif + ,(gptr*) &trust_function_creators, (gptr*) &trust_function_creators, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, {"log-error", OPT_ERROR_LOG_FILE, "Error log file.", (gptr*) &log_error_file_ptr, (gptr*) &log_error_file_ptr, 0, GET_STR, @@ -5032,7 +5247,7 @@ master-ssl", Disable with --skip-ndbcluster (will save memory).", (gptr*) &opt_ndbcluster, (gptr*) &opt_ndbcluster, 0, GET_BOOL, NO_ARG, OPT_NDBCLUSTER_DEFAULT, 0, 0, 0, 0, 0}, -#ifdef HAVE_NDBCLUSTER_DB +#ifdef WITH_NDBCLUSTER_STORAGE_ENGINE {"ndb-connectstring", OPT_NDB_CONNECTSTRING, "Connect string for ndbcluster.", (gptr*) &opt_ndb_connectstring, @@ -5053,6 +5268,11 @@ Disable with --skip-ndbcluster (will save memory).", (gptr*) &global_system_variables.ndb_autoincrement_prefetch_sz, (gptr*) &global_system_variables.ndb_autoincrement_prefetch_sz, 0, GET_ULONG, REQUIRED_ARG, 32, 1, 256, 0, 0, 0}, + {"ndb-distribution", OPT_NDB_DISTRIBUTION, + "Default distribution for new tables in ndb", + (gptr*) &opt_ndb_distribution, + (gptr*) &opt_ndb_distribution, + 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, {"ndb-force-send", OPT_NDB_FORCE_SEND, "Force send of buffers to ndb immediately without waiting for " "other threads.", @@ -5089,6 +5309,23 @@ Disable with --skip-ndbcluster (will save memory).", "A dedicated thread is created to, at the given millisecons interval, invalidate the query cache if another MySQL server in the cluster has changed the data in the database.", (gptr*) &opt_ndb_cache_check_time, (gptr*) &opt_ndb_cache_check_time, 0, GET_ULONG, REQUIRED_ARG, 0, 0, LONG_TIMEOUT, 0, 1, 0}, + {"ndb-index-stat-enable", OPT_NDB_INDEX_STAT_ENABLE, + "Use ndb index statistics in query optimization.", + (gptr*) &global_system_variables.ndb_index_stat_enable, + (gptr*) &max_system_variables.ndb_index_stat_enable, + 0, GET_BOOL, OPT_ARG, 1, 0, 1, 0, 0, 0}, + {"ndb-index-stat-cache-entries", OPT_NDB_INDEX_STAT_CACHE_ENTRIES, + "Number of start/end keys to store in statistics memory cache." + " Zero means no cache and forces query of db nodes always.", + (gptr*) &global_system_variables.ndb_index_stat_cache_entries, + (gptr*) &max_system_variables.ndb_index_stat_cache_entries, + 0, GET_ULONG, OPT_ARG, 32, 0, ~0L, 0, 0, 0}, + {"ndb-index-stat-update-freq", OPT_NDB_INDEX_STAT_UPDATE_FREQ, + "How often, in the long run, to query db nodes instead of statistics cache." + " For example 20 means every 20th time.", + (gptr*) &global_system_variables.ndb_index_stat_update_freq, + (gptr*) &max_system_variables.ndb_index_stat_update_freq, + 0, GET_ULONG, OPT_ARG, 20, 0, ~0L, 0, 0, 0}, #endif {"new", 'n', "Use very new possible 'unsafe' functions.", (gptr*) &global_system_variables.new_mode, @@ -5099,6 +5336,11 @@ Disable with --skip-ndbcluster (will save memory).", (gptr*) &opt_no_mix_types, (gptr*) &opt_no_mix_types, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, #endif + {"old-alter-table", OPT_OLD_ALTER_TABLE, + "Use old, non-optimized alter table.", + (gptr*) &global_system_variables.old_alter_table, + (gptr*) &max_system_variables.old_alter_table, 0, GET_BOOL, NO_ARG, + 0, 0, 0, 0, 0, 0}, {"old-passwords", OPT_OLD_PASSWORDS, "Use old password encryption method (needed for 4.0 and older clients).", (gptr*) &global_system_variables.old_passwords, (gptr*) &max_system_variables.old_passwords, 0, GET_BOOL, NO_ARG, @@ -5340,11 +5582,15 @@ log and this option does nothing anymore.", "The number of outstanding connection requests MySQL can have. 
This comes into play when the main MySQL thread gets very many connection requests in a very short time.", (gptr*) &back_log, (gptr*) &back_log, 0, GET_ULONG, REQUIRED_ARG, 50, 1, 65535, 0, 1, 0 }, -#ifdef HAVE_BERKELEY_DB +#ifdef WITH_BERKELEY_STORAGE_ENGINE + { "bdb_cache_parts", OPT_BDB_CACHE_PARTS, + "Number of parts to use for BDB cache.", + (gptr*) &berkeley_cache_parts, (gptr*) &berkeley_cache_parts, 0, GET_ULONG, + REQUIRED_ARG, 1, 1, 1024, 0, 1, 0}, { "bdb_cache_size", OPT_BDB_CACHE_SIZE, "The buffer that is allocated to cache index and rows for BDB tables.", - (gptr*) &berkeley_cache_size, (gptr*) &berkeley_cache_size, 0, GET_ULONG, - REQUIRED_ARG, KEY_CACHE_SIZE, 20*1024, (long) ~0, 0, IO_SIZE, 0}, + (gptr*) &berkeley_cache_size, (gptr*) &berkeley_cache_size, 0, GET_ULL, + REQUIRED_ARG, KEY_CACHE_SIZE, 20*1024, (ulonglong) ~0, 0, IO_SIZE, 0}, /* QQ: The following should be removed soon! (bdb_max_lock preferred) */ {"bdb_lock_max", OPT_BDB_MAX_LOCK, "Synonym for bdb_max_lock.", (gptr*) &berkeley_max_lock, (gptr*) &berkeley_max_lock, 0, GET_ULONG, @@ -5357,7 +5603,11 @@ log and this option does nothing anymore.", "The maximum number of locks you can have active on a BDB table.", (gptr*) &berkeley_max_lock, (gptr*) &berkeley_max_lock, 0, GET_ULONG, REQUIRED_ARG, 10000, 0, (long) ~0, 0, 1, 0}, -#endif /* HAVE_BERKELEY_DB */ + {"bdb_region_size", OPT_BDB_REGION_SIZE, + "The size of the underlying logging area of the Berkeley DB environment.", + (gptr*) &berkeley_region_size, (gptr*) &berkeley_region_size, 0, GET_ULONG, + OPT_ARG, 60*1024L, 60*1024L, (long) ~0, 0, 1, 0}, +#endif /* WITH_BERKELEY_STORAGE_ENGINE */ {"binlog_cache_size", OPT_BINLOG_CACHE_SIZE, "The size of the cache to hold the SQL statements for the binary log during a transaction. If you often use big, multi-statement transactions you can increase this to get more performance.", (gptr*) &binlog_cache_size, (gptr*) &binlog_cache_size, 0, GET_ULONG, @@ -5433,7 +5683,7 @@ log and this option does nothing anymore.", (gptr*) &global_system_variables.group_concat_max_len, (gptr*) &max_system_variables.group_concat_max_len, 0, GET_ULONG, REQUIRED_ARG, 1024, 4, (long) ~0, 0, 1, 0}, -#ifdef HAVE_INNOBASE_DB +#ifdef WITH_INNOBASE_STORAGE_ENGINE {"innodb_additional_mem_pool_size", OPT_INNODB_ADDITIONAL_MEM_POOL_SIZE, "Size of a memory pool InnoDB uses to store data dictionary information and other internal data structures.", (gptr*) &innobase_additional_mem_pool_size, @@ -5512,7 +5762,7 @@ log and this option does nothing anymore.", (gptr*) &srv_thread_sleep_delay, (gptr*) &srv_thread_sleep_delay, 0, GET_LONG, REQUIRED_ARG, 10000L, 0L, ~0L, 0, 1L, 0}, -#endif /* WITH_INNOBASE_STORAGE_ENGINE */ {"interactive_timeout", OPT_INTERACTIVE_TIMEOUT, "The number of seconds the server waits for activity on an interactive connection before closing it.", (gptr*) &global_system_variables.net_interactive_timeout, @@ -5677,6 +5927,11 @@ The minimum value for this variable is 4096.", (gptr*) &global_system_variables.myisam_sort_buff_size, (gptr*) &max_system_variables.myisam_sort_buff_size, 0, GET_ULONG, REQUIRED_ARG, 8192*1024, 4, ~0L, 0, 1, 0}, + {"myisam_use_mmap", OPT_MYISAM_USE_MMAP, + "Use memory mapping for reading and writing MyISAM tables", + (gptr*) &opt_myisam_use_mmap, + (gptr*) &opt_myisam_use_mmap, 0, GET_BOOL, NO_ARG, 0, + 0, 0, 0, 0, 0}, {"myisam_stats_method", OPT_MYISAM_STATS_METHOD, "Specifies how MyISAM index statistics collection code should treat NULLs. 
" "Possible values of name are \"nulls_unequal\" (default behavior for 4.1/5.0), " @@ -5717,6 +5972,10 @@ The minimum value for this variable is 4096.", (gptr*) &global_system_variables.optimizer_search_depth, (gptr*) &max_system_variables.optimizer_search_depth, 0, GET_ULONG, OPT_ARG, MAX_TABLES+1, 0, MAX_TABLES+2, 0, 1, 0}, + {"plugin_dir", OPT_PLUGIN_DIR, + "Directory for plugins.", + (gptr*) &opt_plugin_dir_ptr, (gptr*) &opt_plugin_dir_ptr, 0, + GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, {"preload_buffer_size", OPT_PRELOAD_BUFFER_SIZE, "The size of the buffer that is allocated when preloading indexes", (gptr*) &global_system_variables.preload_buff_size, @@ -5833,12 +6092,12 @@ The minimum value for this variable is 4096.", (gptr*) &max_system_variables.sortbuff_size, 0, GET_ULONG, REQUIRED_ARG, MAX_SORT_MEMORY, MIN_SORT_MEMORY+MALLOC_OVERHEAD*2, ~0L, MALLOC_OVERHEAD, 1, 0}, -#ifdef HAVE_BERKELEY_DB +#ifdef WITH_BERKELEY_STORAGE_ENGINE {"sync-bdb-logs", OPT_BDB_SYNC, "Synchronously flush Berkeley DB logs. Enabled by default", (gptr*) &opt_sync_bdb_logs, (gptr*) &opt_sync_bdb_logs, 0, GET_BOOL, NO_ARG, 1, 0, 0, 0, 0, 0}, -#endif /* HAVE_BERKELEY_DB */ +#endif /* WITH_BERKELEY_STORAGE_ENGINE */ {"sync-binlog", OPT_SYNC_BINLOG, "Synchronously flush binary log to disk after every #th event. " "Use 0 (default) to disable synchronous flushing.", @@ -5864,13 +6123,21 @@ The minimum value for this variable is 4096.", (gptr*) &global_system_variables.sync_replication_timeout, 0, GET_ULONG, REQUIRED_ARG, 10, 0, ~0L, 0, 1, 0}, #endif /* HAVE_REPLICATION */ - {"table_cache", OPT_TABLE_CACHE, - "The number of open tables for all threads.", (gptr*) &table_cache_size, - (gptr*) &table_cache_size, 0, GET_ULONG, REQUIRED_ARG, 64, 1, 512*1024L, - 0, 1, 0}, - {"table_lock_wait_timeout", OPT_TABLE_LOCK_WAIT_TIMEOUT, "Timeout in " - "seconds to wait for a table level lock before returning an error. Used" - " only if the connection has active cursors.", + {"table_cache", OPT_TABLE_OPEN_CACHE, + "Deprecated; use --table_open_cache instead.", + (gptr*) &table_cache_size, (gptr*) &table_cache_size, 0, GET_ULONG, + REQUIRED_ARG, 64, 1, 512*1024L, 0, 1, 0}, + {"table_definition_cache", OPT_TABLE_DEF_CACHE, + "The number of cached table definitions.", + (gptr*) &table_def_size, (gptr*) &table_def_size, + 0, GET_ULONG, REQUIRED_ARG, 128, 1, 512*1024L, 0, 1, 0}, + {"table_open_cache", OPT_TABLE_OPEN_CACHE, + "The number of cached open tables.", + (gptr*) &table_cache_size, (gptr*) &table_cache_size, + 0, GET_ULONG, REQUIRED_ARG, 64, 1, 512*1024L, 0, 1, 0}, + {"table_lock_wait_timeout", OPT_TABLE_LOCK_WAIT_TIMEOUT, + "Timeout in seconds to wait for a table level lock before returning an " + "error. 
Used only if the connection has active cursors.", (gptr*) &table_lock_wait_timeout, (gptr*) &table_lock_wait_timeout, 0, GET_ULONG, REQUIRED_ARG, 50, 1, 1024 * 1024 * 1024, 0, 1, 0}, {"thread_cache_size", OPT_THREAD_CACHE_SIZE, @@ -5990,16 +6257,17 @@ struct show_var_st status_vars[]= { {"Com_show_create_db", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_CREATE_DB]), SHOW_LONG_STATUS}, {"Com_show_create_table", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_CREATE]), SHOW_LONG_STATUS}, {"Com_show_databases", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_DATABASES]), SHOW_LONG_STATUS}, + {"Com_show_engine_logs", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_ENGINE_LOGS]), SHOW_LONG_STATUS}, + {"Com_show_engine_mutex", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_ENGINE_MUTEX]), SHOW_LONG_STATUS}, + {"Com_show_engine_status", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_ENGINE_STATUS]), SHOW_LONG_STATUS}, {"Com_show_errors", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_ERRORS]), SHOW_LONG_STATUS}, {"Com_show_fields", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_FIELDS]), SHOW_LONG_STATUS}, {"Com_show_grants", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_GRANTS]), SHOW_LONG_STATUS}, - {"Com_show_innodb_status", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_INNODB_STATUS]), SHOW_LONG_STATUS}, {"Com_show_keys", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_KEYS]), SHOW_LONG_STATUS}, - {"Com_show_logs", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_LOGS]), SHOW_LONG_STATUS}, {"Com_show_master_status", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_MASTER_STAT]), SHOW_LONG_STATUS}, - {"Com_show_ndb_status", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_NDBCLUSTER_STATUS]), SHOW_LONG_STATUS}, {"Com_show_new_master", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_NEW_MASTER]), SHOW_LONG_STATUS}, {"Com_show_open_tables", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_OPEN_TABLES]), SHOW_LONG_STATUS}, + {"Com_show_plugins", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_PLUGINS]), SHOW_LONG_STATUS}, {"Com_show_privileges", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_PRIVILEGES]), SHOW_LONG_STATUS}, {"Com_show_processlist", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_PROCESSLIST]), SHOW_LONG_STATUS}, {"Com_show_slave_hosts", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_SLAVE_HOSTS]), SHOW_LONG_STATUS}, @@ -6052,9 +6320,9 @@ struct show_var_st status_vars[]= { {"Handler_savepoint_rollback",(char*) offsetof(STATUS_VAR, ha_savepoint_rollback_count), SHOW_LONG_STATUS}, {"Handler_update", (char*) offsetof(STATUS_VAR, ha_update_count), SHOW_LONG_STATUS}, {"Handler_write", (char*) offsetof(STATUS_VAR, ha_write_count), SHOW_LONG_STATUS}, -#ifdef HAVE_INNOBASE_DB +#ifdef WITH_INNOBASE_STORAGE_ENGINE {"Innodb_", (char*) &innodb_status_variables, SHOW_VARS}, -#endif /*HAVE_INNOBASE_DB*/ +#endif /* WITH_INNOBASE_STORAGE_ENGINE */ {"Key_blocks_not_flushed", (char*) &dflt_key_cache_var.global_blocks_changed, SHOW_KEY_CACHE_LONG}, {"Key_blocks_unused", (char*) &dflt_key_cache_var.blocks_unused, SHOW_KEY_CACHE_CONST_LONG}, {"Key_blocks_used", (char*) &dflt_key_cache_var.blocks_used, SHOW_KEY_CACHE_CONST_LONG}, @@ -6064,13 +6332,14 @@ struct show_var_st status_vars[]= { {"Key_writes", (char*) &dflt_key_cache_var.global_cache_write, SHOW_KEY_CACHE_LONGLONG}, {"Last_query_cost", (char*) 
offsetof(STATUS_VAR, last_query_cost), SHOW_DOUBLE_STATUS}, {"Max_used_connections", (char*) &max_used_connections, SHOW_LONG}, -#ifdef HAVE_NDBCLUSTER_DB +#ifdef WITH_NDBCLUSTER_STORAGE_ENGINE {"Ndb_", (char*) &ndb_status_variables, SHOW_VARS}, -#endif /*HAVE_NDBCLUSTER_DB*/ +#endif /* WITH_NDBCLUSTER_STORAGE_ENGINE */ {"Not_flushed_delayed_rows", (char*) &delayed_rows_in_use, SHOW_LONG_CONST}, {"Open_files", (char*) &my_file_opened, SHOW_LONG_CONST}, {"Open_streams", (char*) &my_stream_opened, SHOW_LONG_CONST}, - {"Open_tables", (char*) 0, SHOW_OPENTABLES}, + {"Open_table_definitions", (char*) 0, SHOW_TABLE_DEFINITIONS}, + {"Open_tables", (char*) 0, SHOW_OPEN_TABLES}, {"Opened_tables", (char*) offsetof(STATUS_VAR, opened_tables), SHOW_LONG_STATUS}, #ifdef HAVE_QUERY_CACHE {"Qcache_free_blocks", (char*) &query_cache.free_memory_blocks, SHOW_LONG_CONST}, @@ -6284,13 +6553,6 @@ static void mysql_init_variables(void) exit(1); multi_keycache_init(); /* set key_cache_hash.default_value = dflt_key_cache */ - /* Initialize structures that is used when processing options */ - replicate_rewrite_db.empty(); - replicate_do_db.empty(); - replicate_ignore_db.empty(); - binlog_do_db.empty(); - binlog_ignore_db.empty(); - /* Set directory paths */ strmake(language, LANGUAGE, sizeof(language)-1); strmake(mysql_real_data_home, get_relative_path(DATADIR), @@ -6316,13 +6578,14 @@ static void mysql_init_variables(void) /* Set default values for some option variables */ - global_system_variables.table_type= DB_TYPE_MYISAM; + global_system_variables.table_type= &myisam_hton; global_system_variables.tx_isolation= ISO_REPEATABLE_READ; global_system_variables.select_limit= (ulonglong) HA_POS_ERROR; max_system_variables.select_limit= (ulonglong) HA_POS_ERROR; global_system_variables.max_join_size= (ulonglong) HA_POS_ERROR; max_system_variables.max_join_size= (ulonglong) HA_POS_ERROR; global_system_variables.old_passwords= 0; + global_system_variables.old_alter_table= 0; /* Default behavior for 4.1 and 5.0 is to treat NULL values as unequal @@ -6336,44 +6599,19 @@ static void mysql_init_variables(void) "d:t:i:o,/tmp/mysqld.trace"); #endif opt_error_log= IF_WIN(1,0); -#ifdef HAVE_BERKELEY_DB - have_berkeley_db= SHOW_OPTION_YES; -#else - have_berkeley_db= SHOW_OPTION_NO; -#endif -#ifdef HAVE_INNOBASE_DB - have_innodb=SHOW_OPTION_YES; -#else - have_innodb=SHOW_OPTION_NO; -#endif - have_isam=SHOW_OPTION_NO; -#ifdef HAVE_EXAMPLE_DB - have_example_db= SHOW_OPTION_YES; +#ifdef HAVE_ROW_BASED_REPLICATION + have_row_based_replication= SHOW_OPTION_YES; #else - have_example_db= SHOW_OPTION_NO; + have_row_based_replication= SHOW_OPTION_NO; #endif -#if defined(HAVE_ARCHIVE_DB) - have_archive_db= SHOW_OPTION_YES; -#else - have_archive_db= SHOW_OPTION_NO; -#endif -#ifdef HAVE_BLACKHOLE_DB - have_blackhole_db= SHOW_OPTION_YES; -#else - have_blackhole_db= SHOW_OPTION_NO; -#endif -#ifdef HAVE_FEDERATED_DB - have_federated_db= SHOW_OPTION_YES; -#else - have_federated_db= SHOW_OPTION_NO; -#endif -#ifdef HAVE_CSV_DB - have_csv_db= SHOW_OPTION_YES; -#else - have_csv_db= SHOW_OPTION_NO; -#endif -#ifdef HAVE_NDBCLUSTER_DB +#ifdef WITH_NDBCLUSTER_STORAGE_ENGINE have_ndbcluster=SHOW_OPTION_DISABLED; + global_system_variables.ndb_index_stat_enable=TRUE; + max_system_variables.ndb_index_stat_enable=TRUE; + global_system_variables.ndb_index_stat_cache_entries=32; + max_system_variables.ndb_index_stat_cache_entries=~0L; + global_system_variables.ndb_index_stat_update_freq=20; + max_system_variables.ndb_index_stat_update_freq=~0L; #else 
have_ndbcluster=SHOW_OPTION_NO; #endif @@ -6542,14 +6780,12 @@ get_one_option(int optid, const struct my_option *opt __attribute__((unused)), } case (int)OPT_REPLICATE_IGNORE_DB: { - i_string *db = new i_string(argument); - replicate_ignore_db.push_back(db); + rpl_filter->add_ignore_db(argument); break; } case (int)OPT_REPLICATE_DO_DB: { - i_string *db = new i_string(argument); - replicate_do_db.push_back(db); + rpl_filter->add_do_db(argument); break; } case (int)OPT_REPLICATE_REWRITE_DB: @@ -6582,71 +6818,76 @@ get_one_option(int optid, const struct my_option *opt __attribute__((unused)), exit(1); } - i_string_pair *db_pair = new i_string_pair(key, val); - replicate_rewrite_db.push_back(db_pair); + rpl_filter->add_db_rewrite(key, val); break; } case (int)OPT_BINLOG_IGNORE_DB: { - i_string *db = new i_string(argument); - binlog_ignore_db.push_back(db); + binlog_filter->add_ignore_db(argument); + break; + } + case OPT_BINLOG_FORMAT: + { + int id; + if ((id= find_type(argument, &binlog_format_typelib, 2)) <= 0) + { +#ifdef HAVE_ROW_BASED_REPLICATION + fprintf(stderr, + "Unknown binary log format: '%s' " + "(should be '%s' or '%s')\n", + argument, + binlog_format_names[BF_STMT], + binlog_format_names[BF_ROW]); +#else + fprintf(stderr, + "Unknown binary log format: '%s' (only legal value is '%s')\n", + argument, binlog_format_names[BF_STMT]); +#endif + exit(1); + } + opt_binlog_format_id= (enum binlog_format)(id-1); break; } case (int)OPT_BINLOG_DO_DB: { - i_string *db = new i_string(argument); - binlog_do_db.push_back(db); + binlog_filter->add_do_db(argument); break; } case (int)OPT_REPLICATE_DO_TABLE: { - if (!do_table_inited) - init_table_rule_hash(&replicate_do_table, &do_table_inited); - if (add_table_rule(&replicate_do_table, argument)) + if (rpl_filter->add_do_table(argument)) { fprintf(stderr, "Could not add do table rule '%s'!\n", argument); exit(1); } - table_rules_on = 1; break; } case (int)OPT_REPLICATE_WILD_DO_TABLE: { - if (!wild_do_table_inited) - init_table_rule_array(&replicate_wild_do_table, - &wild_do_table_inited); - if (add_wild_table_rule(&replicate_wild_do_table, argument)) + if (rpl_filter->add_wild_do_table(argument)) { fprintf(stderr, "Could not add do table rule '%s'!\n", argument); exit(1); } - table_rules_on = 1; break; } case (int)OPT_REPLICATE_WILD_IGNORE_TABLE: { - if (!wild_ignore_table_inited) - init_table_rule_array(&replicate_wild_ignore_table, - &wild_ignore_table_inited); - if (add_wild_table_rule(&replicate_wild_ignore_table, argument)) + if (rpl_filter->add_wild_ignore_table(argument)) { fprintf(stderr, "Could not add ignore table rule '%s'!\n", argument); exit(1); } - table_rules_on = 1; break; } case (int)OPT_REPLICATE_IGNORE_TABLE: { - if (!ignore_table_inited) - init_table_rule_hash(&replicate_ignore_table, &ignore_table_inited); - if (add_table_rule(&replicate_ignore_table, argument)) + if (rpl_filter->add_ignore_table(argument)) { fprintf(stderr, "Could not add ignore table rule '%s'!\n", argument); exit(1); } - table_rules_on = 1; break; } #endif /* HAVE_REPLICATION */ @@ -6757,9 +6998,9 @@ get_one_option(int optid, const struct my_option *opt __attribute__((unused)), break; case OPT_STORAGE_ENGINE: { - if ((enum db_type)((global_system_variables.table_type= - ha_resolve_by_name(argument, strlen(argument)))) == - DB_TYPE_UNKNOWN) + LEX_STRING name= { argument, strlen(argument) }; + if ((global_system_variables.table_type= + ha_resolve_by_name(current_thd, &name)) == NULL) { fprintf(stderr,"Unknown/unsupported table type: %s\n",argument); exit(1); 
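The OPT_BINLOG_FORMAT case above (like the OPT_NDB_DISTRIBUTION case further down) follows the usual mysys pattern: find_type() returns a 1-based index into a TYPELIB, zero or less when the argument matches nothing, and the caller subtracts one to obtain the enum value. A minimal self-contained sketch of that pattern, using simplified stand-ins for TYPELIB and find_type() rather than the real mysys definitions:

    #include <strings.h>   /* strcasecmp() (POSIX) */
    #include <stdio.h>
    #include <stdlib.h>

    static const char *binlog_format_names[]= {"STATEMENT", "ROW", 0};
    enum binlog_format { BF_STMT= 0, BF_ROW= 1, BF_UNSPECIFIED= 2 };

    /* Simplified find_type(): 1-based index of arg in names, 0 if absent
       (the real mysys version also accepts unique prefixes). */
    static int find_type_sketch(const char *arg, const char **names)
    {
      for (int i= 0; names[i]; i++)
        if (!strcasecmp(arg, names[i]))
          return i + 1;
      return 0;
    }

    static enum binlog_format parse_binlog_format(const char *argument)
    {
      int id= find_type_sketch(argument, binlog_format_names);
      if (id <= 0)
      {
        fprintf(stderr, "Unknown binary log format: '%s'\n", argument);
        exit(1);
      }
      return (enum binlog_format)(id - 1);  /* 1-based index -> enum value */
    }

Given --binlog-format=row, parse_binlog_format("row") returns BF_ROW, matching what the option handler above stores in opt_binlog_format_id.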
@@ -6804,19 +7045,25 @@ get_one_option(int optid, const struct my_option *opt __attribute__((unused)), global_system_variables.tx_isolation= (type-1); break; } -#ifdef HAVE_BERKELEY_DB +#ifdef WITH_BERKELEY_STORAGE_ENGINE case OPT_BDB_NOSYNC: /* Deprecated option */ opt_sync_bdb_logs= 0; /* Fall through */ case OPT_BDB_SYNC: if (!opt_sync_bdb_logs) - berkeley_env_flags|= DB_TXN_NOSYNC; + berkeley_env_flags|= bdb_DB_TXN_NOSYNC; else - berkeley_env_flags&= ~DB_TXN_NOSYNC; + berkeley_env_flags&= ~bdb_DB_TXN_NOSYNC; + break; + case OPT_BDB_LOG_DIRECT: + berkeley_env_flags|= bdb_DB_DIRECT_LOG; + break; + case OPT_BDB_DATA_DIRECT: + berkeley_env_flags|= bdb_DB_DIRECT_DB; break; case OPT_BDB_NO_RECOVER: - berkeley_init_flags&= ~(DB_RECOVER); + berkeley_init_flags&= ~(bdb_DB_RECOVER); break; case OPT_BDB_LOCK: { @@ -6840,12 +7087,12 @@ get_one_option(int optid, const struct my_option *opt __attribute__((unused)), break; } case OPT_BDB_SHARED: - berkeley_init_flags&= ~(DB_PRIVATE); + berkeley_init_flags&= ~(bdb_DB_PRIVATE); berkeley_shared_data= 1; break; -#endif /* HAVE_BERKELEY_DB */ +#endif /* WITH_BERKELEY_STORAGE_ENGINE */ case OPT_BDB: -#ifdef HAVE_BERKELEY_DB +#ifdef WITH_BERKELEY_STORAGE_ENGINE if (opt_bdb) have_berkeley_db= SHOW_OPTION_YES; else @@ -6853,14 +7100,14 @@ get_one_option(int optid, const struct my_option *opt __attribute__((unused)), #endif break; case OPT_NDBCLUSTER: -#ifdef HAVE_NDBCLUSTER_DB +#ifdef WITH_NDBCLUSTER_STORAGE_ENGINE if (opt_ndbcluster) have_ndbcluster= SHOW_OPTION_YES; else have_ndbcluster= SHOW_OPTION_DISABLED; #endif break; -#ifdef HAVE_NDBCLUSTER_DB +#ifdef WITH_NDBCLUSTER_STORAGE_ENGINE case OPT_NDB_MGMD: case OPT_NDB_NODEID: { @@ -6884,9 +7131,23 @@ get_one_option(int optid, const struct my_option *opt __attribute__((unused)), opt_ndb_constrbuf[opt_ndb_constrbuf_len]= 0; opt_ndbcluster_connectstring= opt_ndb_constrbuf; break; + case OPT_NDB_DISTRIBUTION: + int id; + if ((id= find_type(argument, &ndb_distribution_typelib, 2)) <= 0) + { + fprintf(stderr, + "Unknown ndb distribution type: '%s' " + "(should be '%s' or '%s')\n", + argument, + ndb_distribution_names[ND_KEYHASH], + ndb_distribution_names[ND_LINHASH]); + exit(1); + } + opt_ndb_distribution_id= (enum ndb_distribution)(id-1); + break; #endif case OPT_INNODB: -#ifdef HAVE_INNOBASE_DB +#ifdef WITH_INNOBASE_STORAGE_ENGINE if (opt_innodb) have_innodb= SHOW_OPTION_YES; else @@ -6894,15 +7155,15 @@ get_one_option(int optid, const struct my_option *opt __attribute__((unused)), break; case OPT_INNODB_DATA_FILE_PATH: -#ifdef HAVE_INNOBASE_DB +#ifdef WITH_INNOBASE_STORAGE_ENGINE innobase_data_file_path= argument; #endif break; -#ifdef HAVE_INNOBASE_DB +#ifdef WITH_INNOBASE_STORAGE_ENGINE case OPT_INNODB_LOG_ARCHIVE: innobase_log_archive= argument ? 
test(atoi(argument)) : 1; break; -#endif /* HAVE_INNOBASE_DB */ +#endif /* WITH_INNOBASE_STORAGE_ENGINE */ case OPT_MYISAM_RECOVER: { if (!argument || !argument[0]) @@ -7050,19 +7311,19 @@ static void get_options(int argc,char **argv) get_one_option))) exit(ho_error); -#ifndef HAVE_NDBCLUSTER_DB +#ifndef WITH_NDBCLUSTER_STORAGE_ENGINE if (opt_ndbcluster) sql_print_warning("this binary does not contain NDBCLUSTER storage engine"); #endif -#ifndef HAVE_INNOBASE_DB +#ifndef WITH_INNOBASE_STORAGE_ENGINE if (opt_innodb) sql_print_warning("this binary does not contain INNODB storage engine"); #endif -#ifndef HAVE_ISAM +#ifndef WITH_ISAM_STORAGE_ENGINE if (opt_isam) sql_print_warning("this binary does not contain ISAM storage engine"); #endif -#ifndef HAVE_BERKELEY_DB +#ifndef WITH_BERKELEY_STORAGE_ENGINE if (opt_bdb) sql_print_warning("this binary does not contain BDB storage engine"); #endif @@ -7135,6 +7396,7 @@ static void get_options(int argc,char **argv) init_global_datetime_format(MYSQL_TIMESTAMP_DATETIME, &global_system_variables.datetime_format)) exit(1); + } @@ -7193,7 +7455,7 @@ fn_format_relative_to_data_home(my_string to, const char *name, dir=tmp_path; } return !fn_format(to, name, dir, extension, - MY_REPLACE_EXT | MY_UNPACK_FILENAME | MY_SAFE_PATH); + MY_APPEND_EXT | MY_UNPACK_FILENAME | MY_SAFE_PATH); } @@ -7215,6 +7477,9 @@ static void fix_paths(void) (void) my_load_path(mysql_home,mysql_home,""); // Resolve current dir (void) my_load_path(mysql_real_data_home,mysql_real_data_home,mysql_home); (void) my_load_path(pidfile_name,pidfile_name,mysql_real_data_home); + (void) my_load_path(opt_plugin_dir, opt_plugin_dir_ptr ? opt_plugin_dir_ptr : + get_relative_path(LIBDIR), mysql_home); + opt_plugin_dir_ptr= opt_plugin_dir; char *sharedir=get_relative_path(SHAREDIR); if (test_if_hard_path(sharedir)) @@ -7371,6 +7636,74 @@ static void create_pid_file() /***************************************************************************** + Instantiate have_xyx for missing storage engines +*****************************************************************************/ +#undef have_isam +#undef have_berkeley_db +#undef have_innodb +#undef have_ndbcluster +#undef have_example_db +#undef have_archive_db +#undef have_csv_db +#undef have_federated_db +#undef have_partition_db +#undef have_blackhole_db + +SHOW_COMP_OPTION have_berkeley_db= SHOW_OPTION_NO; +SHOW_COMP_OPTION have_innodb= SHOW_OPTION_NO; +SHOW_COMP_OPTION have_isam= SHOW_OPTION_NO; +SHOW_COMP_OPTION have_ndbcluster= SHOW_OPTION_NO; +SHOW_COMP_OPTION have_example_db= SHOW_OPTION_NO; +SHOW_COMP_OPTION have_archive_db= SHOW_OPTION_NO; +SHOW_COMP_OPTION have_csv_db= SHOW_OPTION_NO; +SHOW_COMP_OPTION have_federated_db= SHOW_OPTION_NO; +SHOW_COMP_OPTION have_partition_db= SHOW_OPTION_NO; +SHOW_COMP_OPTION have_blackhole_db= SHOW_OPTION_NO; + +#ifndef WITH_BERKELEY_STORAGE_ENGINE +bool berkeley_shared_data; +ulong berkeley_max_lock, berkeley_log_buffer_size; +ulonglong berkeley_cache_size; +ulong berkeley_region_size, berkeley_cache_parts; +char *berkeley_home, *berkeley_tmpdir, *berkeley_logdir; +#endif + +#ifndef WITH_INNOBASE_STORAGE_ENGINE +uint innobase_flush_log_at_trx_commit; +ulong innobase_fast_shutdown; +long innobase_mirrored_log_groups, innobase_log_files_in_group; +longlong innobase_log_file_size; +long innobase_log_buffer_size; +longlong innobase_buffer_pool_size; +long innobase_additional_mem_pool_size; +long innobase_buffer_pool_awe_mem_mb; +long innobase_file_io_threads, innobase_lock_wait_timeout; +long 
innobase_force_recovery; +long innobase_open_files; +char *innobase_data_home_dir, *innobase_data_file_path; +char *innobase_log_group_home_dir, *innobase_log_arch_dir; +char *innobase_unix_file_flush_method; +my_bool innobase_log_archive, + innobase_use_doublewrite, + innobase_use_checksums, + innobase_file_per_table, + innobase_locks_unsafe_for_binlog; + +ulong srv_max_buf_pool_modified_pct; +ulong srv_max_purge_lag; +ulong srv_auto_extend_increment; +ulong srv_n_spin_wait_rounds; +ulong srv_n_free_tickets_to_enter; +ulong srv_thread_sleep_delay; +ulong srv_thread_concurrency; +ulong srv_commit_concurrency; +#endif + +#ifndef WITH_NDBCLUSTER_STORAGE_ENGINE +ulong ndb_cache_check_time; +#endif + +/***************************************************************************** Instantiate templates *****************************************************************************/ @@ -7384,3 +7717,5 @@ template class I_List<NAMED_LIST>; template class I_List<Statement>; template class I_List_iterator<Statement>; #endif + + diff --git a/sql/opt_range.cc b/sql/opt_range.cc index 7c5274eb5eb..7dd694f3411 100644 --- a/sql/opt_range.cc +++ b/sql/opt_range.cc @@ -24,16 +24,42 @@ */ /* - Classes in this file are used in the following way: - 1. For a selection condition a tree of SEL_IMERGE/SEL_TREE/SEL_ARG objects - is created. #of rows in table and index statistics are ignored at this - step. - 2. Created SEL_TREE and index stats data are used to construct a - TABLE_READ_PLAN-derived object (TRP_*). Several 'candidate' table read - plans may be created. - 3. The least expensive table read plan is used to create a tree of - QUICK_SELECT_I-derived objects which are later used for row retrieval. - QUICK_RANGEs are also created in this step. + This file contains: + + RangeAnalysisModule + A module that accepts a condition, index (or partitioning) description, + and builds lists of intervals (in index/partitioning space), such that + all possible records that match the condition are contained within the + intervals. + The entry point for the range analysis module is get_mm_tree() function. + + The lists are returned in form of complicated structure of interlinked + SEL_TREE/SEL_IMERGE/SEL_ARG objects. + See check_quick_keys, find_used_partitions for examples of how to walk + this structure. + All direct "users" of this module are located within this file, too. + + + PartitionPruningModule + A module that accepts a partitioned table, condition, and finds which + partitions we will need to use in query execution. Search down for + "PartitionPruningModule" for description. + The module has single entry point - prune_partitions() function. + + + Range/index_merge/groupby-minmax optimizer module + A module that accepts a table, condition, and returns + - a QUICK_*_SELECT object that can be used to retrieve rows that match + the specified condition, or a "no records will match the condition" + statement. + + The module entry points are + test_quick_select() + get_quick_select_for_ref() + + + Record retrieval code for range/index_merge/groupby-min-max. + Implementations of QUICK_*_SELECT classes. */ #ifdef USE_PRAGMA_IMPLEMENTATION @@ -286,6 +312,13 @@ public: return parent->left == this ? 
&parent->left : &parent->right; } SEL_ARG *clone_tree(); + + /* Return TRUE if this represents "keypartK = const" or "keypartK IS NULL" */ + bool is_singlepoint() + { + return !min_flag && !max_flag && + !field->key_cmp((byte*) min_value, (byte*)max_value); + } }; class SEL_IMERGE; @@ -294,6 +327,11 @@ class SEL_IMERGE; class SEL_TREE :public Sql_alloc { public: + /* + Starting an effort to document this field: + (for some i, keys[i]->type == SEL_ARG::IMPOSSIBLE) => + (type == SEL_TREE::IMPOSSIBLE) + */ enum Type { IMPOSSIBLE, ALWAYS, MAYBE, KEY, KEY_SMALLER } type; SEL_TREE(enum Type type_arg) :type(type_arg) {} SEL_TREE() :type(KEY) @@ -319,25 +357,53 @@ public: /* Note that #records for each key scan is stored in table->quick_rows */ }; +class RANGE_OPT_PARAM +{ +public: + THD *thd; /* Current thread handle */ + TABLE *table; /* Table being analyzed */ + COND *cond; /* Used inside get_mm_tree(). */ + table_map prev_tables; + table_map read_tables; + table_map current_table; /* Bit of the table being analyzed */ + + /* Array of parts of all keys for which range analysis is performed */ + KEY_PART *key_parts; + KEY_PART *key_parts_end; + MEM_ROOT *mem_root; /* Memory that will be freed when range analysis completes */ + MEM_ROOT *old_root; /* Memory that will last until the query end */ + /* + Number of indexes used in range analysis (In SEL_TREE::keys only first + #keys elements are not empty) + */ + uint keys; + + /* + If true, the index descriptions describe real indexes (and it is ok to + call field->optimize_range(real_keynr[...], ...). + Otherwise index description describes fake indexes. + */ + bool using_real_indexes; + + bool remove_jump_scans; + + /* + used_key_no -> table_key_no translation table. Only makes sense if + using_real_indexes==TRUE + */ + uint real_keynr[MAX_KEY]; +}; -typedef struct st_qsel_param { - THD *thd; - TABLE *table; - KEY_PART *key_parts,*key_parts_end; +class PARAM : public RANGE_OPT_PARAM +{ +public: KEY_PART *key[MAX_KEY]; /* First key parts of keys used in the query */ - MEM_ROOT *mem_root, *old_root; - table_map prev_tables,read_tables,current_table; uint baseflag, max_key_part, range_count; - uint keys; /* number of keys used in the query */ - - /* used_key_no -> table_key_no translation table */ - uint real_keynr[MAX_KEY]; char min_key[MAX_KEY_LENGTH+MAX_FIELD_WIDTH], max_key[MAX_KEY_LENGTH+MAX_FIELD_WIDTH]; bool quick; // Don't calulate possible keys - COND *cond; uint fields_bitmap_size; MY_BITMAP needed_fields; /* bitmask of fields needed by the query */ @@ -347,9 +413,9 @@ typedef struct st_qsel_param { uint *imerge_cost_buff; /* buffer for index_merge cost estimates */ uint imerge_cost_buff_size; /* size of the buffer */ - /* TRUE if last checked tree->key can be used for ROR-scan */ + /* TRUE if last checked tree->key can be used for ROR-scan */ bool is_ror_scan; -} PARAM; +}; class TABLE_READ_PLAN; class TRP_RANGE; @@ -360,13 +426,13 @@ class TABLE_READ_PLAN; struct st_ror_scan_info; -static SEL_TREE * get_mm_parts(PARAM *param,COND *cond_func,Field *field, +static SEL_TREE * get_mm_parts(RANGE_OPT_PARAM *param,COND *cond_func,Field *field, Item_func::Functype type,Item *value, Item_result cmp_type); -static SEL_ARG *get_mm_leaf(PARAM *param,COND *cond_func,Field *field, +static SEL_ARG *get_mm_leaf(RANGE_OPT_PARAM *param,COND *cond_func,Field *field, KEY_PART *key_part, Item_func::Functype type,Item *value); -static SEL_TREE *get_mm_tree(PARAM *param,COND *cond); +static SEL_TREE *get_mm_tree(RANGE_OPT_PARAM *param,COND *cond); static bool 
is_key_scan_ror(PARAM *param, uint keynr, uint8 nparts); static ha_rows check_quick_select(PARAM *param,uint index,SEL_ARG *key_tree); @@ -409,8 +475,8 @@ static void print_rowid(byte* val, int len); static void print_quick(QUICK_SELECT_I *quick, const key_map *needed_reg); #endif -static SEL_TREE *tree_and(PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2); -static SEL_TREE *tree_or(PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2); +static SEL_TREE *tree_and(RANGE_OPT_PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2); +static SEL_TREE *tree_or(RANGE_OPT_PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2); static SEL_ARG *sel_add(SEL_ARG *key1,SEL_ARG *key2); static SEL_ARG *key_or(SEL_ARG *key1,SEL_ARG *key2); static SEL_ARG *key_and(SEL_ARG *key1,SEL_ARG *key2,uint clone_flag); @@ -423,7 +489,7 @@ static bool eq_tree(SEL_ARG* a,SEL_ARG *b); static SEL_ARG null_element(SEL_ARG::IMPOSSIBLE); static bool null_part_in_key(KEY_PART *key_part, const char *key, uint length); -bool sel_trees_can_be_ored(SEL_TREE *tree1, SEL_TREE *tree2, PARAM* param); +bool sel_trees_can_be_ored(SEL_TREE *tree1, SEL_TREE *tree2, RANGE_OPT_PARAM* param); /* @@ -455,9 +521,9 @@ public: trees_next(trees), trees_end(trees + PREALLOCED_TREES) {} - int or_sel_tree(PARAM *param, SEL_TREE *tree); - int or_sel_tree_with_checks(PARAM *param, SEL_TREE *new_tree); - int or_sel_imerge_with_checks(PARAM *param, SEL_IMERGE* imerge); + int or_sel_tree(RANGE_OPT_PARAM *param, SEL_TREE *tree); + int or_sel_tree_with_checks(RANGE_OPT_PARAM *param, SEL_TREE *new_tree); + int or_sel_imerge_with_checks(RANGE_OPT_PARAM *param, SEL_IMERGE* imerge); }; @@ -473,7 +539,7 @@ public: -1 - Out of memory. */ -int SEL_IMERGE::or_sel_tree(PARAM *param, SEL_TREE *tree) +int SEL_IMERGE::or_sel_tree(RANGE_OPT_PARAM *param, SEL_TREE *tree) { if (trees_next == trees_end) { @@ -524,7 +590,7 @@ int SEL_IMERGE::or_sel_tree(PARAM *param, SEL_TREE *tree) -1 An error occurred. 
*/ -int SEL_IMERGE::or_sel_tree_with_checks(PARAM *param, SEL_TREE *new_tree) +int SEL_IMERGE::or_sel_tree_with_checks(RANGE_OPT_PARAM *param, SEL_TREE *new_tree) { for (SEL_TREE** tree = trees; tree != trees_next; @@ -558,7 +624,7 @@ int SEL_IMERGE::or_sel_tree_with_checks(PARAM *param, SEL_TREE *new_tree) -1 - An error occurred */ -int SEL_IMERGE::or_sel_imerge_with_checks(PARAM *param, SEL_IMERGE* imerge) +int SEL_IMERGE::or_sel_imerge_with_checks(RANGE_OPT_PARAM *param, SEL_IMERGE* imerge) { for (SEL_TREE** tree= imerge->trees; tree != imerge->trees_next; @@ -604,7 +670,7 @@ inline void imerge_list_and_list(List<SEL_IMERGE> *im1, List<SEL_IMERGE> *im2) other Error, both passed lists are unusable */ -int imerge_list_or_list(PARAM *param, +int imerge_list_or_list(RANGE_OPT_PARAM *param, List<SEL_IMERGE> *im1, List<SEL_IMERGE> *im2) { @@ -624,7 +690,7 @@ int imerge_list_or_list(PARAM *param, other Error */ -int imerge_list_or_tree(PARAM *param, +int imerge_list_or_tree(RANGE_OPT_PARAM *param, List<SEL_IMERGE> *im1, SEL_TREE *tree) { @@ -752,7 +818,7 @@ int QUICK_RANGE_SELECT::init() if (file->inited != handler::NONE) file->ha_index_or_rnd_end(); - DBUG_RETURN(error= file->ha_index_init(index)); + DBUG_RETURN(error= file->ha_index_init(index, 1)); } @@ -777,9 +843,10 @@ QUICK_RANGE_SELECT::~QUICK_RANGE_SELECT() { DBUG_PRINT("info", ("Freeing separate handler %p (free=%d)", file, free_file)); - file->reset(); + file->ha_reset(); file->external_lock(current_thd, F_UNLCK); file->close(); + delete file; } } delete_dynamic(&ranges); /* ranges are allocated in alloc */ @@ -909,13 +976,14 @@ int QUICK_ROR_INTERSECT_SELECT::init() int QUICK_RANGE_SELECT::init_ror_merged_scan(bool reuse_handler) { handler *save_file= file; + THD *thd; DBUG_ENTER("QUICK_RANGE_SELECT::init_ror_merged_scan"); if (reuse_handler) { DBUG_PRINT("info", ("Reusing handler %p", file)); if (file->extra(HA_EXTRA_KEYREAD) || - file->extra(HA_EXTRA_RETRIEVE_PRIMARY_KEY) || + file->ha_retrieve_all_pk() || init() || reset()) { DBUG_RETURN(1); @@ -930,11 +998,12 @@ int QUICK_RANGE_SELECT::init_ror_merged_scan(bool reuse_handler) DBUG_RETURN(0); } - THD *thd= current_thd; - if (!(file= get_new_handler(head, thd->mem_root, head->s->db_type))) + thd= head->in_use; + if (!(file= get_new_handler(head->s, thd->mem_root, head->s->db_type))) goto failure; DBUG_PRINT("info", ("Allocated new handler %p", file)); - if (file->ha_open(head->s->path, head->db_stat, HA_OPEN_IGNORE_IF_LOCKED)) + if (file->ha_open(head, head->s->normalized_path.str, head->db_stat, + HA_OPEN_IGNORE_IF_LOCKED)) { /* Caller will free the memory */ goto failure; @@ -943,7 +1012,7 @@ int QUICK_RANGE_SELECT::init_ror_merged_scan(bool reuse_handler) goto failure; if (file->extra(HA_EXTRA_KEYREAD) || - file->extra(HA_EXTRA_RETRIEVE_PRIMARY_KEY) || + file->ha_retrieve_all_pk() || init() || reset()) { file->external_lock(thd, F_UNLCK); @@ -955,6 +1024,8 @@ int QUICK_RANGE_SELECT::init_ror_merged_scan(bool reuse_handler) DBUG_RETURN(0); failure: + if (file) + delete file; file= save_file; DBUG_RETURN(1); } @@ -1649,10 +1720,10 @@ public: static int fill_used_fields_bitmap(PARAM *param) { TABLE *table= param->table; - param->fields_bitmap_size= (table->s->fields/8 + 1); - uchar *tmp; + param->fields_bitmap_size= bitmap_buffer_size(table->s->fields+1); + uint32 *tmp; uint pk; - if (!(tmp= (uchar*)alloc_root(param->mem_root,param->fields_bitmap_size)) || + if (!(tmp= (uint32*) alloc_root(param->mem_root,param->fields_bitmap_size)) || bitmap_init(¶m->needed_fields, tmp, 
param->fields_bitmap_size*8, FALSE)) return 1; @@ -1771,6 +1842,8 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, param.old_root= thd->mem_root; param.needed_reg= &needed_reg; param.imerge_cost_buff_size= 0; + param.using_real_indexes= TRUE; + param.remove_jump_scans= TRUE; thd->no_errors=1; // Don't warn about NULL init_sql_alloc(&alloc, thd->variables.range_alloc_block_size, 0); @@ -1961,6 +2034,1060 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, DBUG_RETURN(records ? test(quick) : -1); } +/**************************************************************************** + * Partition pruning starts + ****************************************************************************/ +#ifdef WITH_PARTITION_STORAGE_ENGINE + +/* + PartitionPruningModule + + This part of the code does partition pruning. Partition pruning solves the + following problem: given a query over partitioned tables, find partitions + that we will not need to access (i.e. partitions that we can assume to be + empty) when executing the query. + The set of partitions to prune doesn't depend on which query execution + plan will be used to execute the query. + + HOW IT WORKS + + The partition pruning module makes use of the RangeAnalysisModule. The + following examples show how the problem of partition pruning can be reduced + to the range analysis problem: + + EXAMPLE 1 + Consider a query: + + SELECT * FROM t1 WHERE (t1.a < 5 OR t1.a = 10) AND t1.a > 3 AND t1.b='z' + + where table t1 is partitioned using PARTITION BY RANGE(t1.a). An apparent + way to find the used (i.e. not pruned away) partitions is as follows: + + 1. analyze the WHERE clause and extract the list of intervals over t1.a + for the above query we will get this list: {(3 < t1.a < 5), (t1.a=10)} + + 2. for each interval I + { + find partitions that have non-empty intersection with I; + mark them as used; + } + + EXAMPLE 2 + Suppose the table is partitioned by HASH(part_func(t1.a, t1.b)). Then + we need to: + + 1. Analyze the WHERE clause and get a list of intervals over (t1.a, t1.b). + The list of intervals we'll obtain will look like this: + ((t1.a, t1.b) = (1,'foo')), + ((t1.a, t1.b) = (2,'bar')), + ((t1.a, t1.b) > (10,'zz')) (**) + + 2. for each interval I + { + if (the interval has form "(t1.a, t1.b) = (const1, const2)" ) + { + calculate HASH(part_func(t1.a, t1.b)); + find which partition has records with this hash value and mark + it as used; + } + else + { + mark all partitions as used; + break; + } + } + + For both examples the step #1 is exactly what RangeAnalysisModule could + be used to do, if it was provided with appropriate index description + (array of KEY_PART structures). + In example #1, we need to provide it with description of index(t1.a), + in example #2, we need to provide it with description of index(t1.a, t1.b). + + These index descriptions are further called "partitioning index + descriptions". Note that it doesn't matter if such indexes really exist, + as range analysis module only uses the description.
+ + Putting it all together, the partitioning module works as follows: + + prune_partitions() { + call create_partition_index_description(); + + call get_mm_tree(); // invoke the RangeAnalysisModule + + // analyze the obtained interval list and get used partitions + call find_used_partitions(); + } + +*/ + +struct st_part_prune_param; +struct st_part_opt_info; + +typedef void (*mark_full_part_func)(partition_info*, uint32); +typedef uint32 (*part_num_to_partition_id_func)(struct st_part_prune_param*, + uint32); +typedef uint32 (*get_endpoint_func)(partition_info*, bool left_endpoint, + bool include_endpoint); + +/* + Partition pruning operation context +*/ +typedef struct st_part_prune_param +{ + RANGE_OPT_PARAM range_param; /* Range analyzer parameters */ + + /*************************************************************** + Following fields are filled in based solely on partitioning + definition and not modified after that: + **************************************************************/ + partition_info *part_info; /* Copy of table->part_info */ + /* Function to get partition id from partitioning fields only */ + get_part_id_func get_top_partition_id_func; + /* Function to mark a partition as used (w/all subpartitions if they exist)*/ + mark_full_part_func mark_full_partition_used; + + /* Partitioning 'index' description, array of key parts */ + KEY_PART *key; + + /* + Number of fields in partitioning 'index' definition created for + partitioning (0 if partitioning 'index' doesn't include partitioning + fields) + */ + uint part_fields; + uint subpart_fields; /* Same as above for subpartitioning */ + + /* + Number of the last partitioning field keypart in the index, or -1 if + partitioning index definition doesn't include partitioning fields. + */ + int last_part_partno; + int last_subpart_partno; /* Same as above for subpartitioning */ + + /* + Function to be used to analyze non-singlepoint intervals (Can be pointer + to one of two functions - for RANGE and for LIST types). NULL means + partitioning type and/or expression doesn't allow non-singlepoint interval + analysis. + See get_list_array_idx_for_endpoint (or get_range_...) for description of + what the function does. + */ + get_endpoint_func get_endpoint; + + /* Maximum possible value that can be returned by get_endpoint function */ + uint32 max_endpoint_val; + + /* + For RANGE partitioning, part_num_to_part_id_range, for LIST partitioning, + part_num_to_part_id_list. Just to avoid the if-else clutter. + */ + part_num_to_partition_id_func endpoints_walk_func; + + /* + If true, process "key < const" as "part_func(key) < part_func(const)", + otherwise as "part_func(key) <= part_func(const)". Same for '>' and '>='. + This is defined iff get_endpoint != NULL.
+ */ + bool force_include_bounds; + + /* + is_part_keypart[i] == test(keypart #i in partitioning index is a member + used in partitioning) + Used to maintain current values of cur_part_fields and cur_subpart_fields + */ + my_bool *is_part_keypart; + /* Same as above for subpartitioning */ + my_bool *is_subpart_keypart; + + /*************************************************************** + Following fields form find_used_partitions() recursion context: + **************************************************************/ + SEL_ARG **arg_stack; /* "Stack" of SEL_ARGs */ + SEL_ARG **arg_stack_end; /* Top of the stack */ + /* Number of partitioning fields for which we have a SEL_ARG* in arg_stack */ + uint cur_part_fields; + /* Same as cur_part_fields, but for subpartitioning */ + uint cur_subpart_fields; + + /*************************************************************** + Following fields are used to store an 'iterator' that can be + used to obtain a set of used partitions. + **************************************************************/ + /* + Start and end+1 partition "numbers". They can have two meanings depending + on the value of part_num_to_part_id: + part_num_to_part_id_range - numbers are partition ids + part_num_to_part_id_list - numbers are indexes in part_info->list_array + */ + uint32 start_part_num; + uint32 end_part_num; + + /* + A function that should be used to convert the two above "partition + numbers" to partition_ids. + */ + part_num_to_partition_id_func part_num_to_part_id; +} PART_PRUNE_PARAM; + +static bool create_partition_index_description(PART_PRUNE_PARAM *prune_par); +static int find_used_partitions(PART_PRUNE_PARAM *ppar, SEL_ARG *key_tree); +static int find_used_partitions_imerge(PART_PRUNE_PARAM *ppar, + SEL_IMERGE *imerge); +static int find_used_partitions_imerge_list(PART_PRUNE_PARAM *ppar, + List<SEL_IMERGE> &merges); +static void mark_all_partitions_as_used(partition_info *part_info); +static uint32 part_num_to_part_id_range(PART_PRUNE_PARAM* prune_par, + uint32 num); + +#ifndef DBUG_OFF +static void print_partitioning_index(KEY_PART *parts, KEY_PART *parts_end); +static void dbug_print_field(Field *field); +static void dbug_print_segment_range(SEL_ARG *arg, KEY_PART *part); +static void dbug_print_onepoint_range(SEL_ARG **start, uint num); +#endif + + +/* + Perform partition pruning for a given table and condition. + + SYNOPSIS + prune_partitions() + thd Thread handle + table Table to perform partition pruning for + pprune_cond Condition to use for partition pruning + + DESCRIPTION + This function assumes that all partitions are marked as unused when it + is invoked. The function analyzes the condition, finds partitions that + need to be used to retrieve the records that match the condition, and + marks them as used by setting the appropriate bit in + part_info->used_partitions. In the worst case all partitions are marked + as used. + + NOTE + This function returns promptly if called for a non-partitioned table. + + RETURN + TRUE We've inferred that no partitions need to be used (i.e.
no table + records will satisfy pprune_cond) + FALSE Otherwise +*/ + +bool prune_partitions(THD *thd, TABLE *table, Item *pprune_cond) +{ + bool retval= FALSE; + partition_info *part_info = table->part_info; + DBUG_ENTER("prune_partitions"); + + if (!part_info) + DBUG_RETURN(FALSE); /* not a partitioned table */ + + if (!pprune_cond) + { + mark_all_partitions_as_used(part_info); + DBUG_RETURN(FALSE); + } + + PART_PRUNE_PARAM prune_param; + MEM_ROOT alloc; + RANGE_OPT_PARAM *range_par= &prune_param.range_param; + + prune_param.part_info= part_info; + + init_sql_alloc(&alloc, thd->variables.range_alloc_block_size, 0); + range_par->mem_root= &alloc; + range_par->old_root= thd->mem_root; + + if (create_partition_index_description(&prune_param)) + { + mark_all_partitions_as_used(part_info); + free_root(&alloc,MYF(0)); // Return memory & allocator + DBUG_RETURN(FALSE); + } + + range_par->thd= thd; + range_par->table= table; + /* range_par->cond doesn't need initialization */ + range_par->prev_tables= range_par->read_tables= 0; + range_par->current_table= table->map; + + range_par->keys= 1; // one index + range_par->using_real_indexes= FALSE; + range_par->remove_jump_scans= FALSE; + range_par->real_keynr[0]= 0; + + thd->no_errors=1; // Don't warn about NULL + thd->mem_root=&alloc; + + prune_param.key= prune_param.range_param.key_parts; + SEL_TREE *tree; + SEL_ARG *arg; + int res; + + tree= get_mm_tree(range_par, pprune_cond); + if (!tree) + goto all_used; + + if (tree->type == SEL_TREE::IMPOSSIBLE) + { + retval= TRUE; + goto end; + } + + if (tree->type != SEL_TREE::KEY && tree->type != SEL_TREE::KEY_SMALLER) + goto all_used; + + if (tree->merges.is_empty()) + { + prune_param.arg_stack_end= prune_param.arg_stack; + prune_param.cur_part_fields= 0; + prune_param.cur_subpart_fields= 0; + prune_param.part_num_to_part_id= part_num_to_part_id_range; + prune_param.start_part_num= 0; + prune_param.end_part_num= prune_param.part_info->no_parts; + if (!tree->keys[0] || (-1 == (res= find_used_partitions(&prune_param, + tree->keys[0])))) + goto all_used; + } + else + { + if (tree->merges.elements == 1) + { + if (-1 == (res= find_used_partitions_imerge(&prune_param, + tree->merges.head()))) + goto all_used; + } + else + { + if (-1 == (res= find_used_partitions_imerge_list(&prune_param, + tree->merges))) + goto all_used; + } + } + + /* + res == 0 => no used partitions => retval=TRUE + res == 1 => some used partitions => retval=FALSE + res == -1 - we jump over this line to all_used: + */ + retval= test(!res); + goto end; + +all_used: + retval= FALSE; // some partitions are used + mark_all_partitions_as_used(prune_param.part_info); +end: + thd->no_errors=0; + thd->mem_root= range_par->old_root; + free_root(&alloc,MYF(0)); // Return memory & allocator + DBUG_RETURN(retval); +} + + +/* + Store key image to table record + + SYNOPSIS + store_key_image_to_rec() + field Field whose key image should be stored. + ptr Field value in key format. + len Length of the value, in bytes.
+*/ + +static void store_key_image_to_rec(Field *field, char *ptr, uint len) +{ + /* Do the same as print_key() does */ + if (field->real_maybe_null()) + { + if (*ptr) + { + field->set_null(); + return; + } + ptr++; + } + field->set_key_image(ptr, len); +} + + +/* + For SEL_ARG* array, store sel_arg->min values into table record buffer + + SYNOPSIS + store_selargs_to_rec() + ppar Partition pruning context + start Array of SEL_ARG* for which the minimum values should be stored + num Number of elements in the array +*/ + +static void store_selargs_to_rec(PART_PRUNE_PARAM *ppar, SEL_ARG **start, + int num) +{ + KEY_PART *parts= ppar->range_param.key_parts; + for (SEL_ARG **end= start + num; start != end; start++) + { + SEL_ARG *sel_arg= (*start); + store_key_image_to_rec(sel_arg->field, sel_arg->min_value, + parts[sel_arg->part].length); + } +} + + +/* Mark a partition as used in the case when there are no subpartitions */ +static void mark_full_partition_used_no_parts(partition_info* part_info, + uint32 part_id) +{ + bitmap_set_bit(&part_info->used_partitions, part_id); +} + + +/* Mark a partition as used in the case when there are subpartitions */ +static void mark_full_partition_used_with_parts(partition_info *part_info, + uint32 part_id) +{ + uint32 start= part_id * part_info->no_subparts; + uint32 end= start + part_info->no_subparts; + for (; start != end; start++) + bitmap_set_bit(&part_info->used_partitions, start); +} + +/* See comment in PART_PRUNE_PARAM::part_num_to_part_id about what this is */ +static uint32 part_num_to_part_id_range(PART_PRUNE_PARAM* ppar, uint32 num) +{ + return num; +} + +/* See comment in PART_PRUNE_PARAM::part_num_to_part_id about what this is */ +static uint32 part_num_to_part_id_list(PART_PRUNE_PARAM* ppar, uint32 num) +{ + return ppar->part_info->list_array[num].partition_id; +} + + +/* + Find the set of used partitions for List<SEL_IMERGE> + SYNOPSIS + find_used_partitions_imerge_list + ppar Partition pruning context. + merges List of SEL_IMERGE structures to perform pruning for. + + DESCRIPTION + List<SEL_IMERGE> represents "imerge1 AND imerge2 AND ...". + The set of used partitions is an intersection of used partitions sets + for imerge_{i}. + We accumulate this intersection in a separate bitmap. + + RETURN + See find_used_partitions() +*/ + +static int find_used_partitions_imerge_list(PART_PRUNE_PARAM *ppar, + List<SEL_IMERGE> &merges) +{ + MY_BITMAP all_merges; + uint bitmap_bytes; + uint32 *bitmap_buf; + uint n_bits= ppar->part_info->used_partitions.n_bits; + bitmap_bytes= bitmap_buffer_size(n_bits); + if (!(bitmap_buf= (uint32*)alloc_root(ppar->range_param.mem_root, + bitmap_bytes))) + { + /* + Fallback: process just the first SEL_IMERGE. This can leave us with more + partitions marked as used than actually needed.
+ */ + return find_used_partitions_imerge(ppar, merges.head()); + } + bitmap_init(&all_merges, bitmap_buf, n_bits, FALSE); + bitmap_set_prefix(&all_merges, n_bits); + + List_iterator<SEL_IMERGE> it(merges); + SEL_IMERGE *imerge; + while ((imerge=it++)) + { + int res= find_used_partitions_imerge(ppar, imerge); + if (!res) + { + /* no used partitions on one ANDed imerge => no used partitions at all */ + return 0; + } + + if (res != -1) + bitmap_intersect(&all_merges, &ppar->part_info->used_partitions); + + if (bitmap_is_clear_all(&all_merges)) + return 0; + + bitmap_clear_all(&ppar->part_info->used_partitions); + } + memcpy(ppar->part_info->used_partitions.bitmap, all_merges.bitmap, + bitmap_bytes); + return 1; +} + + +/* + Find the set of used partitions for SEL_IMERGE structure + SYNOPSIS + find_used_partitions_imerge() + ppar Partition pruning context. + imerge SEL_IMERGE to perform pruning for. + + DESCRIPTION + SEL_IMERGE represents "tree1 OR tree2 OR ...". The implementation is + trivial - just mark used partitions for each tree and bail out early + if for some tree_{i} all partitions are used. + + RETURN + See find_used_partitions(). +*/ + +static +int find_used_partitions_imerge(PART_PRUNE_PARAM *ppar, SEL_IMERGE *imerge) +{ + int res= 0; + for (SEL_TREE **ptree= imerge->trees; ptree < imerge->trees_next; ptree++) + { + ppar->arg_stack_end= ppar->arg_stack; + ppar->cur_part_fields= 0; + ppar->cur_subpart_fields= 0; + ppar->part_num_to_part_id= part_num_to_part_id_range; + ppar->start_part_num= 0; + ppar->end_part_num= ppar->part_info->no_parts; + if (-1 == (res |= find_used_partitions(ppar, (*ptree)->keys[0]))) + return -1; + } + return res; +} + + +/* + Recursively walk the SEL_ARG tree, find/mark partitions that need to be used + + SYNOPSIS + find_used_partitions() + ppar Partition pruning context. + key_tree Intervals tree to perform pruning for. + + DESCRIPTION + This function + * recursively walks the SEL_ARG* tree, collecting partitioning + "intervals"; + * finds the partitions one needs to use to get rows in these intervals; + * marks these partitions as used. + + WHAT IS CONSIDERED TO BE "INTERVALS" + A partition pruning "interval" is equivalent to a condition in one of the + following forms: + + "partition_field1=const1 AND ... partition_fieldN=constN" (1) + "subpartition_field1=const1 AND ... subpartition_fieldN=constN" (2) + "(1) AND (2)" (3) + + In (1) and (2) all [sub]partitioning fields must be used, and "x=const" + includes "x IS NULL". + + If partitioning is performed using + + PARTITION BY RANGE(unary_monotonic_func(single_partition_field)), + + then the following is also an interval: + + "const1 OP1 single_partition_field OP2 const2" (4) + + where OP1 and OP2 are '<' or '<=', and const_i can be +/- inf. + Everything else is not a partition pruning "interval". + + RETURN + 1 OK, one or more [sub]partitions are marked as used. + 0 The passed condition doesn't match any partitions + -1 Couldn't infer any partition pruning "intervals" from the passed + SEL_ARG* tree (which means that all partitions should be marked as + used). Marking partitions as used is the responsibility of the caller.
+*/ + +static +int find_used_partitions(PART_PRUNE_PARAM *ppar, SEL_ARG *key_tree) +{ + int res, left_res=0, right_res=0; + int partno= (int)key_tree->part; + bool pushed= FALSE; + bool set_full_part_if_bad_ret= FALSE; + + if (key_tree->left != &null_element) + { + if (-1 == (left_res= find_used_partitions(ppar,key_tree->left))) + return -1; + } + + if (key_tree->type == SEL_ARG::KEY_RANGE) + { + if (partno == 0 && (NULL != ppar->get_endpoint)) + { + /* + Partitioning is done by RANGE|INTERVAL(monotonic_expr(fieldX)), and + we got "const1 < fieldX < const2" interval. + */ + DBUG_EXECUTE("info", dbug_print_segment_range(key_tree, + ppar->range_param. + key_parts);); + /* Find minimum */ + if (key_tree->min_flag & NO_MIN_RANGE) + ppar->start_part_num= 0; + else + { + /* + Store the interval edge in the record buffer, and call the + function that maps the edge in table-field space to an edge + in ordered-set-of-partitions (for RANGE partitioning) or + indexes-in-ordered-array-of-list-constants (for LIST) space. + */ + store_key_image_to_rec(key_tree->field, key_tree->min_value, + ppar->range_param.key_parts[0].length); + bool include_endp= ppar->force_include_bounds || + !test(key_tree->min_flag & NEAR_MIN); + ppar->start_part_num= ppar->get_endpoint(ppar->part_info, 1, + include_endp); + if (ppar->start_part_num == ppar->max_endpoint_val) + { + res= 0; /* No satisfying partitions */ + goto pop_and_go_right; + } + } + + /* Find maximum, do the same as above but for right interval bound */ + if (key_tree->max_flag & NO_MAX_RANGE) + ppar->end_part_num= ppar->max_endpoint_val; + else + { + store_key_image_to_rec(key_tree->field, key_tree->max_value, + ppar->range_param.key_parts[0].length); + bool include_endp= ppar->force_include_bounds || + !test(key_tree->max_flag & NEAR_MAX); + ppar->end_part_num= ppar->get_endpoint(ppar->part_info, 0, + include_endp); + if (ppar->start_part_num == ppar->end_part_num) + { + res= 0; /* No satisfying partitions */ + goto pop_and_go_right; + } + } + ppar->part_num_to_part_id= ppar->endpoints_walk_func; + + /* + Save our intent to mark the full partition as used if we will not be + able to obtain further limits on subpartitions + */ + set_full_part_if_bad_ret= TRUE; + goto process_next_key_part; + } + + if (key_tree->is_singlepoint()) + { + pushed= TRUE; + ppar->cur_part_fields+= ppar->is_part_keypart[partno]; + ppar->cur_subpart_fields+= ppar->is_subpart_keypart[partno]; + *(ppar->arg_stack_end++) = key_tree; + + if (partno == ppar->last_part_partno && + ppar->cur_part_fields == ppar->part_fields) + { + /* + Ok, we've got "fieldN<=>constN"-type SEL_ARGs for all partitioning + fields. Save all constN constants into the table record buffer. + */ + store_selargs_to_rec(ppar, ppar->arg_stack, ppar->part_fields); + DBUG_EXECUTE("info", dbug_print_onepoint_range(ppar->arg_stack, + ppar->part_fields);); + uint32 part_id; + /* then find in which partition the {const1, ...,constN} tuple goes */ + if (ppar->get_top_partition_id_func(ppar->part_info, &part_id)) + { + res= 0; /* No satisfying partitions */ + goto pop_and_go_right; + } + /* Remember the limit we got - single partition #part_id */ + ppar->part_num_to_part_id= part_num_to_part_id_range; + ppar->start_part_num= part_id; + ppar->end_part_num= part_id + 1; + + /* + If there are no subpartitions, or we fail to get any limit for + them, then we'll mark the full partition as used.
+ */ + set_full_part_if_bad_ret= TRUE; + goto process_next_key_part; + } + + if (partno == ppar->last_subpart_partno) + { + /* + Ok, we've got "fieldN<=>constN"-type SEL_ARGs for all subpartitioning + fields. Save all constN constants into the table record buffer. + */ + store_selargs_to_rec(ppar, ppar->arg_stack_end - ppar->subpart_fields, + ppar->subpart_fields); + DBUG_EXECUTE("info", dbug_print_onepoint_range(ppar->arg_stack_end - + ppar->subpart_fields, + ppar->subpart_fields);); + /* Find the subpartition (it's HASH/KEY so we always have one) */ + partition_info *part_info= ppar->part_info; + uint32 subpart_id= part_info->get_subpartition_id(part_info); + + /* Mark the found subpartition as used within each partition in range */ + for (uint32 num= ppar->start_part_num; num != ppar->end_part_num; + num++) + { + bitmap_set_bit(&part_info->used_partitions, + ppar->part_num_to_part_id(ppar, num) * + part_info->no_subparts + subpart_id); + } + res= 1; /* Some partitions were marked as used */ + goto pop_and_go_right; + } + } + else + { + /* + Can't handle condition on current key part. If we're so deep that + we're processing the subpartitioning's key parts, this means we'll not + be able to infer any suitable condition, so bail out. + */ + if (partno >= ppar->last_part_partno) + return -1; + } + } + +process_next_key_part: + if (key_tree->next_key_part) + res= find_used_partitions(ppar, key_tree->next_key_part); + else + res= -1; + + if (res == -1) /* Got "full range" for key_tree->next_key_part call */ + { + if (set_full_part_if_bad_ret) + { + for (uint32 num= ppar->start_part_num; num != ppar->end_part_num; + num++) + { + ppar->mark_full_partition_used(ppar->part_info, + ppar->part_num_to_part_id(ppar, num)); + } + res= 1; + } + else + return -1; + } + + if (set_full_part_if_bad_ret) + { + /* Restore the "used partition iterator" to its default */ + ppar->part_num_to_part_id= part_num_to_part_id_range; + ppar->start_part_num= 0; + ppar->end_part_num= ppar->part_info->no_parts; + } + + if (pushed) + { +pop_and_go_right: + /* Pop this key part info off the "stack" */ + ppar->arg_stack_end--; + ppar->cur_part_fields-= ppar->is_part_keypart[partno]; + ppar->cur_subpart_fields-= ppar->is_subpart_keypart[partno]; + } + + if (key_tree->right != &null_element) + { + if (-1 == (right_res= find_used_partitions(ppar,key_tree->right))) + return -1; + } + return (left_res || right_res || res); +} + + +static void mark_all_partitions_as_used(partition_info *part_info) +{ + bitmap_set_all(&part_info->used_partitions); +} + + +/* + Check if field types allow us to construct a partitioning index description + + SYNOPSIS + fields_ok_for_partition_index() + pfield NULL-terminated array of pointers to fields. + + DESCRIPTION + For an array of fields, check if we can use all of the fields to create + a partitioning index description. + + We can't process GEOMETRY fields - for these fields singlepoint intervals + can't be generated, and non-singlepoint intervals are "special" kinds of + intervals to which our processing logic can't be applied. + + It is not known if we could process ENUM fields, so they are disabled to be + on the safe side.
+ + RETURN + TRUE Yes, fields can be used in partitioning index + FALSE Otherwise +*/ + +static bool fields_ok_for_partition_index(Field **pfield) +{ + if (!pfield) + return FALSE; + for (; (*pfield); pfield++) + { + enum_field_types ftype= (*pfield)->real_type(); + if (ftype == FIELD_TYPE_ENUM || ftype == FIELD_TYPE_GEOMETRY) + return FALSE; + } + return TRUE; +} + + +/* + Create partition index description and fill related info in the context + struct + + SYNOPSIS + create_partition_index_description() + prune_par INOUT Partition pruning context + + DESCRIPTION + Create partition index description. The partition index description is: + + part_index(used_fields_list(part_expr), used_fields_list(subpart_expr)) + + If partitioning/sub-partitioning uses BLOB or Geometry fields, then + corresponding fields_list(...) is not included into index description + and we don't perform partition pruning for partitions/subpartitions. + + RETURN + TRUE Out of memory or can't do partition pruning at all + FALSE OK +*/ + +static bool create_partition_index_description(PART_PRUNE_PARAM *ppar) +{ + RANGE_OPT_PARAM *range_par= &(ppar->range_param); + partition_info *part_info= ppar->part_info; + uint used_part_fields, used_subpart_fields; + + used_part_fields= fields_ok_for_partition_index(part_info->part_field_array) ? + part_info->no_part_fields : 0; + used_subpart_fields= + fields_ok_for_partition_index(part_info->subpart_field_array)? + part_info->no_subpart_fields : 0; + + uint total_parts= used_part_fields + used_subpart_fields; + + ppar->part_fields= used_part_fields; + ppar->last_part_partno= (int)used_part_fields - 1; + + ppar->subpart_fields= used_subpart_fields; + ppar->last_subpart_partno= + used_subpart_fields?(int)(used_part_fields + used_subpart_fields - 1): -1; + + if (is_sub_partitioned(part_info)) + { + ppar->mark_full_partition_used= mark_full_partition_used_with_parts; + ppar->get_top_partition_id_func= part_info->get_part_partition_id; + } + else + { + ppar->mark_full_partition_used= mark_full_partition_used_no_parts; + ppar->get_top_partition_id_func= part_info->get_partition_id; + } + + enum_monotonicity_info minfo; + ppar->get_endpoint= NULL; + if (part_info->part_expr && + (minfo= part_info->part_expr->get_monotonicity_info()) != NON_MONOTONIC) + { + /* + ppar->force_include_bounds controls how we'll process "field < C" and + "field > C" intervals. + If the partitioning function F is strictly increasing, then for any x, y + "x < y" => "F(x) < F(y)" (*), i.e. when we get interval "field < C" + we can perform partition pruning on the equivalent "F(field) < F(C)". + + If the partitioning function is not strictly increasing (it is simply + increasing), then instead of (*) we get "x < y" => "F(x) <= F(y)", + i.e. for interval "field < C" we can perform partition pruning for + "F(field) <= F(C)".
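+
+ For example (an illustration, not tied to a particular code path): with
+ PARTITION BY RANGE(YEAR(d)), YEAR() is increasing but not strictly
+ increasing (many dates map to the same year), so the interval
+ "d < '2005-03-01'" must be pruned using "YEAR(d) <= YEAR('2005-03-01')"
+ rather than "YEAR(d) < ...".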
+ */ + ppar->force_include_bounds= test(minfo == MONOTONIC_INCREASING); + if (part_info->part_type == RANGE_PARTITION) + { + ppar->get_endpoint= get_partition_id_range_for_endpoint; + ppar->endpoints_walk_func= part_num_to_part_id_range; + ppar->max_endpoint_val= part_info->no_parts; + } + else if (part_info->part_type == LIST_PARTITION) + { + ppar->get_endpoint= get_list_array_idx_for_endpoint; + ppar->endpoints_walk_func= part_num_to_part_id_list; + ppar->max_endpoint_val= part_info->no_list_values; + } + } + + KEY_PART *key_part; + MEM_ROOT *alloc= range_par->mem_root; + if (!total_parts || + !(key_part= (KEY_PART*)alloc_root(alloc, sizeof(KEY_PART)* + total_parts)) || + !(ppar->arg_stack= (SEL_ARG**)alloc_root(alloc, sizeof(SEL_ARG*)* + total_parts)) || + !(ppar->is_part_keypart= (my_bool*)alloc_root(alloc, sizeof(my_bool)* + total_parts)) || + !(ppar->is_subpart_keypart= (my_bool*)alloc_root(alloc, sizeof(my_bool)* + total_parts))) + return TRUE; + + range_par->key_parts= key_part; + Field **field= (ppar->part_fields)? part_info->part_field_array : + part_info->subpart_field_array; + bool subpart_fields= FALSE; + for (uint part= 0; part < total_parts; part++, key_part++) + { + key_part->key= 0; + key_part->part= part; + key_part->length= (*field)->pack_length_in_rec(); + /* + psergey-todo: check yet again if this is correct for tricky field types, + e.g. see "Fix a fatal error in decimal key handling" in open_binary_frm() + */ + key_part->store_length= (*field)->pack_length(); + if ((*field)->real_maybe_null()) + key_part->store_length+= HA_KEY_NULL_LENGTH; + if ((*field)->type() == FIELD_TYPE_BLOB || + (*field)->real_type() == MYSQL_TYPE_VARCHAR) + key_part->store_length+= HA_KEY_BLOB_LENGTH; + + key_part->field= (*field); + key_part->image_type = Field::itRAW; + /* We don't set key_parts->null_bit as it will not be used */ + + ppar->is_part_keypart[part]= !subpart_fields; + ppar->is_subpart_keypart[part]= subpart_fields; + + if (!*(++field)) + { + field= part_info->subpart_field_array; + subpart_fields= TRUE; + } + } + range_par->key_parts_end= key_part; + + DBUG_EXECUTE("info", print_partitioning_index(range_par->key_parts, + range_par->key_parts_end);); + return FALSE; +} + + +#ifndef DBUG_OFF + +static void print_partitioning_index(KEY_PART *parts, KEY_PART *parts_end) +{ + DBUG_ENTER("print_partitioning_index"); + DBUG_LOCK_FILE; + fprintf(DBUG_FILE, "partitioning INDEX("); + for (KEY_PART *p=parts; p != parts_end; p++) + { + fprintf(DBUG_FILE, "%s%s", p==parts?"":" ,", p->field->field_name); + } + fputs(");\n", DBUG_FILE); + DBUG_UNLOCK_FILE; + DBUG_VOID_RETURN; +} + +/* Print field value into debug trace, in NULL-aware way. */ +static void dbug_print_field(Field *field) +{ + if (field->is_real_null()) + fprintf(DBUG_FILE, "NULL"); + else + { + char buf[256]; + String str(buf, sizeof(buf), &my_charset_bin); + str.length(0); + String *pstr; + pstr= field->val_str(&str); + fprintf(DBUG_FILE, "'%s'", pstr->c_ptr_safe()); + } +} + + +/* Print a "c1 < keypartX < c2" - type interval into debug trace. 
*/ +static void dbug_print_segment_range(SEL_ARG *arg, KEY_PART *part) +{ + DBUG_ENTER("dbug_print_segment_range"); + DBUG_LOCK_FILE; + if (!(arg->min_flag & NO_MIN_RANGE)) + { + store_key_image_to_rec(part->field, (char*)(arg->min_value), part->length); + dbug_print_field(part->field); + if (arg->min_flag & NEAR_MIN) + fputs(" < ", DBUG_FILE); + else + fputs(" <= ", DBUG_FILE); + } + + fprintf(DBUG_FILE, "%s", part->field->field_name); + + if (!(arg->max_flag & NO_MAX_RANGE)) + { + if (arg->max_flag & NEAR_MAX) + fputs(" < ", DBUG_FILE); + else + fputs(" <= ", DBUG_FILE); + store_key_image_to_rec(part->field, (char*)(arg->max_value), part->length); + dbug_print_field(part->field); + } + fputs("\n", DBUG_FILE); + DBUG_UNLOCK_FILE; + DBUG_VOID_RETURN; +} + + +/* + Print a singlepoint multi-keypart range interval to debug trace + + SYNOPSIS + dbug_print_onepoint_range() + start Array of SEL_ARG* ptrs representing conditions on key parts + num Number of elements in the array. + + DESCRIPTION + This function prints a "keypartN=constN AND ... AND keypartK=constK"-type + interval to debug trace. +*/ + +static void dbug_print_onepoint_range(SEL_ARG **start, uint num) +{ + DBUG_ENTER("dbug_print_onepoint_range"); + DBUG_LOCK_FILE; + SEL_ARG **end= start + num; + + for (SEL_ARG **arg= start; arg != end; arg++) + { + Field *field= (*arg)->field; + fprintf(DBUG_FILE, "%s%s=", (arg==start)?"":", ", field->field_name); + dbug_print_field(field); + } + fputs("\n", DBUG_FILE); + DBUG_UNLOCK_FILE; + DBUG_VOID_RETURN; +} +#endif + +/**************************************************************************** + * Partition pruning code ends + ****************************************************************************/ +#endif + /* Get cost of 'sweep' full records retrieval.
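The endpoint mapping that the pruning code above performs for RANGE partitioning can be illustrated with a small standalone sketch. Everything below (the name prune_range_partitions, the bounds vector, the int-only column) is a simplifying assumption made for illustration; the server-side get_partition_id_range_for_endpoint works on Field values and distinguishes open/closed interval ends via its include_endpoint argument:

    #include <algorithm>
    #include <cstdio>
    #include <utility>
    #include <vector>

    /*
      RANGE partition i holds values v with bounds[i-1] <= v < bounds[i]
      (bounds[-1] taken as -infinity), as in VALUES LESS THAN (10),(20),(30).
      Map a closed interval [lo, hi] to the half-open range of partition ids
      that can contain matching rows; start == end means "no satisfying
      partitions" (the analogue of the res= 0 case in find_used_partitions()).
    */
    static std::pair<size_t,size_t>
    prune_range_partitions(const std::vector<int> &bounds, int lo, int hi)
    {
      /* The first partition whose upper bound exceeds lo contains lo. */
      size_t start= std::upper_bound(bounds.begin(), bounds.end(), lo) -
                    bounds.begin();
      if (start == bounds.size() || lo > hi)
        return std::make_pair(start, start);      /* nothing can match */
      /* Partition containing hi; +1 makes the end exclusive. */
      size_t end= std::upper_bound(bounds.begin(), bounds.end(), hi) -
                  bounds.begin();
      return std::make_pair(start, std::min(end + 1, bounds.size()));
    }

    int main()
    {
      std::vector<int> bounds;
      bounds.push_back(10); bounds.push_back(20); bounds.push_back(30);
      std::pair<size_t,size_t> r= prune_range_partitions(bounds, 15, 25);
      /* Prints "used partitions: [1, 3)": p1 = [10,20), p2 = [20,30). */
      printf("used partitions: [%u, %u)\n",
             (unsigned) r.first, (unsigned) r.second);
      return 0;
    }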
@@ -2395,7 +3522,7 @@ static ROR_SCAN_INFO *make_ror_scan(const PARAM *param, int idx, SEL_ARG *sel_arg) { ROR_SCAN_INFO *ror_scan; - uchar *bitmap_buf; + uint32 *bitmap_buf; uint keynr; DBUG_ENTER("make_ror_scan"); @@ -2410,8 +3537,8 @@ ROR_SCAN_INFO *make_ror_scan(const PARAM *param, int idx, SEL_ARG *sel_arg) ror_scan->sel_arg= sel_arg; ror_scan->records= param->table->quick_rows[keynr]; - if (!(bitmap_buf= (uchar*)alloc_root(param->mem_root, - param->fields_bitmap_size))) + if (!(bitmap_buf= (uint32*)alloc_root(param->mem_root, + param->fields_bitmap_size))) DBUG_RETURN(NULL); if (bitmap_init(&ror_scan->covered_fields, bitmap_buf, @@ -2525,12 +3652,13 @@ static ROR_INTERSECT_INFO* ror_intersect_init(const PARAM *param) { ROR_INTERSECT_INFO *info; - uchar* buf; + uint32* buf; if (!(info= (ROR_INTERSECT_INFO*)alloc_root(param->mem_root, sizeof(ROR_INTERSECT_INFO)))) return NULL; info->param= param; - if (!(buf= (uchar*)alloc_root(param->mem_root, param->fields_bitmap_size))) + if (!(buf= (uint32*)alloc_root(param->mem_root, + param->fields_bitmap_size))) return NULL; if (bitmap_init(&info->covered_fields, buf, param->fields_bitmap_size*8, FALSE)) @@ -2547,7 +3675,7 @@ void ror_intersect_cpy(ROR_INTERSECT_INFO *dst, const ROR_INTERSECT_INFO *src) { dst->param= src->param; memcpy(dst->covered_fields.bitmap, src->covered_fields.bitmap, - src->covered_fields.bitmap_size); + no_bytes_in_map(&src->covered_fields)); dst->out_rows= src->out_rows; dst->is_covering= src->is_covering; dst->index_records= src->index_records; @@ -3089,9 +4217,9 @@ TRP_ROR_INTERSECT *get_best_covering_ror_intersect(PARAM *param, /*I=set of all covering indexes */ ror_scan_mark= tree->ror_scans; - uchar buf[MAX_KEY/8+1]; + uint32 int_buf[MAX_KEY/32+1]; MY_BITMAP covered_fields; - if (bitmap_init(&covered_fields, buf, nbits, FALSE)) + if (bitmap_init(&covered_fields, int_buf, nbits, FALSE)) DBUG_RETURN(0); bitmap_clear_all(&covered_fields); @@ -3418,7 +4546,7 @@ QUICK_SELECT_I *TRP_ROR_UNION::make_quick(PARAM *param, 0 on error */ -static SEL_TREE *get_ne_mm_tree(PARAM *param, Item_func *cond_func, +static SEL_TREE *get_ne_mm_tree(RANGE_OPT_PARAM *param, Item_func *cond_func, Field *field, Item *lt_value, Item *gt_value, Item_result cmp_type) @@ -3453,7 +4581,7 @@ static SEL_TREE *get_ne_mm_tree(PARAM *param, Item_func *cond_func, Pointer to the tree built tree */ -static SEL_TREE *get_func_mm_tree(PARAM *param, Item_func *cond_func, +static SEL_TREE *get_func_mm_tree(RANGE_OPT_PARAM *param, Item_func *cond_func, Field *field, Item *value, Item_result cmp_type, bool inv) { @@ -3546,7 +4674,7 @@ static SEL_TREE *get_func_mm_tree(PARAM *param, Item_func *cond_func, /* make a select tree of all keys in condition */ -static SEL_TREE *get_mm_tree(PARAM *param,COND *cond) +static SEL_TREE *get_mm_tree(RANGE_OPT_PARAM *param,COND *cond) { SEL_TREE *tree=0; SEL_TREE *ftree= 0; @@ -3719,7 +4847,7 @@ static SEL_TREE *get_mm_tree(PARAM *param,COND *cond) static SEL_TREE * -get_mm_parts(PARAM *param, COND *cond_func, Field *field, +get_mm_parts(RANGE_OPT_PARAM *param, COND *cond_func, Field *field, Item_func::Functype type, Item *value, Item_result cmp_type) { @@ -3769,7 +4897,7 @@ get_mm_parts(PARAM *param, COND *cond_func, Field *field, static SEL_ARG * -get_mm_leaf(PARAM *param, COND *conf_func, Field *field, KEY_PART *key_part, +get_mm_leaf(RANGE_OPT_PARAM *param, COND *conf_func, Field *field, KEY_PART *key_part, Item_func::Functype type,Item *value) { uint maybe_null=(uint) field->real_maybe_null(); @@ -3828,8 +4956,11 @@ 
get_mm_leaf(PARAM *param, COND *conf_func, Field *field, KEY_PART *key_part, !(conf_func->compare_collation()->state & MY_CS_BINSORT)) goto end; - optimize_range= field->optimize_range(param->real_keynr[key_part->key], - key_part->part); + if (param->using_real_indexes) + optimize_range= field->optimize_range(param->real_keynr[key_part->key], + key_part->part); + else + optimize_range= TRUE; if (type == Item_func::LIKE_FUNC) { @@ -4096,7 +5227,7 @@ sel_add(SEL_ARG *key1,SEL_ARG *key2) static SEL_TREE * -tree_and(PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2) +tree_and(RANGE_OPT_PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2) { DBUG_ENTER("tree_and"); if (!tree1) @@ -4166,7 +5297,8 @@ tree_and(PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2) using index_merge. */ -bool sel_trees_can_be_ored(SEL_TREE *tree1, SEL_TREE *tree2, PARAM* param) +bool sel_trees_can_be_ored(SEL_TREE *tree1, SEL_TREE *tree2, + RANGE_OPT_PARAM* param) { key_map common_keys= tree1->keys_map; DBUG_ENTER("sel_trees_can_be_ored"); @@ -4192,8 +5324,84 @@ bool sel_trees_can_be_ored(SEL_TREE *tree1, SEL_TREE *tree2, PARAM* param) DBUG_RETURN(FALSE); } + +/* + Remove the trees that are not suitable for record retrieval. + SYNOPSIS + remove_nonrange_trees() + param Range analysis parameter + tree Tree to be processed, tree->type is KEY or KEY_SMALLER + + DESCRIPTION + This function walks through tree->keys[] and removes the SEL_ARG* trees + that are not "maybe" trees (*) and cannot be used to construct quick range + selects. + (*) - have type MAYBE or MAYBE_KEY. Perhaps we should remove trees of + these types here as well. + + A SEL_ARG* tree cannot be used to construct quick select if it has + tree->part != 0. (e.g. it could represent "keypart2 < const"). + + WHY THIS FUNCTION IS NEEDED + + Normally we allow construction of SEL_TREE objects that have SEL_ARG + trees that do not allow quick range select construction. For example for + " keypart1=1 AND keypart2=2 " the execution will proceed as follows: + tree1= SEL_TREE { SEL_ARG{keypart1=1} } + tree2= SEL_TREE { SEL_ARG{keypart2=2} } -- can't make quick range select + from this + call tree_and(tree1, tree2) -- this joins SEL_ARGs into a usable SEL_ARG + tree. + + There is an exception though: when we construct index_merge SEL_TREE, + any SEL_ARG* tree that cannot be used to construct quick range select can + be removed, because current range analysis code doesn't provide any way + that such a tree could later be combined with another tree. + Consider an example: we should not construct + st1 = SEL_TREE { + merges = SEL_IMERGE { + SEL_TREE(t.key1part1 = 1), + SEL_TREE(t.key2part2 = 2) -- (*) + } + }; + because + - (*) cannot be used to construct quick range select, + - There is no execution path that would cause (*) to be converted to + a tree that could be used. + + The latter is easy to verify: first, notice that the only way to convert + (*) into a usable tree is to call tree_and(something, (*)). + + Second, look at what the tree_and/tree_or functions would do when passed a + SEL_TREE with the structure that the st1 tree has, and conclude that + tree_and(something, (*)) will not be called. + + RETURN + 0 Ok, some suitable trees left + 1 No tree->keys[] left.
+*/ + +static bool remove_nonrange_trees(RANGE_OPT_PARAM *param, SEL_TREE *tree) +{ + bool res= FALSE; + for (uint i=0; i < param->keys; i++) + { + if (tree->keys[i]) + { + if (tree->keys[i]->part) + { + tree->keys[i]= NULL; + tree->keys_map.clear_bit(i); + } + else + res= TRUE; + } + } + return !res; +} + + static SEL_TREE * -tree_or(PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2) +tree_or(RANGE_OPT_PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2) { DBUG_ENTER("tree_or"); if (!tree1 || !tree2) @@ -4235,6 +5443,13 @@ tree_or(PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2) /* ok, two trees have KEY type but cannot be used without index merge */ if (tree1->merges.is_empty() && tree2->merges.is_empty()) { + if (param->remove_jump_scans) + { + bool no_trees= remove_nonrange_trees(param, tree1); + no_trees= no_trees || remove_nonrange_trees(param, tree2); + if (no_trees) + DBUG_RETURN(new SEL_TREE(SEL_TREE::ALWAYS)); + } SEL_IMERGE *merge; /* both trees are "range" trees, produce new index merge structure */ if (!(result= new SEL_TREE()) || !(merge= new SEL_IMERGE()) || @@ -4257,7 +5472,9 @@ tree_or(PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2) /* one tree is index merge tree and another is range tree */ if (tree1->merges.is_empty()) swap_variables(SEL_TREE*, tree1, tree2); - + + if (param->remove_jump_scans && remove_nonrange_trees(param, tree2)) + DBUG_RETURN(new SEL_TREE(SEL_TREE::ALWAYS)); /* add tree2 to tree1->merges, checking if it collapses to ALWAYS */ if (imerge_list_or_tree(param, &tree1->merges, tree2)) result= new SEL_TREE(SEL_TREE::ALWAYS); @@ -5877,7 +7094,7 @@ int QUICK_INDEX_MERGE_SELECT::read_keys_and_merge() (This also creates a deficiency - it is possible that we will retrieve parts of key that are not used by current query at all.) */ - if (head->file->extra(HA_EXTRA_RETRIEVE_PRIMARY_KEY)) + if (head->file->ha_retrieve_all_pk()) DBUG_RETURN(1); cur_quick_it.rewind(); @@ -6147,7 +7364,7 @@ int QUICK_RANGE_SELECT::reset() in_range= FALSE; cur_range= (QUICK_RANGE**) ranges.buffer; - if (file->inited == handler::NONE && (error= file->ha_index_init(index))) + if (file->inited == handler::NONE && (error= file->ha_index_init(index,1))) DBUG_RETURN(error); /* Do not allocate the buffers twice. 
*/ @@ -6202,6 +7419,14 @@ int QUICK_RANGE_SELECT::reset() multi_range_buff->buffer= mrange_buff; multi_range_buff->buffer_end= mrange_buff + mrange_bufsiz; multi_range_buff->end_of_used_area= mrange_buff; +#ifdef HAVE_purify + /* + We need this until ndb uses the buffer efficiently + (Now ndb stores the complete row in here, instead of only the used + fields, which gives us valgrind warnings in compare_record[]) + */ + bzero((char*) mrange_buff, mrange_bufsiz); +#endif } DBUG_RETURN(0); } @@ -6406,7 +7631,7 @@ int QUICK_RANGE_SELECT_GEOM::get_next() (byte*) range->min_key, range->min_length, (ha_rkey_function)(range->flag ^ GEOM_FLAG)); - if (result != HA_ERR_KEY_NOT_FOUND) + if (result != HA_ERR_KEY_NOT_FOUND && result != HA_ERR_END_OF_FILE) DBUG_RETURN(result); range=0; // Not found, to next range } @@ -6549,7 +7774,7 @@ int QUICK_SELECT_DESC::get_next() } if (result) { - if (result != HA_ERR_KEY_NOT_FOUND) + if (result != HA_ERR_KEY_NOT_FOUND && result != HA_ERR_END_OF_FILE) DBUG_RETURN(result); range=0; // Not found, to next range continue; @@ -8230,7 +9455,7 @@ int QUICK_GROUP_MIN_MAX_SELECT::reset(void) DBUG_ENTER("QUICK_GROUP_MIN_MAX_SELECT::reset"); file->extra(HA_EXTRA_KEYREAD); /* We need only the key attributes */ - result= file->ha_index_init(index); + result= file->ha_index_init(index, 1); result= file->index_last(record); if (result == HA_ERR_END_OF_FILE) DBUG_RETURN(0); @@ -8306,7 +9531,7 @@ int QUICK_GROUP_MIN_MAX_SELECT::get_next() DBUG_ASSERT(is_last_prefix <= 0); if (result == HA_ERR_KEY_NOT_FOUND) continue; - else if (result) + if (result) break; if (have_min) @@ -8336,10 +9561,11 @@ int QUICK_GROUP_MIN_MAX_SELECT::get_next() HA_READ_KEY_EXACT); result= have_min ? min_res : have_max ? max_res : result; - } - while (result == HA_ERR_KEY_NOT_FOUND && is_last_prefix != 0); + } while ((result == HA_ERR_KEY_NOT_FOUND || result == HA_ERR_END_OF_FILE) && + is_last_prefix != 0); if (result == 0) + { /* Partially mimic the behavior of end_select_send. Copy the field data from Item_field::field into Item_field::result_field + other fields in non-ANSI SQL mode). */ copy_fields(&join->tmp_table_param); + } else if (result == HA_ERR_KEY_NOT_FOUND) result= HA_ERR_END_OF_FILE; @@ -8373,6 +9600,7 @@ int QUICK_GROUP_MIN_MAX_SELECT::get_next() RETURN 0 on success HA_ERR_KEY_NOT_FOUND if no MIN key was found that fulfills all conditions. + HA_ERR_END_OF_FILE - "" - other if some error occurred */ @@ -8426,7 +9654,7 @@ int QUICK_GROUP_MIN_MAX_SELECT::next_min() if (key_cmp(index_info->key_part, group_prefix, real_prefix_len)) key_restore(record, tmp_record, index_info, 0); } - else if (result == HA_ERR_KEY_NOT_FOUND) + else if (result == HA_ERR_KEY_NOT_FOUND || result == HA_ERR_END_OF_FILE) result= 0; /* There is a result in any case. */ } } @@ -8451,6 +9679,7 @@ int QUICK_GROUP_MIN_MAX_SELECT::next_min() RETURN 0 on success HA_ERR_KEY_NOT_FOUND if no MAX key was found that fulfills all conditions.
+ HA_ERR_END_OF_FILE - "" - other if some error occurred */ @@ -8551,6 +9780,7 @@ int QUICK_GROUP_MIN_MAX_SELECT::next_prefix() 0 on success HA_ERR_KEY_NOT_FOUND if there is no key with the given prefix in any of the ranges + HA_ERR_END_OF_FILE - "" - other if some error */ @@ -8595,11 +9825,12 @@ int QUICK_GROUP_MIN_MAX_SELECT::next_min_in_range() result= file->index_read(record, group_prefix, search_prefix_len, find_flag); - if ((result == HA_ERR_KEY_NOT_FOUND) && - (cur_range->flag & (EQ_RANGE | NULL_RANGE))) - continue; /* Check the next range. */ - else if (result) + if (result) { + if ((result == HA_ERR_KEY_NOT_FOUND || result == HA_ERR_END_OF_FILE) && + (cur_range->flag & (EQ_RANGE | NULL_RANGE))) + continue; /* Check the next range. */ + /* In all other cases (HA_ERR_*, HA_READ_KEY_EXACT with NO_MIN_RANGE, HA_READ_AFTER_KEY, HA_READ_KEY_OR_NEXT) if the lookup failed for this @@ -8626,7 +9857,7 @@ int QUICK_GROUP_MIN_MAX_SELECT::next_min_in_range() /* Check if record belongs to the current group. */ if (key_cmp(index_info->key_part, group_prefix, real_prefix_len)) { - result = HA_ERR_KEY_NOT_FOUND; + result= HA_ERR_KEY_NOT_FOUND; continue; } @@ -8644,7 +9875,7 @@ int QUICK_GROUP_MIN_MAX_SELECT::next_min_in_range() if (!((cur_range->flag & NEAR_MAX) && (cmp_res == -1) || (cmp_res <= 0))) { - result = HA_ERR_KEY_NOT_FOUND; + result= HA_ERR_KEY_NOT_FOUND; continue; } } @@ -8683,6 +9914,7 @@ int QUICK_GROUP_MIN_MAX_SELECT::next_max_in_range() 0 on success HA_ERR_KEY_NOT_FOUND if there is no key with the given prefix in any of the ranges + HA_ERR_END_OF_FILE - "" - other if some error */ @@ -8728,10 +9960,12 @@ int QUICK_GROUP_MIN_MAX_SELECT::next_max_in_range() result= file->index_read(record, group_prefix, search_prefix_len, find_flag); - if ((result == HA_ERR_KEY_NOT_FOUND) && (cur_range->flag & EQ_RANGE)) - continue; /* Check the next range. */ if (result) { + if ((result == HA_ERR_KEY_NOT_FOUND || result == HA_ERR_END_OF_FILE) && + (cur_range->flag & EQ_RANGE)) + continue; /* Check the next range. */ + + /* If no key was found with this upper bound, there are certainly no keys in the ranges to the left.
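A pattern repeated throughout the hunks above is that HA_ERR_END_OF_FILE must now be treated the same way as HA_ERR_KEY_NOT_FOUND when deciding whether to advance to the next range. A minimal sketch of that control flow, with a stubbed-out handler call and illustrative error values (the real codes are defined in include/my_base.h):

    #include <cstdio>

    /* Values match my_base.h, but are reproduced here only for illustration. */
    enum { OK= 0, HA_ERR_KEY_NOT_FOUND= 120, HA_ERR_END_OF_FILE= 137 };

    /* Stand-in for file->index_read(): one canned result per range. */
    static int index_read_stub(int range_no)
    {
      static const int results[]= { HA_ERR_KEY_NOT_FOUND, HA_ERR_END_OF_FILE, OK };
      return results[range_no];
    }

    /*
      The control flow the patch converges on: both "key not found" and
      "end of file" mean "no row in this range, check the next range";
      any other non-zero result is a real error and aborts the scan.
    */
    static int next_in_ranges(int n_ranges)
    {
      int result= HA_ERR_END_OF_FILE;
      for (int range_no= 0; range_no < n_ranges; range_no++)
      {
        result= index_read_stub(range_no);
        if (result == OK)
          return OK;                 /* found a row in this range */
        if (result != HA_ERR_KEY_NOT_FOUND && result != HA_ERR_END_OF_FILE)
          return result;             /* a real error: stop scanning */
      }
      return result;                 /* no range had a matching row */
    }

    int main()
    {
      printf("scan result: %d\n", next_in_ranges(3)); /* 0: third range matches */
      return 0;
    }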
diff --git a/sql/opt_range.h b/sql/opt_range.h index f84058f3b64..3cd348ba9af 100644 --- a/sql/opt_range.h +++ b/sql/opt_range.h @@ -249,6 +249,7 @@ public: struct st_qsel_param; +class PARAM; class SEL_ARG; /* @@ -283,12 +284,12 @@ protected: QUICK_RANGE_SELECT *get_quick_select_for_ref(THD *thd, TABLE *table, struct st_table_ref *ref, ha_rows records); - friend bool get_quick_keys(struct st_qsel_param *param, + friend bool get_quick_keys(PARAM *param, QUICK_RANGE_SELECT *quick,KEY_PART *key, SEL_ARG *key_tree, char *min_key, uint min_key_flag, char *max_key, uint max_key_flag); - friend QUICK_RANGE_SELECT *get_quick_select(struct st_qsel_param*,uint idx, + friend QUICK_RANGE_SELECT *get_quick_select(PARAM*,uint idx, SEL_ARG *key_tree, MEM_ROOT *alloc); friend class QUICK_SELECT_DESC; @@ -718,4 +719,8 @@ QUICK_RANGE_SELECT *get_quick_select_for_ref(THD *thd, TABLE *table, ha_rows records); uint get_index_for_order(TABLE *table, ORDER *order, ha_rows limit); +#ifdef WITH_PARTITION_STORAGE_ENGINE +bool prune_partitions(THD *thd, TABLE *table, Item *pprune_cond); +#endif + #endif diff --git a/sql/opt_sum.cc b/sql/opt_sum.cc index 37acce2934b..2e87f9cf0db 100644 --- a/sql/opt_sum.cc +++ b/sql/opt_sum.cc @@ -205,7 +205,7 @@ int opt_sum_query(TABLE_LIST *tables, List<Item> &all_fields,COND *conds) const_result= 0; break; } - error= table->file->ha_index_init((uint) ref.key); + error= table->file->ha_index_init((uint) ref.key, 1); if (!ref.key_length) error= table->file->index_first(table->record[0]); @@ -292,7 +292,7 @@ int opt_sum_query(TABLE_LIST *tables, List<Item> &all_fields,COND *conds) const_result= 0; break; } - error= table->file->ha_index_init((uint) ref.key); + error= table->file->ha_index_init((uint) ref.key, 1); if (!ref.key_length) error= table->file->index_last(table->record[0]); diff --git a/sql/parse_file.cc b/sql/parse_file.cc index 041b770ac0b..3cddc879825 100644 --- a/sql/parse_file.cc +++ b/sql/parse_file.cc @@ -226,8 +226,20 @@ sql_create_definition_file(const LEX_STRING *dir, const LEX_STRING *file_name, DBUG_PRINT("enter", ("Dir: %s, file: %s, base 0x%lx", dir->str, file_name->str, (ulong) base)); - fn_format(path, file_name->str, dir->str, 0, MY_UNPACK_FILENAME); - path_end= strlen(path); + if (dir) + { + fn_format(path, file_name->str, dir->str, 0, MY_UNPACK_FILENAME); + path_end= strlen(path); + } + else + { + /* + if no dir is passed, it means file_name is a full path, + including the dir name, the file name itself, and an extension, + with unpack_filename() already executed over it.
+ */ + path_end= strxnmov(path, FN_REFLEN, file_name->str, NullS) - path; + } // temporary file name path[path_end]='~'; @@ -355,11 +367,11 @@ my_bool rename_in_schema_file(const char *schema, const char *old_name, { char old_path[FN_REFLEN], new_path[FN_REFLEN], arc_path[FN_REFLEN]; - strxnmov(old_path, FN_REFLEN, mysql_data_home, "/", schema, "/", + strxnmov(old_path, FN_REFLEN-1, mysql_data_home, "/", schema, "/", old_name, reg_ext, NullS); (void) unpack_filename(old_path, old_path); - strxnmov(new_path, FN_REFLEN, mysql_data_home, "/", schema, "/", + strxnmov(new_path, FN_REFLEN-1, mysql_data_home, "/", schema, "/", new_name, reg_ext, NullS); (void) unpack_filename(new_path, new_path); @@ -367,7 +379,7 @@ my_bool rename_in_schema_file(const char *schema, const char *old_name, return 1; /* check if arc_dir exists */ - strxnmov(arc_path, FN_REFLEN, mysql_data_home, "/", schema, "/arc", NullS); + strxnmov(arc_path, FN_REFLEN-1, mysql_data_home, "/", schema, "/arc", NullS); (void) unpack_filename(arc_path, arc_path); if (revision > 0 && !access(arc_path, F_OK)) @@ -414,7 +426,7 @@ sql_parse_prepare(const LEX_STRING *file_name, MEM_ROOT *mem_root, char *end, *sign; File_parser *parser; File file; - DBUG_ENTER("sql__parse_prepare"); + DBUG_ENTER("sql_parse_prepare"); if (!my_stat(file_name->str, &stat_info, MYF(MY_WME))) { diff --git a/sql/records.cc b/sql/records.cc index b352f9f395a..5cb9b1e5c47 100644 --- a/sql/records.cc +++ b/sql/records.cc @@ -65,7 +65,7 @@ void init_read_record_idx(READ_RECORD *info, THD *thd, TABLE *table, table->status=0; /* And it's always found */ if (!table->file->inited) { - table->file->ha_index_init(idx); + table->file->ha_index_init(idx, 1); table->file->extra(HA_EXTRA_RETRIEVE_PRIMARY_KEY); } /* read_record will be changed to rr_index in rr_index_first */ @@ -73,8 +73,74 @@ void init_read_record_idx(READ_RECORD *info, THD *thd, TABLE *table, } -/* init struct for read with info->read_record */ - +/* + init_read_record is used to scan by using a number of different methods. + Which method to use is set-up in this call so that later calls to + the info->read_record will call the appropriate method using a function + pointer. + + There are five methods that relate completely to the sort function + filesort. The result of a filesort is retrieved using read_record + calls. The other two methods are used for normal table access. + + The filesort will produce references to the records sorted, these + references can be stored in memory or in a temporary file. + + The temporary file is normally used when the references doesn't fit into + a properly sized memory buffer. For most small queries the references + are stored in the memory buffer. + + The temporary file is also used when performing an update where a key is + modified. + + Methods used when ref's are in memory (using rr_from_pointers): + rr_unpack_from_buffer: + ---------------------- + This method is used when table->sort.addon_field is allocated. + This is allocated for most SELECT queries not involving any BLOB's. + In this case the records are fetched from a memory buffer. + rr_from_pointers: + ----------------- + Used when the above is not true, UPDATE, DELETE and so forth and + SELECT's involving BLOB's. It is also used when the addon_field + buffer is not allocated due to that its size was bigger than the + session variable max_length_for_sort_data. + In this case the record data is fetched from the handler using the + saved reference using the rnd_pos handler call. 
+ + Methods used when refs are in a temporary file (using rr_from_tempfile): + rr_unpack_from_tempfile: + ------------------------ + Same as rr_unpack_from_buffer except that references are fetched from + the temporary file. This should normally not happen, other than in + strange configurations. + + rr_from_tempfile: + ----------------- + Same as rr_from_pointers except that references are fetched from the + temporary file instead of from a memory buffer. + rr_from_cache: + -------------- + This is a special variant of rr_from_tempfile that can be used for + handlers that are not using the HA_FAST_KEY_READ table flag. Instead + of reading the references one by one from the temporary file it reads + a set of them, sorts them and reads all of them into a buffer which + is then used for a number of subsequent calls to rr_from_cache. + It is only used for SELECT queries, and only when a number of other + conditions on table size hold. + + All other accesses use either index access methods (rr_quick) or a full + table scan (rr_sequential). + rr_quick: + --------- + rr_quick uses one of the QUICK_SELECT classes in opt_range.cc to + perform an index scan. There is a lot of functionality hidden + in these quick classes. It handles all index scans of various kinds. + rr_sequential: + -------------- + This is the most basic access method of a table using rnd_init, + rnd_next and rnd_end. No indexes are used. +*/ void init_read_record(READ_RECORD *info,THD *thd, TABLE *table, SQL_SELECT *select, int use_record_cache, bool print_error) @@ -87,6 +153,10 @@ void init_read_record(READ_RECORD *info,THD *thd, TABLE *table, info->table=table; info->file= table->file; info->forms= &info->table; /* Only one table */ + + if (table->s->tmp_table == TMP_TABLE && !table->sort.addon_field) + VOID(table->file->extra(HA_EXTRA_MMAP)); + if (table->sort.addon_field) { info->rec_buf= table->sort.addon_buf; diff --git a/sql/repl_failsafe.cc b/sql/repl_failsafe.cc index d76be2ec2e4..960e0ac86cc 100644 --- a/sql/repl_failsafe.cc +++ b/sql/repl_failsafe.cc @@ -20,6 +20,7 @@ #include "repl_failsafe.h" #include "sql_repl.h" #include "slave.h" +#include "rpl_filter.h" #include "log_event.h" #include <mysql.h> @@ -732,14 +733,14 @@ static int fetch_db_tables(THD *thd, MYSQL *mysql, const char *db, TABLE_LIST table; const char* table_name= row[0]; int error; - if (table_rules_on) + if (rpl_filter->is_on()) { bzero((char*) &table, sizeof(table)); // just to be safe table.db= (char*) db; table.table_name= (char*) table_name; table.updating= 1; - if (!tables_ok(thd, &table)) + if (!rpl_filter->tables_ok(thd->db, &table)) continue; } /* download master's table and overwrite slave's table */ @@ -858,8 +859,8 @@ bool load_master_data(THD* thd) data from master */ - if (!db_ok(db, replicate_do_db, replicate_ignore_db) || - !db_ok_with_wild_table(db) || + if (!rpl_filter->db_ok(db) || + !rpl_filter->db_ok_with_wild_table(db) || !strcmp(db,"mysql")) { *cur_table_res = 0; diff --git a/sql/rpl_filter.cc b/sql/rpl_filter.cc new file mode 100644 index 00000000000..143cd027b5f --- /dev/null +++ b/sql/rpl_filter.cc @@ -0,0 +1,546 @@ +/* Copyright (C) 2000-2003 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version.
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include "mysql_priv.h" +#include "rpl_filter.h" + +#define TABLE_RULE_HASH_SIZE 16 +#define TABLE_RULE_ARR_SIZE 16 + +Rpl_filter::Rpl_filter() : + table_rules_on(0), do_table_inited(0), ignore_table_inited(0), + wild_do_table_inited(0), wild_ignore_table_inited(0) +{ + do_db.empty(); + ignore_db.empty(); + rewrite_db.empty(); +} + + +Rpl_filter::~Rpl_filter() +{ + if (do_table_inited) + hash_free(&do_table); + if (ignore_table_inited) + hash_free(&ignore_table); + if (wild_do_table_inited) + free_string_array(&wild_do_table); + if (wild_ignore_table_inited) + free_string_array(&wild_ignore_table); + free_list(&do_db); + free_list(&ignore_db); + free_list(&rewrite_db); +} + + +/* + Returns true if table should be logged/replicated + + SYNOPSIS + tables_ok() + db db to use if db in TABLE_LIST is undefined for a table + tables list of tables to check + + NOTES + Changing table order in the list can lead to different results. + + Note also the order of precedence of do/ignore rules (see code). For + that reason, users should not set conflicting rules because they + may get unpredictable results (precedence order is explained in the + manual). + + If no table in the list is marked "updating", then we always + return 0, because there is no reason to execute this statement on + the slave if it updates nothing. (Currently, this can only happen if + statement is a multi-delete (SQLCOM_DELETE_MULTI) and "tables" are + the tables in the FROM): + + In the case of SQLCOM_DELETE_MULTI, there will be a second call to + tables_ok(), with tables having "updating==TRUE" (those after the + DELETE), so this second call will make the decision (because + all_tables_not_ok() = !tables_ok(1st_list) && + !tables_ok(2nd_list)). + + TODO + "Include all tables like "abc.%" except "%.EFG"". (Can't be done now.) + If we supported Perl regexps, we could do it with pattern: /^abc\.(?!EFG)/ + (I could not find an equivalent in the regex library MySQL uses). + + RETURN VALUES + 0 should not be logged/replicated + 1 should be logged/replicated +*/ + +bool +Rpl_filter::tables_ok(const char* db, TABLE_LIST* tables) +{ + bool some_tables_updating= 0; + DBUG_ENTER("Rpl_filter::tables_ok"); + + for (; tables; tables= tables->next_global) + { + char hash_key[2*NAME_LEN+2]; + char *end; + uint len; + + if (!tables->updating) + continue; + some_tables_updating= 1; + end= strmov(hash_key, tables->db ? tables->db : db); + *end++= '.'; + len= (uint) (strmov(end, tables->table_name) - hash_key); + if (do_table_inited) // if there are any do's + { + if (hash_search(&do_table, (byte*) hash_key, len)) + DBUG_RETURN(1); + } + if (ignore_table_inited) // if there are any ignores + { + if (hash_search(&ignore_table, (byte*) hash_key, len)) + DBUG_RETURN(0); + } + if (wild_do_table_inited && + find_wild(&wild_do_table, hash_key, len)) + DBUG_RETURN(1); + if (wild_ignore_table_inited && + find_wild(&wild_ignore_table, hash_key, len)) + DBUG_RETURN(0); + } + + /* + If no table was to be updated, ignore statement (no reason to play it on + the slave; the slave is supposed to replicate _changes_ only).
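To make the precedence order above concrete, a hypothetical rule set built with the setters defined later in this file (a sketch only; the outcomes follow from the checks in tables_ok() above):

  Rpl_filter f;
  f.add_do_table("db1.t1");           // exact do rule, consulted first
  f.add_wild_ignore_table("db1.%");   // wild ignore rule, consulted later
  // An update of db1.t1 replicates: the do_table hash matches before the
  // wild ignore rule is reached. An update of db1.t2 is rejected by the
  // wild ignore rule. An update of db2.t matches no rule and falls through
  // to the default, which also rejects it because a do list exists.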
+ If no explicit rule was found and there was a do list, do not replicate. + If there was no do list, go ahead. + */ + DBUG_RETURN(some_tables_updating && + !do_table_inited && !wild_do_table_inited); +} + + +/* + Checks whether a db matches some do_db and ignore_db rules + + SYNOPSIS + db_ok() + db name of the db to check + + RETURN VALUES + 0 should not be logged/replicated + 1 should be logged/replicated +*/ + +bool +Rpl_filter::db_ok(const char* db) +{ + DBUG_ENTER("Rpl_filter::db_ok"); + + if (do_db.is_empty() && ignore_db.is_empty()) + DBUG_RETURN(1); // Ok to replicate if the user puts no constraints + + /* + If the user has specified restrictions on which databases to replicate + and db was not selected, do not replicate. + */ + if (!db) + DBUG_RETURN(0); + + if (!do_db.is_empty()) // if the do's are not empty + { + I_List_iterator<i_string> it(do_db); + i_string* tmp; + + while ((tmp=it++)) + { + if (!strcmp(tmp->ptr, db)) + DBUG_RETURN(1); // match + } + DBUG_RETURN(0); + } + else // there are some elements in the ignore list, otherwise we could not get here + { + I_List_iterator<i_string> it(ignore_db); + i_string* tmp; + + while ((tmp=it++)) + { + if (!strcmp(tmp->ptr, db)) + DBUG_RETURN(0); // match + } + DBUG_RETURN(1); + } +} + + +/* + Checks whether a db matches wild_do_table and wild_ignore_table + rules (for replication) + + SYNOPSIS + db_ok_with_wild_table() + db name of the db to check. + Is tested with check_db_name() before calling this function. + + NOTES + Here is the reason for this function. + We advise users who want to exclude a database 'db1' safely to do it + with replicate_wild_ignore_table='db1.%' instead of binlog_ignore_db or + replicate_ignore_db, because the latter two only check the selected db, + which won't work in that case: + USE db2; + UPDATE db1.t SET ... #this will be replicated and should not + whereas replicate_wild_ignore_table will work in all cases. + With replicate_wild_ignore_table, we only check tables. When + one does 'DROP DATABASE db1', tables are not involved and the + statement will be replicated, while users could expect it would not (as it + roughly means 'DROP db1.first_table, DROP db1.second_table...'). + In other words, we want to interpret 'db1.%' as "everything touching db1". + That is why we want to match 'db1' against 'db1.%' wild table rules. + + RETURN VALUES + 0 should not be logged/replicated + 1 should be logged/replicated +*/ + +bool +Rpl_filter::db_ok_with_wild_table(const char *db) +{ + DBUG_ENTER("Rpl_filter::db_ok_with_wild_table"); + + char hash_key[NAME_LEN+2]; + char *end; + int len; + end= strmov(hash_key, db); + *end++= '.'; + len= end - hash_key; + if (wild_do_table_inited && find_wild(&wild_do_table, hash_key, len)) + { + DBUG_PRINT("return",("1")); + DBUG_RETURN(1); + } + if (wild_ignore_table_inited && find_wild(&wild_ignore_table, hash_key, len)) + { + DBUG_PRINT("return",("0")); + DBUG_RETURN(0); + } + + /* + If no explicit rule was found and there was a do list, do not replicate.
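To make the 'db1' vs 'db1.%' matching concrete, a sketch of the key this function builds before consulting the wild rules (mirroring the code above; 'db1' is a made-up name):

  char hash_key[NAME_LEN+2];
  char *end= strmov(hash_key, "db1");
  *end++= '.';                  // key is now "db1." -- no table part
  int len= end - hash_key;
  // find_wild() then runs my_wildcmp() on "db1." against "db1.%"; the '%'
  // (wild_many) matches the empty tail, so DROP DATABASE db1 is filtered
  // exactly like a statement touching a db1 table.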
+ If there was no do list, go ahead. + */ + DBUG_PRINT("return",("db=%s,retval=%d", db, !wild_do_table_inited)); + DBUG_RETURN(!wild_do_table_inited); +} + + +bool +Rpl_filter::is_on() +{ + return table_rules_on; +} + + +int +Rpl_filter::add_do_table(const char* table_spec) +{ + DBUG_ENTER("Rpl_filter::add_do_table"); + if (!do_table_inited) + init_table_rule_hash(&do_table, &do_table_inited); + table_rules_on= 1; + DBUG_RETURN(add_table_rule(&do_table, table_spec)); +} + + +int +Rpl_filter::add_ignore_table(const char* table_spec) +{ + DBUG_ENTER("Rpl_filter::add_ignore_table"); + if (!ignore_table_inited) + init_table_rule_hash(&ignore_table, &ignore_table_inited); + table_rules_on= 1; + DBUG_RETURN(add_table_rule(&ignore_table, table_spec)); +} + + +int +Rpl_filter::add_wild_do_table(const char* table_spec) +{ + DBUG_ENTER("Rpl_filter::add_wild_do_table"); + if (!wild_do_table_inited) + init_table_rule_array(&wild_do_table, &wild_do_table_inited); + table_rules_on= 1; + DBUG_RETURN(add_wild_table_rule(&wild_do_table, table_spec)); +} + + +int +Rpl_filter::add_wild_ignore_table(const char* table_spec) +{ + DBUG_ENTER("Rpl_filter::add_wild_ignore_table"); + if (!wild_ignore_table_inited) + init_table_rule_array(&wild_ignore_table, &wild_ignore_table_inited); + table_rules_on= 1; + DBUG_RETURN(add_wild_table_rule(&wild_ignore_table, table_spec)); +} + + +void +Rpl_filter::add_db_rewrite(const char* from_db, const char* to_db) +{ + i_string_pair *db_pair = new i_string_pair(from_db, to_db); + rewrite_db.push_back(db_pair); +} + + +int +Rpl_filter::add_table_rule(HASH* h, const char* table_spec) +{ + const char* dot = strchr(table_spec, '.'); + if (!dot) return 1; + // len is always > 0 because we know that there exists a '.' + uint len = (uint)strlen(table_spec); + TABLE_RULE_ENT* e = (TABLE_RULE_ENT*)my_malloc(sizeof(TABLE_RULE_ENT) + + len, MYF(MY_WME)); + if (!e) return 1; + e->db= (char*)e + sizeof(TABLE_RULE_ENT); + e->tbl_name= e->db + (dot - table_spec) + 1; + e->key_len= len; + memcpy(e->db, table_spec, len); + + return my_hash_insert(h, (byte*)e); +} + + +/* + Add a table expression with wildcards to the dynamic array +*/ + +int +Rpl_filter::add_wild_table_rule(DYNAMIC_ARRAY* a, const char* table_spec) +{ + const char* dot = strchr(table_spec, '.'); + if (!dot) return 1; + uint len = (uint)strlen(table_spec); + TABLE_RULE_ENT* e = (TABLE_RULE_ENT*)my_malloc(sizeof(TABLE_RULE_ENT) + + len, MYF(MY_WME)); + if (!e) return 1; + e->db= (char*)e + sizeof(TABLE_RULE_ENT); + e->tbl_name= e->db + (dot - table_spec) + 1; + e->key_len= len; + memcpy(e->db, table_spec, len); + insert_dynamic(a, (gptr)&e); + return 0; +} + + +void +Rpl_filter::add_do_db(const char* table_spec) +{ + DBUG_ENTER("Rpl_filter::add_do_db"); + i_string *db = new i_string(table_spec); + do_db.push_back(db); +} + + +void +Rpl_filter::add_ignore_db(const char* table_spec) +{ + DBUG_ENTER("Rpl_filter::add_ignore_db"); + i_string *db = new i_string(table_spec); + ignore_db.push_back(db); +} + + +static byte* get_table_key(const byte* a, uint* len, + my_bool __attribute__((unused))) +{ + TABLE_RULE_ENT *e= (TABLE_RULE_ENT *) a; + + *len= e->key_len; + return (byte*)e->db; +} + + +static void free_table_ent(void* a) +{ + TABLE_RULE_ENT *e= (TABLE_RULE_ENT *) a; + + my_free((gptr) e, MYF(0)); +} + + +void +Rpl_filter::init_table_rule_hash(HASH* h, bool* h_inited) +{ + hash_init(h, system_charset_info,TABLE_RULE_HASH_SIZE,0,0, + get_table_key, free_table_ent, 0); + *h_inited = 1; +} + + +void
+Rpl_filter::init_table_rule_array(DYNAMIC_ARRAY* a, bool* a_inited) +{ + my_init_dynamic_array(a, sizeof(TABLE_RULE_ENT*), TABLE_RULE_ARR_SIZE, + TABLE_RULE_ARR_SIZE); + *a_inited = 1; +} + + +TABLE_RULE_ENT* +Rpl_filter::find_wild(DYNAMIC_ARRAY *a, const char* key, int len) +{ + uint i; + const char* key_end= key + len; + + for (i= 0; i < a->elements; i++) + { + TABLE_RULE_ENT* e ; + get_dynamic(a, (gptr)&e, i); + if (!my_wildcmp(system_charset_info, key, key_end, + (const char*)e->db, + (const char*)(e->db + e->key_len), + '\\',wild_one,wild_many)) + return e; + } + + return 0; +} + + +void +Rpl_filter::free_string_array(DYNAMIC_ARRAY *a) +{ + uint i; + for (i= 0; i < a->elements; i++) + { + char* p; + get_dynamic(a, (gptr) &p, i); + my_free(p, MYF(MY_WME)); + } + delete_dynamic(a); +} + + +/* + Builds a String from a HASH of TABLE_RULE_ENT. Cannot be used for any other + hash, as it assumes that the hash entries are TABLE_RULE_ENT. + + SYNOPSIS + table_rule_ent_hash_to_str() + s pointer to the String to fill + h pointer to the HASH to read + + RETURN VALUES + none +*/ + +void +Rpl_filter::table_rule_ent_hash_to_str(String* s, HASH* h, bool inited) +{ + s->length(0); + if (inited) + { + for (uint i= 0; i < h->records; i++) + { + TABLE_RULE_ENT* e= (TABLE_RULE_ENT*) hash_element(h, i); + if (s->length()) + s->append(','); + s->append(e->db,e->key_len); + } + } +} + + +void +Rpl_filter::table_rule_ent_dynamic_array_to_str(String* s, DYNAMIC_ARRAY* a, + bool inited) +{ + s->length(0); + if (inited) + { + for (uint i= 0; i < a->elements; i++) + { + TABLE_RULE_ENT* e; + get_dynamic(a, (gptr)&e, i); + if (s->length()) + s->append(','); + s->append(e->db,e->key_len); + } + } +} + + +void +Rpl_filter::get_do_table(String* str) +{ + table_rule_ent_hash_to_str(str, &do_table, do_table_inited); +} + + +void +Rpl_filter::get_ignore_table(String* str) +{ + table_rule_ent_hash_to_str(str, &ignore_table, ignore_table_inited); +} + + +void +Rpl_filter::get_wild_do_table(String* str) +{ + table_rule_ent_dynamic_array_to_str(str, &wild_do_table, wild_do_table_inited); +} + + +void +Rpl_filter::get_wild_ignore_table(String* str) +{ + table_rule_ent_dynamic_array_to_str(str, &wild_ignore_table, wild_ignore_table_inited); +} + + +const char* +Rpl_filter::get_rewrite_db(const char* db, uint32 *new_len) +{ + if (rewrite_db.is_empty() || !db) + return db; + I_List_iterator<i_string_pair> it(rewrite_db); + i_string_pair* tmp; + + while ((tmp=it++)) + { + if (!strcmp(tmp->key, db)) + { + *new_len= strlen(tmp->val); + return tmp->val; + } + } + return db; +} + + +I_List<i_string>* +Rpl_filter::get_do_db() +{ + return &do_db; +} + + +I_List<i_string>* +Rpl_filter::get_ignore_db() +{ + return &ignore_db; +} diff --git a/sql/rpl_filter.h b/sql/rpl_filter.h new file mode 100644 index 00000000000..58d2b97c9c6 --- /dev/null +++ b/sql/rpl_filter.h @@ -0,0 +1,117 @@ +/* Copyright (C) 2000-2003 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#ifndef RPL_FILTER_H +#define RPL_FILTER_H + +#include "mysql.h" + +typedef struct st_table_rule_ent +{ + char* db; + char* tbl_name; + uint key_len; +} TABLE_RULE_ENT; + +/* + Rpl_filter + + Inclusion and exclusion rules of tables and databases. + Also handles rewrites of db. + Used for replication and binlogging. + */ +class Rpl_filter +{ +public: + Rpl_filter(); + ~Rpl_filter(); + Rpl_filter(Rpl_filter const&); + Rpl_filter& operator=(Rpl_filter const&); + + /* Checks - returns true if ok to replicate/log */ + + bool tables_ok(const char* db, TABLE_LIST* tables); + bool db_ok(const char* db); + bool db_ok_with_wild_table(const char *db); + + bool is_on(); + + /* Setters - add filtering rules */ + + int add_do_table(const char* table_spec); + int add_ignore_table(const char* table_spec); + + int add_wild_do_table(const char* table_spec); + int add_wild_ignore_table(const char* table_spec); + + void add_do_db(const char* db_spec); + void add_ignore_db(const char* db_spec); + + void add_db_rewrite(const char* from_db, const char* to_db); + + /* Getters - to get information about current rules */ + + void get_do_table(String* str); + void get_ignore_table(String* str); + + void get_wild_do_table(String* str); + void get_wild_ignore_table(String* str); + + const char* get_rewrite_db(const char* db, uint32 *new_len); + + I_List<i_string>* get_do_db(); + I_List<i_string>* get_ignore_db(); + +private: + bool table_rules_on; + + void init_table_rule_hash(HASH* h, bool* h_inited); + void init_table_rule_array(DYNAMIC_ARRAY* a, bool* a_inited); + + int add_table_rule(HASH* h, const char* table_spec); + int add_wild_table_rule(DYNAMIC_ARRAY* a, const char* table_spec); + + void free_string_array(DYNAMIC_ARRAY *a); + + void table_rule_ent_hash_to_str(String* s, HASH* h, bool inited); + void table_rule_ent_dynamic_array_to_str(String* s, DYNAMIC_ARRAY* a, + bool inited); + TABLE_RULE_ENT* find_wild(DYNAMIC_ARRAY *a, const char* key, int len); + + /* + Those 4 structures below are uninitialized memory unless the + corresponding *_inited variables are "true". + */ + HASH do_table; + HASH ignore_table; + DYNAMIC_ARRAY wild_do_table; + DYNAMIC_ARRAY wild_ignore_table; + + bool do_table_inited; + bool ignore_table_inited; + bool wild_do_table_inited; + bool wild_ignore_table_inited; + + I_List<i_string> do_db; + I_List<i_string> ignore_db; + + I_List<i_string_pair> rewrite_db; +}; + +extern Rpl_filter *rpl_filter; +extern Rpl_filter *binlog_filter; + +#endif // RPL_FILTER_H diff --git a/sql/rpl_rli.h b/sql/rpl_rli.h new file mode 100644 index 00000000000..5500fdf1f64 --- /dev/null +++ b/sql/rpl_rli.h @@ -0,0 +1,312 @@ +/* Copyright (C) 2005 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#ifndef RPL_RLI_H +#define RPL_RLI_H + +#define MAX_SLAVE_ERRMSG 1024 + +#include "rpl_tblmap.h" + +/**************************************************************************** + + Replication SQL Thread + + st_relay_log_info contains: + - the current relay log + - the current relay log offset + - master log name + - master log sequence corresponding to the last update + - misc information specific to the SQL thread + + st_relay_log_info is initialized from the slave.info file if one exists. + Otherwise, data members are initialized with defaults. The initialization is + done with the init_relay_log_info() call. + + The format of the slave.info file: + + relay_log_name + relay_log_pos + master_log_name + master_log_pos + + To clean up, call end_relay_log_info() + +*****************************************************************************/ + +typedef struct st_relay_log_info +{ + /* + If flag set, then rli does not store its state in any info file. + This is the case only when we execute BINLOG SQL commands inside + a client, non-replication thread. + */ + bool no_storage; + + /*** The following variables can only be read when protected by data lock ****/ + + /* + info_fd - file descriptor of the info file. Set only during + initialization or clean up - safe to read anytime + cur_log_fd - file descriptor of the current read relay log + */ + File info_fd,cur_log_fd; + + /* + Protected with internal locks. + Must get data_lock when resetting the logs. + */ + MYSQL_LOG relay_log; + LOG_INFO linfo; + IO_CACHE cache_buf,*cur_log; + + /* The following variables are safe to read any time */ + + /* IO_CACHE of the info file - set only during init or end */ + IO_CACHE info_file; + + /* + When we restart slave thread we need to have access to the previously + created temporary tables. Modified only on init/end and by the SQL + thread, read only by SQL thread. + */ + TABLE *save_temporary_tables; + + /* + standard lock acquisition order to avoid deadlocks: + run_lock, data_lock, relay_log.LOCK_log, relay_log.LOCK_index + */ + pthread_mutex_t data_lock,run_lock; + + /* + start_cond is broadcast when SQL thread is started + stop_cond - when stopped + data_cond - when data protected by data_lock changes + */ + pthread_cond_t start_cond, stop_cond, data_cond; + + /* parent master info structure */ + struct st_master_info *mi; + + /* + Needed to deal properly with cur_log getting closed and re-opened with + a different log under our feet + */ + uint32 cur_log_old_open_count; + + /* + Let's call a group (of events) : + - a transaction + or + - an autocommitting query + its associated events (INSERT_ID, + TIMESTAMP...) + We need these rli coordinates : + - relay log name and position of the beginning of the group we are currently + executing. Needed to know where we have to restart when replication has + stopped in the middle of a group (which has been rolled back by the slave). + - relay log name and position just after the event we have just + executed. This event is part of the current group. + Formerly we only had the immediately above coordinates, plus a 'pending' + variable, but this dealt incorrectly with the case of a transaction starting + on one relay log and finishing (committing) on another relay log. A case + which can happen when, for example, the relay log gets rotated because of + max_binlog_size.
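An assumed illustration of the two coordinate pairs (file names and offsets made up): if a transaction's BEGIN was read from relay-bin.000003 at position 500, the relay log then rotated, and its COMMIT is being executed from relay-bin.000004, the SQL thread sees:

  group_relay_log_name/pos  -> relay-bin.000003 : 500  (restart point of the group)
  event_relay_log_name/pos  -> relay-bin.000004 : just past the last executed event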
+ */ + char group_relay_log_name[FN_REFLEN]; + ulonglong group_relay_log_pos; + char event_relay_log_name[FN_REFLEN]; + ulonglong event_relay_log_pos; + ulonglong future_event_relay_log_pos; + + /* + Original log name and position of the group we're currently executing + (whose coordinates are group_relay_log_name/pos in the relay log) + in the master's binlog. These concern the *group*, because in the master's + binlog the log_pos that comes with each event is the position of the + beginning of the group. + */ + char group_master_log_name[FN_REFLEN]; + volatile my_off_t group_master_log_pos; + + /* + Handling of the relay_log_space_limit optional constraint. + ignore_log_space_limit is used to resolve a deadlock between I/O and SQL + threads; the SQL thread sets it to unblock the I/O thread and make it + temporarily forget about the constraint. + */ + ulonglong log_space_limit,log_space_total; + bool ignore_log_space_limit; + + /* + When it commits, InnoDB internally stores the master log position it has + processed so far; the position to store is the one of the end of the + committing event (the COMMIT query event, or the event if in autocommit + mode). + */ +#if MYSQL_VERSION_ID < 40100 + ulonglong future_master_log_pos; +#else + ulonglong future_group_master_log_pos; +#endif + + time_t last_master_timestamp; + + /* + Needed for problems when slave stops and we want to restart it + skipping one or more events in the master log that have caused + errors, and have been manually applied by the DBA already. + */ + volatile uint32 slave_skip_counter; + volatile ulong abort_pos_wait; /* Incremented on change master */ + volatile ulong slave_run_id; /* Incremented on slave start */ + pthread_mutex_t log_space_lock; + pthread_cond_t log_space_cond; + THD * sql_thd; + int last_slave_errno; +#ifndef DBUG_OFF + int events_till_abort; +#endif + char last_slave_error[MAX_SLAVE_ERRMSG]; + + /* if not set, the value of other members of the structure are undefined */ + bool inited; + volatile bool abort_slave; + volatile uint slave_running; + + /* + Condition and its parameters from the START SLAVE UNTIL clause. + + UNTIL condition is tested with is_until_satisfied() method that is + called by exec_relay_log_event(). is_until_satisfied() caches the result + of the comparison of log names because log names don't change very often; + this cache is invalidated by parts of code which change log names with + notify_*_log_name_updated() methods. (They need to be called only if the + SQL thread is running). + */ + + enum {UNTIL_NONE= 0, UNTIL_MASTER_POS, UNTIL_RELAY_POS} until_condition; + char until_log_name[FN_REFLEN]; + ulonglong until_log_pos; + /* extension extracted from log_name and converted to int */ + ulong until_log_name_extension; + /* + Cached result of comparison of until_log_name and current log name; + -2 means uninitialised, -1,0,1 are comparison results + */ + enum + { + UNTIL_LOG_NAMES_CMP_UNKNOWN= -2, UNTIL_LOG_NAMES_CMP_LESS= -1, + UNTIL_LOG_NAMES_CMP_EQUAL= 0, UNTIL_LOG_NAMES_CMP_GREATER= 1 + } until_log_names_cmp_result; + + char cached_charset[6]; + /* + trans_retries varies between 0 and slave_transaction_retries and counts how + many times the slave has retried the present transaction; gets reset to 0 + when the transaction finally succeeds. retried_trans is a cumulative + counter: how many times the slave has retried a transaction (any) since + the slave started.
+ */ + ulong trans_retries, retried_trans; + + /* + If the end of the hot relay log is made of master's events ignored by the + slave I/O thread, these two keep track of the coords (in the master's + binlog) of the last of these events seen by the slave I/O thread. If not, + ign_master_log_name_end[0] == 0. + As they are like a Rotate event read/written from/to the relay log, they + are both protected by rli->relay_log.LOCK_log. + */ + char ign_master_log_name_end[FN_REFLEN]; + ulonglong ign_master_log_pos_end; + + st_relay_log_info(); + ~st_relay_log_info(); + + /* + Invalidate cached until_log_name and group_relay_log_name comparison + result. Should be called after any update of group_relay_log_name if + there is a chance that sql_thread is running. + */ + inline void notify_group_relay_log_name_update() + { + if (until_condition==UNTIL_RELAY_POS) + until_log_names_cmp_result= UNTIL_LOG_NAMES_CMP_UNKNOWN; + } + + /* + The same as the previous one, but for group_master_log_name. + */ + inline void notify_group_master_log_name_update() + { + if (until_condition==UNTIL_MASTER_POS) + until_log_names_cmp_result= UNTIL_LOG_NAMES_CMP_UNKNOWN; + } + + inline void inc_event_relay_log_pos() + { + event_relay_log_pos= future_event_relay_log_pos; + } + + void inc_group_relay_log_pos(ulonglong log_pos, + bool skip_lock=0); + + int wait_for_pos(THD* thd, String* log_name, longlong log_pos, + longlong timeout); + void close_temporary_tables(); + + /* Check if UNTIL condition is satisfied. See slave.cc for more. */ + bool is_until_satisfied(); + inline ulonglong until_pos() + { + return ((until_condition == UNTIL_MASTER_POS) ? group_master_log_pos : + group_relay_log_pos); + } + + table_mapping m_table_map; + + /* + Last charset (6 bytes) seen by slave SQL thread is cached here; it helps + the thread save 3 get_charset() per Query_log_event if the charset is not + changing from event to event (common situation). + When the 6 bytes are equal to 0, this means "cache is invalidated". + */ + void cached_charset_invalidate(); + bool cached_charset_compare(char *charset); + + /* + To reload special tables when they are changed, we introduce a set + of functions that will mark whenever special functions need to be + called after modifying tables. Right now, the tables are either + ACL tables or grants tables. + */ + enum enum_reload_flag + { + RELOAD_NONE_F = 0UL, + RELOAD_GRANT_F = (1UL << 0), + RELOAD_ACCESS_F = (1UL << 1) + }; + + ulong m_reload_flags; + + void touching_table(char const* db, char const* table, ulong table_id); + void transaction_end(THD*); + + void cleanup_context(THD *, bool); + time_t unsafe_to_stop_at; +} RELAY_LOG_INFO; + +#endif /* RPL_RLI_H */ diff --git a/sql/rpl_tblmap.cc b/sql/rpl_tblmap.cc new file mode 100644 index 00000000000..a0272b23ee8 --- /dev/null +++ b/sql/rpl_tblmap.cc @@ -0,0 +1,151 @@ +/* Copyright (C) 2005 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details.
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include "mysql_priv.h" + +#ifdef HAVE_REPLICATION + +#include "rpl_tblmap.h" + +#define MAYBE_TABLE_NAME(T) ((T) ? (T)->s->table_name.str : "<>") +#define TABLE_ID_HASH_SIZE 32 +#define TABLE_ID_CHUNK 256 + +table_mapping::table_mapping() + : m_free(0) +{ + /* + No "free_element" function for entries passed here, as the entries are + allocated in a MEM_ROOT (freed as a whole in the destructor); they cannot + be freed one by one. + Note that below we don't test if hash_init() succeeded. This constructor + is called at startup only. + */ + (void) hash_init(&m_table_ids,&my_charset_bin,TABLE_ID_HASH_SIZE, + offsetof(entry,table_id),sizeof(ulong), + 0,0,0); + /* We don't preallocate any block, this is consistent with m_free=0 above */ + init_alloc_root(&m_mem_root, TABLE_ID_HASH_SIZE*sizeof(entry), 0); +} + +table_mapping::~table_mapping() +{ + hash_free(&m_table_ids); + free_root(&m_mem_root, MYF(0)); +} + +st_table* table_mapping::get_table(ulong table_id) +{ + DBUG_ENTER("table_mapping::get_table(ulong)"); + DBUG_PRINT("enter", ("table_id=%d", table_id)); + entry *e= find_entry(table_id); + if (e) + { + DBUG_PRINT("info", ("tid %d -> table %p (%s)", + table_id, e->table, + MAYBE_TABLE_NAME(e->table))); + DBUG_RETURN(e->table); + } + + DBUG_PRINT("info", ("tid %d is not mapped!", table_id)); + DBUG_RETURN(NULL); +} + +/* + Called when we are out of table id entries. Creates TABLE_ID_CHUNK + new entries, chains them, and attaches them at the head of the list of + free (free-for-use) entries. +*/ +int table_mapping::expand() +{ + /* + If we wanted to use "tmp= new (&m_mem_root) entry[TABLE_ID_CHUNK]", + we would have to make "entry" derive from Sql_alloc but then it would not + be a POD anymore and we want it to be (see rpl_tblmap.h). So we allocate + in C. + */ + entry *tmp= (entry *)alloc_root(&m_mem_root, TABLE_ID_CHUNK*sizeof(entry)); + if (tmp == NULL) + return ERR_MEMORY_ALLOCATION; // Memory allocation failed + + /* Find the end of this fresh array of free entries */ + entry *e_end= tmp+TABLE_ID_CHUNK-1; + for (entry *e= tmp; e < e_end; e++) + e->next= e+1; + e_end->next= m_free; + m_free= tmp; + return 0; +} + +int table_mapping::set_table(ulong table_id, TABLE* table) +{ + DBUG_ENTER("table_mapping::set_table(ulong,TABLE*)"); + DBUG_PRINT("enter", ("table_id=%d, table=%p (%s)", + table_id, + table, MAYBE_TABLE_NAME(table))); + entry *e= find_entry(table_id); + if (e == 0) + { + if (m_free == 0 && expand()) + DBUG_RETURN(ERR_MEMORY_ALLOCATION); // Memory allocation failed + e= m_free; + m_free= m_free->next; + } + else + hash_delete(&m_table_ids,(byte *)e); + + e->table_id= table_id; + e->table= table; + my_hash_insert(&m_table_ids,(byte *)e); + + DBUG_PRINT("info", ("tid %d -> table %p (%s)", + table_id, e->table, + MAYBE_TABLE_NAME(e->table))); + DBUG_RETURN(0); // All OK +} + +int table_mapping::remove_table(ulong table_id) +{ + entry *e= find_entry(table_id); + if (e) + { + hash_delete(&m_table_ids,(byte *)e); + /* we add this entry to the chain of free (free-for-use) entries */ + e->next= m_free; + m_free= e; + return 0; // All OK + } + return 1; // No table to remove +} + +/* + Puts all entries into the list of free-for-use entries (does not free any + memory), and empties the hash.
+*/ +void table_mapping::clear_tables() +{ + DBUG_ENTER("table_mapping::clear_tables()"); + for (uint i= 0; i < m_table_ids.records; i++) + { + entry *e= (entry *)hash_element(&m_table_ids, i); + e->next= m_free; + m_free= e; + } + my_hash_reset(&m_table_ids); + DBUG_VOID_RETURN; +} + +#endif diff --git a/sql/rpl_tblmap.h b/sql/rpl_tblmap.h new file mode 100644 index 00000000000..23864bd329e --- /dev/null +++ b/sql/rpl_tblmap.h @@ -0,0 +1,105 @@ +/* Copyright (C) 2005 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#ifndef TABLE_MAPPING_H +#define TABLE_MAPPING_H + +/* Forward declarations */ +struct st_table; +typedef st_table TABLE; + +/* + CLASS table_mapping + + RESPONSIBILITIES + The table mapping is used to map table ids to table pointers + + COLLABORATION + RELAY_LOG For mapping table ids to tables when receiving events. + */ + +/* + Guilhem to Mats: + in the table_mapping class, the memory is allocated and never freed (until + destruction). So this is a good candidate for allocating inside a MEM_ROOT: + it gives the efficient allocation in chunks (like in expand()). So I have + introduced a MEM_ROOT. + + Note that inheriting from Sql_alloc had no effect: it has effects only when + "ptr= new table_mapping" is called, and this is never called. And it would + then allocate from thd->mem_root which is a highly volatile object (reset + for example after executing each query, see dispatch_command(), it has a + free_root() at the end); as the table_mapping object is supposed to live + longer than a query, it was dangerous. + A dedicated MEM_ROOT needs to be used, see below. +*/ + +class table_mapping { + +private: + MEM_ROOT m_mem_root; + +public: + + enum enum_error { + ERR_NO_ERROR = 0, + ERR_LIMIT_EXCEEDED, + ERR_MEMORY_ALLOCATION + }; + + table_mapping(); + ~table_mapping(); + + TABLE* get_table(ulong table_id); + + int set_table(ulong table_id, TABLE* table); + int remove_table(ulong table_id); + void clear_tables(); + ulong count() const { return m_table_ids.records; } + +private: + /* + This is a POD (Plain Old Data). Keep it that way (we apply offsetof() to + it, which only works for PODs) + */ + struct entry { + ulong table_id; + union { + TABLE *table; + entry *next; + }; + }; + + entry *find_entry(ulong table_id) + { + return (entry *)hash_search(&m_table_ids, + (byte*)&table_id, + sizeof(table_id)); + } + int expand(); + + /* + Head of the list of free entries; "free" in the sense that it's an + allocated entry free for use, NOT in the sense that it's freed memory.
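A hedged usage sketch of the class as declared here (the id and TABLE pointer are made up; the return codes come from enum_error above):

  table_mapping map;                 // ctor sets up the hash and the MEM_ROOT
  map.set_table(42UL, some_table);   // 0 on success, ERR_MEMORY_ALLOCATION on OOM
  TABLE *t= map.get_table(42UL);     // yields some_table, or NULL if unmapped
  map.remove_table(42UL);            // entry goes back on the free list
  map.clear_tables();                // recycle all entries, empty the hash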
+ */ + entry *m_free; + + /* Correspondence between an id (a number) and a TABLE object */ + HASH m_table_ids; +}; + +#endif diff --git a/sql/set_var.cc b/sql/set_var.cc index 632c37d2296..7468e4aadf7 100644 --- a/sql/set_var.cc +++ b/sql/set_var.cc @@ -58,15 +58,52 @@ #include <my_getopt.h> #include <thr_alarm.h> #include <myisam.h> -#ifdef HAVE_BERKELEY_DB -#include "ha_berkeley.h" -#endif -#ifdef HAVE_INNOBASE_DB -#include "ha_innodb.h" -#endif -#ifdef HAVE_NDBCLUSTER_DB -#include "ha_ndbcluster.h" -#endif + +/* WITH_BERKELEY_STORAGE_ENGINE */ +extern bool berkeley_shared_data; +extern ulong berkeley_max_lock, berkeley_log_buffer_size; +extern ulonglong berkeley_cache_size; +extern ulong berkeley_region_size, berkeley_cache_parts; +extern char *berkeley_home, *berkeley_tmpdir, *berkeley_logdir; + +/* WITH_INNOBASE_STORAGE_ENGINE */ +extern uint innobase_flush_log_at_trx_commit; +extern ulong innobase_fast_shutdown; +extern long innobase_mirrored_log_groups, innobase_log_files_in_group; +extern longlong innobase_log_file_size; +extern long innobase_log_buffer_size; +extern longlong innobase_buffer_pool_size; +extern long innobase_additional_mem_pool_size; +extern long innobase_buffer_pool_awe_mem_mb; +extern long innobase_file_io_threads, innobase_lock_wait_timeout; +extern long innobase_force_recovery; +extern long innobase_open_files; +extern char *innobase_data_home_dir, *innobase_data_file_path; +extern char *innobase_log_group_home_dir, *innobase_log_arch_dir; +extern char *innobase_unix_file_flush_method; +/* The following variables have to be my_bool for SHOW VARIABLES to work */ +extern my_bool innobase_log_archive, + innobase_use_doublewrite, + innobase_use_checksums, + innobase_file_per_table, + innobase_locks_unsafe_for_binlog; + +extern "C" { +extern ulong srv_max_buf_pool_modified_pct; +extern ulong srv_max_purge_lag; +extern ulong srv_auto_extend_increment; +extern ulong srv_n_spin_wait_rounds; +extern ulong srv_n_free_tickets_to_enter; +extern ulong srv_thread_sleep_delay; +extern ulong srv_thread_concurrency; +extern ulong srv_commit_concurrency; +} + +/* WITH_NDBCLUSTER_STORAGE_ENGINE */ +extern ulong ndb_cache_check_time; + + + static HASH system_variable_hash; const char *bool_type_names[]= { "OFF", "ON", NullS }; @@ -120,7 +157,6 @@ static KEY_CACHE *create_key_cache(const char *name, uint length); void fix_sql_mode_var(THD *thd, enum_var_type type); static byte *get_error_count(THD *thd); static byte *get_warning_count(THD *thd); -static byte *get_have_innodb(THD *thd); /* Variable definition list @@ -129,6 +165,9 @@ static byte *get_have_innodb(THD *thd); alphabetic order */ +sys_var *sys_var::first= NULL; +uint sys_var::sys_vars= 0; + sys_var_thd_ulong sys_auto_increment_increment("auto_increment_increment", &SV::auto_increment_increment); sys_var_thd_ulong sys_auto_increment_offset("auto_increment_offset", @@ -275,6 +314,8 @@ sys_var_long_ptr sys_myisam_data_pointer_size("myisam_data_pointer_size", sys_var_thd_ulonglong sys_myisam_max_sort_file_size("myisam_max_sort_file_size", &SV::myisam_max_sort_file_size, fix_myisam_max_sort_file_size, 1); sys_var_thd_ulong sys_myisam_repair_threads("myisam_repair_threads", &SV::myisam_repair_threads); sys_var_thd_ulong sys_myisam_sort_buffer_size("myisam_sort_buffer_size", &SV::myisam_sort_buff_size); +sys_var_bool_ptr sys_myisam_use_mmap("myisam_use_mmap", + &opt_myisam_use_mmap); sys_var_thd_enum sys_myisam_stats_method("myisam_stats_method", &SV::myisam_stats_method, @@ -293,6 +334,8 @@ sys_var_thd_ulong
sys_net_retry_count("net_retry_count", &SV::net_retry_count, 0, fix_net_retry_count); sys_var_thd_bool sys_new_mode("new", &SV::new_mode); +sys_var_thd_bool sys_old_alter_table("old_alter_table", + &SV::old_alter_table); sys_var_thd_bool sys_old_passwords("old_passwords", &SV::old_passwords); sys_var_thd_ulong sys_optimizer_prune_level("optimizer_prune_level", &SV::optimizer_prune_level); @@ -382,7 +425,9 @@ sys_var_thd_ulong sys_sync_replication_timeout( &SV::sync_replication_timeout); #endif sys_var_bool_ptr sys_sync_frm("sync_frm", &opt_sync_frm); -sys_var_long_ptr sys_table_cache_size("table_cache", +sys_var_long_ptr sys_table_def_size("table_definition_cache", + &table_def_size); +sys_var_long_ptr sys_table_cache_size("table_open_cache", &table_cache_size); sys_var_long_ptr sys_table_lock_wait_timeout("table_lock_wait_timeout", &table_lock_wait_timeout); @@ -399,7 +444,6 @@ sys_var_bool_ptr sys_timed_mutexes("timed_mutexes", sys_var_thd_ulong sys_net_wait_timeout("wait_timeout", &SV::net_wait_timeout); -#ifdef HAVE_INNOBASE_DB sys_var_long_ptr sys_innodb_fast_shutdown("innodb_fast_shutdown", &innobase_fast_shutdown); sys_var_long_ptr sys_innodb_max_dirty_pages_pct("innodb_max_dirty_pages_pct", @@ -422,14 +466,12 @@ sys_var_long_ptr sys_innodb_thread_concurrency("innodb_thread_concurrency", &srv_thread_concurrency); sys_var_long_ptr sys_innodb_commit_concurrency("innodb_commit_concurrency", &srv_commit_concurrency); -#endif /* Condition pushdown to storage engine */ sys_var_thd_bool sys_engine_condition_pushdown("engine_condition_pushdown", &SV::engine_condition_pushdown); -#ifdef HAVE_NDBCLUSTER_DB /* ndb thread specific variable settings */ sys_var_thd_ulong sys_ndb_autoincrement_prefetch_sz("ndb_autoincrement_prefetch_sz", @@ -442,7 +484,15 @@ sys_var_thd_bool sys_ndb_use_transactions("ndb_use_transactions", &SV::ndb_use_transactions); sys_var_long_ptr sys_ndb_cache_check_time("ndb_cache_check_time", &ndb_cache_check_time); -#endif +sys_var_thd_bool +sys_ndb_index_stat_enable("ndb_index_stat_enable", + &SV::ndb_index_stat_enable); +sys_var_thd_ulong +sys_ndb_index_stat_cache_entries("ndb_index_stat_cache_entries", + &SV::ndb_index_stat_cache_entries); +sys_var_thd_ulong +sys_ndb_index_stat_update_freq("ndb_index_stat_update_freq", + &SV::ndb_index_stat_update_freq); /* Time/date/datetime formats */ @@ -546,204 +596,35 @@ sys_var_thd_time_zone sys_time_zone("time_zone"); /* Read only variables */ sys_var_const_str sys_os("version_compile_os", SYSTEM_TYPE); -sys_var_readonly sys_have_innodb("have_innodb", OPT_GLOBAL, - SHOW_CHAR, get_have_innodb); +sys_var_have_variable sys_have_archive_db("have_archive", &have_archive_db); +sys_var_have_variable sys_have_berkeley_db("have_bdb", &have_berkeley_db); +sys_var_have_variable sys_have_blackhole_db("have_blackhole_engine", + &have_blackhole_db); +sys_var_have_variable sys_have_compress("have_compress", &have_compress); +sys_var_have_variable sys_have_crypt("have_crypt", &have_crypt); +sys_var_have_variable sys_have_csv_db("have_csv", &have_csv_db); +sys_var_have_variable sys_have_example_db("have_example_engine", + &have_example_db); +sys_var_have_variable sys_have_federated_db("have_federated_engine", + &have_federated_db); +sys_var_have_variable sys_have_geometry("have_geometry", &have_geometry); +sys_var_have_variable sys_have_innodb("have_innodb", &have_innodb); +sys_var_have_variable sys_have_isam("have_isam", &have_isam); +sys_var_have_variable sys_have_ndbcluster("have_ndbcluster", &have_ndbcluster); +sys_var_have_variable 
sys_have_openssl("have_openssl", &have_openssl); +sys_var_have_variable sys_have_partition_db("have_partition_engine", + &have_partition_db); +sys_var_have_variable sys_have_query_cache("have_query_cache", + &have_query_cache); +sys_var_have_variable sys_have_raid("have_raid", &have_raid); +sys_var_have_variable sys_have_rtree_keys("have_rtree_keys", &have_rtree_keys); +sys_var_have_variable sys_have_symlink("have_symlink", &have_symlink); +sys_var_have_variable sys_have_row_based_replication("have_row_based_replication",&have_row_based_replication); /* Global read-only variable describing server license */ sys_var_const_str sys_license("license", STRINGIFY_ARG(LICENSE)); /* - List of all variables for initialisation and storage in hash - This is sorted in alphabetical order to make it easy to add new variables - - If the variable is not in this list, it can't be changed with - SET variable_name= -*/ - -sys_var *sys_variables[]= -{ - &sys_auto_is_null, - &sys_auto_increment_increment, - &sys_auto_increment_offset, - &sys_autocommit, - &sys_automatic_sp_privileges, - &sys_big_tables, - &sys_big_selects, - &sys_binlog_cache_size, - &sys_buffer_results, - &sys_bulk_insert_buff_size, - &sys_character_set_server, - &sys_character_set_database, - &sys_character_set_client, - &sys_character_set_connection, - &sys_character_set_results, - &sys_charset_system, - &sys_collation_connection, - &sys_collation_database, - &sys_collation_server, - &sys_completion_type, - &sys_concurrent_insert, - &sys_connect_timeout, - &sys_date_format, - &sys_datetime_format, - &sys_div_precincrement, - &sys_default_week_format, - &sys_delay_key_write, - &sys_delayed_insert_limit, - &sys_delayed_insert_timeout, - &sys_delayed_queue_size, - &sys_error_count, - &sys_expire_logs_days, - &sys_flush, - &sys_flush_time, - &sys_ft_boolean_syntax, - &sys_foreign_key_checks, - &sys_group_concat_max_len, - &sys_have_innodb, - &sys_identity, - &sys_init_connect, - &sys_init_slave, - &sys_insert_id, - &sys_interactive_timeout, - &sys_join_buffer_size, - &sys_key_buffer_size, - &sys_key_cache_block_size, - &sys_key_cache_division_limit, - &sys_key_cache_age_threshold, - &sys_last_insert_id, - &sys_license, - &sys_local_infile, - &sys_log_binlog, - &sys_log_off, - &sys_log_update, - &sys_log_warnings, - &sys_long_query_time, - &sys_low_priority_updates, - &sys_max_allowed_packet, - &sys_max_binlog_cache_size, - &sys_max_binlog_size, - &sys_max_connect_errors, - &sys_max_connections, - &sys_max_delayed_threads, - &sys_max_error_count, - &sys_max_insert_delayed_threads, - &sys_max_heap_table_size, - &sys_max_join_size, - &sys_max_length_for_sort_data, - &sys_max_relay_log_size, - &sys_max_seeks_for_key, - &sys_max_sort_length, - &sys_max_sp_recursion_depth, - &sys_max_tmp_tables, - &sys_max_user_connections, - &sys_max_write_lock_count, - &sys_multi_range_count, - &sys_myisam_data_pointer_size, - &sys_myisam_max_sort_file_size, - &sys_myisam_repair_threads, - &sys_myisam_sort_buffer_size, - &sys_myisam_stats_method, - &sys_net_buffer_length, - &sys_net_read_timeout, - &sys_net_retry_count, - &sys_net_wait_timeout, - &sys_net_write_timeout, - &sys_new_mode, - &sys_old_passwords, - &sys_optimizer_prune_level, - &sys_optimizer_search_depth, - &sys_preload_buff_size, - &sys_pseudo_thread_id, - &sys_query_alloc_block_size, - &sys_query_cache_size, - &sys_query_prealloc_size, -#ifdef HAVE_QUERY_CACHE - &sys_query_cache_limit, - &sys_query_cache_min_res_unit, - &sys_query_cache_type, - &sys_query_cache_wlock_invalidate, -#endif /* 
HAVE_QUERY_CACHE */ - &sys_quote_show_create, - &sys_rand_seed1, - &sys_rand_seed2, - &sys_range_alloc_block_size, - &sys_readonly, - &sys_read_buff_size, - &sys_read_rnd_buff_size, -#ifdef HAVE_REPLICATION - &sys_relay_log_purge, -#endif - &sys_rpl_recovery_rank, - &sys_safe_updates, - &sys_secure_auth, - &sys_select_limit, - &sys_server_id, -#ifdef HAVE_REPLICATION - &sys_slave_compressed_protocol, - &sys_slave_net_timeout, - &sys_slave_trans_retries, - &sys_slave_skip_counter, -#endif - &sys_slow_launch_time, - &sys_sort_buffer, - &sys_sql_big_tables, - &sys_sql_low_priority_updates, - &sys_sql_max_join_size, - &sys_sql_mode, - &sys_sql_warnings, - &sys_sql_notes, - &sys_storage_engine, -#ifdef HAVE_REPLICATION - &sys_sync_binlog_period, - &sys_sync_replication, - &sys_sync_replication_slave_id, - &sys_sync_replication_timeout, -#endif - &sys_sync_frm, - &sys_table_cache_size, - &sys_table_lock_wait_timeout, - &sys_table_type, - &sys_thread_cache_size, - &sys_time_format, - &sys_timed_mutexes, - &sys_timestamp, - &sys_time_zone, - &sys_tmp_table_size, - &sys_trans_alloc_block_size, - &sys_trans_prealloc_size, - &sys_tx_isolation, - &sys_os, -#ifdef HAVE_INNOBASE_DB - &sys_innodb_fast_shutdown, - &sys_innodb_max_dirty_pages_pct, - &sys_innodb_max_purge_lag, - &sys_innodb_table_locks, - &sys_innodb_support_xa, - &sys_innodb_max_purge_lag, - &sys_innodb_autoextend_increment, - &sys_innodb_sync_spin_loops, - &sys_innodb_concurrency_tickets, - &sys_innodb_thread_sleep_delay, - &sys_innodb_thread_concurrency, - &sys_innodb_commit_concurrency, -#endif - &sys_trust_routine_creators, - &sys_trust_function_creators, - &sys_engine_condition_pushdown, -#ifdef HAVE_NDBCLUSTER_DB - &sys_ndb_autoincrement_prefetch_sz, - &sys_ndb_cache_check_time, - &sys_ndb_force_send, - &sys_ndb_use_exact_count, - &sys_ndb_use_transactions, -#endif - &sys_unique_checks, - &sys_updatable_views_with_limit, - &sys_warning_count -}; - - -/* Variables shown by SHOW variables in alphabetical order */ @@ -753,16 +634,17 @@ struct show_var_st init_vars[]= { {sys_automatic_sp_privileges.name,(char*) &sys_automatic_sp_privileges, SHOW_SYS}, {"back_log", (char*) &back_log, SHOW_LONG}, {"basedir", mysql_home, SHOW_CHAR}, -#ifdef HAVE_BERKELEY_DB - {"bdb_cache_size", (char*) &berkeley_cache_size, SHOW_LONG}, + {"bdb_cache_parts", (char*) &berkeley_cache_parts, SHOW_LONG}, + {"bdb_cache_size", (char*) &berkeley_cache_size, SHOW_LONGLONG}, {"bdb_home", (char*) &berkeley_home, SHOW_CHAR_PTR}, {"bdb_log_buffer_size", (char*) &berkeley_log_buffer_size, SHOW_LONG}, {"bdb_logdir", (char*) &berkeley_logdir, SHOW_CHAR_PTR}, {"bdb_max_lock", (char*) &berkeley_max_lock, SHOW_LONG}, + {"bdb_region_size", (char*) &berkeley_region_size, SHOW_LONG}, {"bdb_shared_data", (char*) &berkeley_shared_data, SHOW_BOOL}, {"bdb_tmpdir", (char*) &berkeley_tmpdir, SHOW_CHAR_PTR}, -#endif {sys_binlog_cache_size.name,(char*) &sys_binlog_cache_size, SHOW_SYS}, + {"binlog_format", (char*) &opt_binlog_format, SHOW_CHAR_PTR}, {sys_bulk_insert_buff_size.name,(char*) &sys_bulk_insert_buff_size,SHOW_SYS}, {sys_character_set_client.name,(char*) &sys_character_set_client, SHOW_SYS}, {sys_character_set_connection.name,(char*) &sys_character_set_connection,SHOW_SYS}, @@ -797,27 +679,28 @@ struct show_var_st init_vars[]= { {"ft_query_expansion_limit",(char*) &ft_query_expansion_limit, SHOW_LONG}, {"ft_stopword_file", (char*) &ft_stopword_file, SHOW_CHAR_PTR}, {sys_group_concat_max_len.name, (char*) &sys_group_concat_max_len, SHOW_SYS}, - {"have_archive", (char*) 
&have_archive_db, SHOW_HAVE}, - {"have_bdb", (char*) &have_berkeley_db, SHOW_HAVE}, - {"have_blackhole_engine", (char*) &have_blackhole_db, SHOW_HAVE}, - {"have_compress", (char*) &have_compress, SHOW_HAVE}, - {"have_crypt", (char*) &have_crypt, SHOW_HAVE}, - {"have_csv", (char*) &have_csv_db, SHOW_HAVE}, - {"have_example_engine", (char*) &have_example_db, SHOW_HAVE}, - {"have_federated_engine", (char*) &have_federated_db, SHOW_HAVE}, - {"have_geometry", (char*) &have_geometry, SHOW_HAVE}, - {"have_innodb", (char*) &have_innodb, SHOW_HAVE}, - {"have_isam", (char*) &have_isam, SHOW_HAVE}, - {"have_ndbcluster", (char*) &have_ndbcluster, SHOW_HAVE}, - {"have_openssl", (char*) &have_openssl, SHOW_HAVE}, - {"have_query_cache", (char*) &have_query_cache, SHOW_HAVE}, - {"have_raid", (char*) &have_raid, SHOW_HAVE}, - {"have_rtree_keys", (char*) &have_rtree_keys, SHOW_HAVE}, - {"have_symlink", (char*) &have_symlink, SHOW_HAVE}, + {sys_have_archive_db.name, (char*) &have_archive_db, SHOW_HAVE}, + {sys_have_berkeley_db.name, (char*) &have_berkeley_db, SHOW_HAVE}, + {sys_have_blackhole_db.name,(char*) &have_blackhole_db, SHOW_HAVE}, + {sys_have_compress.name, (char*) &have_compress, SHOW_HAVE}, + {sys_have_crypt.name, (char*) &have_crypt, SHOW_HAVE}, + {sys_have_csv_db.name, (char*) &have_csv_db, SHOW_HAVE}, + {sys_have_example_db.name, (char*) &have_example_db, SHOW_HAVE}, + {sys_have_federated_db.name,(char*) &have_federated_db, SHOW_HAVE}, + {sys_have_geometry.name, (char*) &have_geometry, SHOW_HAVE}, + {sys_have_innodb.name, (char*) &have_innodb, SHOW_HAVE}, + {sys_have_isam.name, (char*) &have_isam, SHOW_HAVE}, + {sys_have_ndbcluster.name, (char*) &have_ndbcluster, SHOW_HAVE}, + {sys_have_openssl.name, (char*) &have_openssl, SHOW_HAVE}, + {sys_have_partition_db.name,(char*) &have_partition_db, SHOW_HAVE}, + {sys_have_query_cache.name, (char*) &have_query_cache, SHOW_HAVE}, + {sys_have_raid.name, (char*) &have_raid, SHOW_HAVE}, + {sys_have_row_based_replication.name, (char*) &have_row_based_replication, SHOW_HAVE}, + {sys_have_rtree_keys.name, (char*) &have_rtree_keys, SHOW_HAVE}, + {sys_have_symlink.name, (char*) &have_symlink, SHOW_HAVE}, {"init_connect", (char*) &sys_init_connect, SHOW_SYS}, {"init_file", (char*) &opt_init_file, SHOW_CHAR_PTR}, {"init_slave", (char*) &sys_init_slave, SHOW_SYS}, -#ifdef HAVE_INNOBASE_DB {"innodb_additional_mem_pool_size", (char*) &innobase_additional_mem_pool_size, SHOW_LONG }, {sys_innodb_autoextend_increment.name, (char*) &sys_innodb_autoextend_increment, SHOW_SYS}, {"innodb_buffer_pool_awe_mem_mb", (char*) &innobase_buffer_pool_awe_mem_mb, SHOW_LONG }, @@ -851,7 +734,6 @@ struct show_var_st init_vars[]= { {sys_innodb_table_locks.name, (char*) &sys_innodb_table_locks, SHOW_SYS}, {sys_innodb_thread_concurrency.name, (char*) &sys_innodb_thread_concurrency, SHOW_SYS}, {sys_innodb_thread_sleep_delay.name, (char*) &sys_innodb_thread_sleep_delay, SHOW_SYS}, -#endif {sys_interactive_timeout.name,(char*) &sys_interactive_timeout, SHOW_SYS}, {sys_join_buffer_size.name, (char*) &sys_join_buffer_size, SHOW_SYS}, {sys_key_buffer_size.name, (char*) &sys_key_buffer_size, SHOW_SYS}, @@ -914,23 +796,26 @@ struct show_var_st init_vars[]= { {sys_myisam_sort_buffer_size.name, (char*) &sys_myisam_sort_buffer_size, SHOW_SYS}, {sys_myisam_stats_method.name, (char*) &sys_myisam_stats_method, SHOW_SYS}, + {sys_myisam_use_mmap.name, (char*) &sys_myisam_use_mmap, SHOW_SYS}, #ifdef __NT__ {"named_pipe", (char*) &opt_enable_named_pipe, SHOW_MY_BOOL}, #endif -#ifdef HAVE_NDBCLUSTER_DB 
{sys_ndb_autoincrement_prefetch_sz.name, (char*) &sys_ndb_autoincrement_prefetch_sz, SHOW_SYS}, + {sys_ndb_cache_check_time.name,(char*) &sys_ndb_cache_check_time, SHOW_SYS}, {sys_ndb_force_send.name, (char*) &sys_ndb_force_send, SHOW_SYS}, + {sys_ndb_index_stat_cache_entries.name, (char*) &sys_ndb_index_stat_cache_entries, SHOW_SYS}, + {sys_ndb_index_stat_enable.name, (char*) &sys_ndb_index_stat_enable, SHOW_SYS}, + {sys_ndb_index_stat_update_freq.name, (char*) &sys_ndb_index_stat_update_freq, SHOW_SYS}, {sys_ndb_use_exact_count.name,(char*) &sys_ndb_use_exact_count, SHOW_SYS}, {sys_ndb_use_transactions.name,(char*) &sys_ndb_use_transactions, SHOW_SYS}, - {sys_ndb_cache_check_time.name,(char*) &sys_ndb_cache_check_time, SHOW_SYS}, -#endif {sys_net_buffer_length.name,(char*) &sys_net_buffer_length, SHOW_SYS}, {sys_net_read_timeout.name, (char*) &sys_net_read_timeout, SHOW_SYS}, {sys_net_retry_count.name, (char*) &sys_net_retry_count, SHOW_SYS}, {sys_net_write_timeout.name,(char*) &sys_net_write_timeout, SHOW_SYS}, {sys_new_mode.name, (char*) &sys_new_mode, SHOW_SYS}, + {sys_old_alter_table.name, (char*) &sys_old_alter_table, SHOW_SYS}, {sys_old_passwords.name, (char*) &sys_old_passwords, SHOW_SYS}, {"open_files_limit", (char*) &open_files_limit, SHOW_LONG}, {sys_optimizer_prune_level.name, (char*) &sys_optimizer_prune_level, @@ -938,6 +823,7 @@ struct show_var_st init_vars[]= { {sys_optimizer_search_depth.name,(char*) &sys_optimizer_search_depth, SHOW_SYS}, {"pid_file", (char*) pidfile_name, SHOW_CHAR}, + {"plugin_dir", (char*) opt_plugin_dir, SHOW_CHAR}, {"port", (char*) &mysqld_port, SHOW_INT}, {sys_preload_buff_size.name, (char*) &sys_preload_buff_size, SHOW_SYS}, {"protocol_version", (char*) &protocol_version, SHOW_INT}, @@ -1001,8 +887,9 @@ struct show_var_st init_vars[]= { #ifdef HAVE_TZNAME {"system_time_zone", system_time_zone, SHOW_CHAR}, #endif - {"table_cache", (char*) &table_cache_size, SHOW_LONG}, + {"table_definition_cache", (char*) &table_def_size, SHOW_LONG}, {"table_lock_wait_timeout", (char*) &table_lock_wait_timeout, SHOW_LONG }, + {"table_open_cache", (char*) &table_cache_size, SHOW_LONG}, {sys_table_type.name, (char*) &sys_table_type, SHOW_SYS}, {sys_thread_cache_size.name,(char*) &sys_thread_cache_size, SHOW_SYS}, #ifdef HAVE_THR_SETCONCURRENCY @@ -1021,9 +908,6 @@ struct show_var_st init_vars[]= { {sys_updatable_views_with_limit.name, (char*) &sys_updatable_views_with_limit,SHOW_SYS}, {"version", server_version, SHOW_CHAR}, -#ifdef HAVE_BERKELEY_DB - {"version_bdb", (char*) DB_VERSION_STRING, SHOW_CHAR}, -#endif {"version_comment", (char*) MYSQL_COMPILATION_COMMENT, SHOW_CHAR}, {"version_compile_machine", (char*) MACHINE_TYPE, SHOW_CHAR}, {sys_os.name, (char*) &sys_os, SHOW_SYS}, @@ -2783,12 +2667,6 @@ static byte *get_error_count(THD *thd) } -static byte *get_have_innodb(THD *thd) -{ - return (byte*) show_comp_option_name[have_innodb]; -} - - /**************************************************************************** Main handling of variables: - Initialisation @@ -2847,17 +2725,15 @@ static byte *get_sys_var_length(const sys_var *var, uint *length, void set_var_init() { - hash_init(&system_variable_hash, system_charset_info, - array_elements(sys_variables),0,0, - (hash_get_key) get_sys_var_length,0,0); - sys_var **var, **end; - for (var= sys_variables, end= sys_variables+array_elements(sys_variables) ; - var < end; - var++) + sys_var *var; + + hash_init(&system_variable_hash, system_charset_info, sys_var::sys_vars, 0, + 0, (hash_get_key) get_sys_var_length, 0, 
0); + for (var= sys_var::first; var; var= var->next) { - (*var)->name_length= strlen((*var)->name); - (*var)->option_limits= find_option(my_long_options, (*var)->name); - my_hash_insert(&system_variable_hash, (byte*) *var); + var->name_length= strlen(var->name); + var->option_limits= find_option(my_long_options, var->name); + my_hash_insert(&system_variable_hash, (byte*) var); } /* Special cases @@ -3164,11 +3040,12 @@ bool sys_var_thd_storage_engine::check(THD *thd, set_var *var) if (var->value->result_type() == STRING_RESULT) { - enum db_type db_type; + LEX_STRING name; + handlerton *db_type; if (!(res=var->value->val_str(&str)) || - !(var->save_result.ulong_value= - (ulong) (db_type= ha_resolve_by_name(res->ptr(), res->length()))) || - ha_checktype(thd, db_type, 1, 0) != db_type) + !(name.str= (char *)res->ptr()) || !(name.length= res->length()) || + !(var->save_result.hton= db_type= ha_resolve_by_name(thd, &name)) || + ha_checktype(thd, ha_legacy_type(db_type), 1, 0) != db_type) { value= res ? res->c_ptr() : "NULL"; goto err; @@ -3186,29 +3063,28 @@ err: byte *sys_var_thd_storage_engine::value_ptr(THD *thd, enum_var_type type, LEX_STRING *base) { - ulong val; - val= ((type == OPT_GLOBAL) ? global_system_variables.*offset : - thd->variables.*offset); - const char *table_type= ha_get_storage_engine((enum db_type)val); - return (byte *) table_type; + handlerton *val; + val= (type == OPT_GLOBAL) ? global_system_variables.*offset : + thd->variables.*offset; + return (byte *) val->name; } void sys_var_thd_storage_engine::set_default(THD *thd, enum_var_type type) { if (type == OPT_GLOBAL) - global_system_variables.*offset= (ulong) DB_TYPE_MYISAM; + global_system_variables.*offset= &myisam_hton; else - thd->variables.*offset= (ulong) (global_system_variables.*offset); + thd->variables.*offset= global_system_variables.*offset; } bool sys_var_thd_storage_engine::update(THD *thd, set_var *var) { - if (var->type == OPT_GLOBAL) - global_system_variables.*offset= var->save_result.ulong_value; - else - thd->variables.*offset= var->save_result.ulong_value; + handlerton **value= &(global_system_variables.*offset); + if (var->type != OPT_GLOBAL) + value= &(thd->variables.*offset); + *value= var->save_result.hton; return 0; } diff --git a/sql/set_var.h b/sql/set_var.h index 18c3353e8ff..838037b6e20 100644 --- a/sql/set_var.h +++ b/sql/set_var.h @@ -39,6 +39,9 @@ typedef byte *(*sys_value_ptr_func)(THD *thd); class sys_var { public: + static sys_var *first; + static uint sys_vars; + sys_var *next; struct my_option *option_limits; /* Updated by set_var_init() */ uint name_length; /* Updated by set_var_init() */ const char *name; @@ -48,12 +51,18 @@ public: sys_var(const char *name_arg) :name(name_arg), after_update(0) , no_support_one_shot(1) - {} + { add_sys_var(); } sys_var(const char *name_arg,sys_after_update_func func) :name(name_arg), after_update(func) , no_support_one_shot(1) - {} + { add_sys_var(); } virtual ~sys_var() {} + void add_sys_var() + { + next= first; + first= this; + sys_vars++; + } virtual bool check(THD *thd, set_var *var); bool check_enum(THD *thd, set_var *var, TYPELIB *enum_names); bool check_set(THD *thd, set_var *var, TYPELIB *enum_names); @@ -370,9 +379,9 @@ public: class sys_var_thd_storage_engine :public sys_var_thd { protected: - ulong SV::*offset; + handlerton *SV::*offset; public: - sys_var_thd_storage_engine(const char *name_arg, ulong SV::*offset_arg) + sys_var_thd_storage_engine(const char *name_arg, handlerton *SV::*offset_arg) :sys_var_thd(name_arg),
offset(offset_arg) {} bool check(THD *thd, set_var *var); @@ -389,7 +398,7 @@ SHOW_TYPE type() { return SHOW_CHAR; } class sys_var_thd_table_type :public sys_var_thd_storage_engine { public: - sys_var_thd_table_type(const char *name_arg, ulong SV::*offset_arg) + sys_var_thd_table_type(const char *name_arg, handlerton *SV::*offset_arg) :sys_var_thd_storage_engine(name_arg, offset_arg) {} void warn_deprecated(THD *thd); @@ -701,6 +710,30 @@ public: bool is_readonly() const { return 1; } }; + +class sys_var_have_variable: public sys_var +{ + SHOW_COMP_OPTION *have_variable; + +public: + sys_var_have_variable(const char *variable_name, + SHOW_COMP_OPTION *have_variable_arg): + sys_var(variable_name), + have_variable(have_variable_arg) + { } + byte *value_ptr(THD *thd, enum_var_type type, LEX_STRING *base) + { + return (byte*) show_comp_option_name[*have_variable]; + } + bool update(THD *thd, set_var *var) { return 1; } + bool check_default(enum_var_type type) { return 1; } + bool check_type(enum_var_type type) { return type != OPT_GLOBAL; } + bool check_update_type(Item_result type) { return 1; } + SHOW_TYPE type() { return SHOW_CHAR; } + bool is_readonly() const { return 1; } +}; + + class sys_var_thd_time_zone :public sys_var_thd { public: @@ -779,6 +812,7 @@ public: CHARSET_INFO *charset; ulong ulong_value; ulonglong ulonglong_value; + handlerton *hton; DATE_TIME_FORMAT *date_time_format; Time_zone *time_zone; } save_result; @@ -891,6 +925,7 @@ public: /* updated in sql_acl.cc */ +extern sys_var_thd_bool sys_old_alter_table; extern sys_var_thd_bool sys_old_passwords; extern LEX_STRING default_key_cache_base; diff --git a/sql/share/charsets/Index.xml b/sql/share/charsets/Index.xml index 32fd1618a8b..6db14f22264 100644 --- a/sql/share/charsets/Index.xml +++ b/sql/share/charsets/Index.xml @@ -1,6 +1,6 @@ <?xml version='1.0' encoding="utf-8"?> -<charsets max-id="98"> +<charsets max-id="99"> <copyright> Copyright (C) 2003 MySQL AB @@ -370,6 +370,9 @@ To make maintaining easier please: <collation name="cp1250_croatian_ci" id="44"> <order>Croatian</order> </collation> + <collation name="cp1250_polish_ci" id="99"> + <order>Polish</order> + </collation> <collation name="cp1250_czech_cs" id="34" order="Czech"> <flag>compiled</flag> </collation> diff --git a/sql/share/charsets/cp1250.xml b/sql/share/charsets/cp1250.xml index 1e62e64ad5a..b83d0faeca8 100644 --- a/sql/share/charsets/cp1250.xml +++ b/sql/share/charsets/cp1250.xml @@ -153,6 +153,27 @@ BE BF C0 54 C1 C2 C3 C4 C5 41 5F C6 54 C7 54 6B </map> </collation> +<collation name="cp1250_polish_ci"> +<map> +00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F +10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F +20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F +30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F +40 41 43 44 48 4B 4D 4E 4F 50 52 53 54 56 57 59 +5B 5C 5D 5F 62 64 66 67 68 69 6B 90 91 92 93 94 +95 41 43 44 48 4B 4D 4E 4F 50 52 53 54 56 57 59 +5B 5C 5D 5F 62 64 66 67 68 69 6B 96 97 98 99 9A +9B 9C 9E 9F A0 A1 A2 A3 A4 A5 5F A6 60 62 6B 6C +A7 A8 A9 AA AB AC AD AE AF B0 5F B1 60 62 6B 6C +B2 B3 B4 55 B5 42 B6 B7 B8 B9 5F BA BB BC BD 6D +BE BF C0 55 C1 C2 C3 C4 C5 42 5F C6 54 C7 54 6D +5D 41 41 41 41 54 47 44 44 4B 4C 4B 4B 50 50 48 +48 58 57 5A 59 59 59 C8 5D 64 64 64 64 69 62 5F +5D 41 41 41 41 54 47 44 44 4B 4C 4B 4B 50 50 48 +48 58 57 5A 59 59 59 C9 5D 64 64 64 64 69 62 FF +</map> +</collation> + <collation name="cp1250_czech_ci"/> <collation name="cp1250_bin" flag="binary"/> diff --git a/sql/share/errmsg.txt b/sql/share/errmsg.txt index 
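[The cp1250_polish_ci <map> above is a 256-entry table of one-byte sort weights: comparison maps each byte through the table and compares weights, so code points sharing a weight collate equal (in the map, 'A' at 0x41 and 'a' at 0x61 both get weight 41, which is what makes the collation case-insensitive). A toy illustration of the mechanism, using a trivial case-folding table in place of the real XML data:

    #include <cctype>
    #include <cstddef>

    struct weight_table
    {
      unsigned char w[256];
      weight_table()
      {
        for (int i= 0; i < 256; i++)
          w[i]= (unsigned char) std::toupper(i);  // 'a' and 'A' share a weight
      }
    };

    static const weight_table wt;

    int coll_cmp(const unsigned char *a, std::size_t alen,
                 const unsigned char *b, std::size_t blen)
    {
      std::size_t len= alen < blen ? alen : blen;
      for (std::size_t i= 0; i < len; i++)
      {
        int diff= (int) wt.w[a[i]] - (int) wt.w[b[i]];
        if (diff)
          return diff;                // first differing weight decides
      }
      return (int) alen - (int) blen; // equal prefix: shorter sorts first
    }

The server-side machinery additionally handles padding and multi-level collations, but a one-level simple-collation map like the one added here is essentially this loop.]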
185b4326c5c..12123001463 100644 --- a/sql/share/errmsg.txt +++ b/sql/share/errmsg.txt @@ -3006,7 +3006,7 @@ ER_CANT_FIND_DL_ENTRY cze "Nemohu naj-Bít funkci '%-.64s' v knihovnì" dan "Kan ikke finde funktionen '%-.64s' i bibliotek" nla "Kan functie '%-.64s' niet in library vinden" - eng "Can't find function '%-.64s' in library" + eng "Can't find symbol '%-.64s' in library" jps "function '%-.64s' ‚ðƒ‰ƒCƒuƒ‰ƒŠ[’†‚ÉŒ©•t‚¯‚鎖‚ª‚Å‚«‚Ü‚¹‚ñ", est "Ei leia funktsiooni '%-.64s' antud teegis" fre "Impossible de trouver la fonction '%-.64s' dans la bibliothèque" @@ -3018,7 +3018,7 @@ ER_CANT_FIND_DL_ENTRY kor "¶óÀ̹ö·¯¸®¿¡¼ '%-.64s' ÇÔ¼ö¸¦ ãÀ» ¼ö ¾ø½À´Ï´Ù." por "Não pode encontrar a função '%-.64s' na biblioteca" rum "Nu pot gasi functia '%-.64s' in libraria" - rus "îÅ×ÏÚÍÏÖÎÏ ÏÔÙÓËÁÔØ ÆÕÎËÃÉÀ '%-.64s' × ÂÉÂÌÉÏÔÅËÅ" + rus "îÅ×ÏÚÍÏÖÎÏ ÏÔÙÓËÁÔØ ÓÉÍ×ÏÌ '%-.64s' × ÂÉÂÌÉÏÔÅËÅ" serbian "Ne mogu da pronadjem funkciju '%-.64s' u biblioteci" slo "Nemô¾em nájs» funkciu '%-.64s' v kni¾nici" spa "No puedo encontrar función '%-.64s' en libraria" @@ -4898,10 +4898,7 @@ ER_WARN_NULL_TO_NOTNULL 22004 por "Dado truncado, NULL fornecido para NOT NULL coluna '%s' na linha %ld" spa "Datos truncado, NULL suministrado para NOT NULL columna '%s' en la línea %ld" ER_WARN_DATA_OUT_OF_RANGE 22003 - eng "Out of range value adjusted for column '%s' at row %ld" - ger "Daten abgeschnitten, außerhalb des Wertebereichs für Feld '%s' in Zeile %ld" - por "Dado truncado, fora de alcance para coluna '%s' na linha %ld" - spa "Datos truncados, fuera de gama para columna '%s' en la línea %ld" + eng "Out of range value for column '%s' at row %ld" WARN_DATA_TRUNCATED 01000 eng "Data truncated for column '%s' at row %ld" ger "Daten abgeschnitten für Feld '%s' in Zeile %ld" @@ -5592,7 +5589,7 @@ ER_NO_REFERENCED_ROW_2 23000 eng "Cannot add or update a child row: a foreign key constraint fails (%.192s)" ger "Kann Kind-Zeile nicht hinzufügen oder aktualisieren: eine Fremdschlüsselbedingung schlägt fehl (%.192s)" ER_SP_BAD_VAR_SHADOW 42000 - eng "Variable '%-.64s' must be quoted with `...`, or renamed" + eng "Variable '%-.64s' must be quoted with `...`, or renamed" ger "Variable '%-.64s' muss mit `...` geschützt oder aber umbenannt werden" ER_TRG_NO_DEFINER eng "No definer attribute for trigger '%-.64s'.'%-.64s'. The trigger will be activated under the authorization of the invoker, which may have insufficient privileges. Please recreate the trigger." @@ -5605,3 +5602,134 @@ ER_SP_RECURSION_LIMIT ger "Rekursionsgrenze %d (durch Variable max_sp_recursion_depth gegeben) wurde für Routine %.64s überschritten" ER_SP_PROC_TABLE_CORRUPT eng "Failed to load routine %s. 
The table mysql.proc is missing, corrupt, or contains bad data (internal code %d)" +ER_PARTITION_REQUIRES_VALUES_ERROR + eng "%s PARTITIONING requires definition of VALUES %s for each partition" + swe "%s PARTITIONering kräver definition av VALUES %s för varje partition" +ER_PARTITION_WRONG_VALUES_ERROR + eng "Only %s PARTITIONING can use VALUES %s in partition definition" + swe "Endast %s partitionering kan använda VALUES %s i definition av partitionen" +ER_PARTITION_MAXVALUE_ERROR + eng "MAXVALUE can only be used in last partition definition" + swe "MAXVALUE kan bara användas i definitionen av den sista partitionen" +ER_PARTITION_SUBPARTITION_ERROR + eng "Subpartitions can only be hash partitions and by key" + swe "Subpartitioner kan bara vara hash och key partitioner" +ER_PARTITION_WRONG_NO_PART_ERROR + eng "Wrong number of partitions defined, mismatch with previous setting" + swe "Antal partitioner definierade och antal partitioner är inte lika" +ER_PARTITION_WRONG_NO_SUBPART_ERROR + eng "Wrong number of subpartitions defined, mismatch with previous setting" + swe "Antal subpartitioner definierade och antal subpartitioner är inte lika" +ER_CONST_EXPR_IN_PARTITION_FUNC_ERROR + eng "Constant/Random expression in (sub)partitioning function is not allowed" + swe "Konstanta uttryck eller slumpmässiga uttryck är inte tillåtna (sub)partitioneringsfunktioner" +ER_NO_CONST_EXPR_IN_RANGE_OR_LIST_ERROR + eng "Expression in RANGE/LIST VALUES must be constant" + swe "Uttryck i RANGE/LIST VALUES måste vara ett konstant uttryck" +ER_FIELD_NOT_FOUND_PART_ERROR + eng "Field in list of fields for partition function not found in table" + swe "Fält i listan av fält för partitionering med key inte funnen i tabellen" +ER_LIST_OF_FIELDS_ONLY_IN_HASH_ERROR + eng "List of fields is only allowed in KEY partitions" + swe "En lista av fält är endast tillåtet för KEY partitioner" +ER_INCONSISTENT_PARTITION_INFO_ERROR + eng "The partition info in the frm file is not consistent with what can be written into the frm file" + swe "Partitioneringsinformationen i frm-filen är inte konsistent med vad som kan skrivas i frm-filen" +ER_PARTITION_FUNC_NOT_ALLOWED_ERROR + eng "The %s function returns the wrong type" + swe "%s-funktionen returnerar felaktig typ" +ER_PARTITIONS_MUST_BE_DEFINED_ERROR + eng "For %s partitions each partition must be defined" + swe "För %s partitionering så måste varje partition definieras" +ER_RANGE_NOT_INCREASING_ERROR + eng "VALUES LESS THAN value must be strictly increasing for each partition" + swe "Värden i VALUES LESS THAN måste vara strikt växande för varje partition" +ER_INCONSISTENT_TYPE_OF_FUNCTIONS_ERROR + eng "VALUES value must be of same type as partition function" + swe "Värden i VALUES måste vara av samma typ som partitioneringsfunktionen" +ER_MULTIPLE_DEF_CONST_IN_LIST_PART_ERROR + eng "Multiple definition of same constant in list partitioning" + swe "Multipel definition av samma konstant i list partitionering" +ER_PARTITION_ENTRY_ERROR + eng "Partitioning can not be used stand-alone in query" + swe "Partitioneringssyntax kan inte användas på egen hand i en SQL-fråga" +ER_MIX_HANDLER_ERROR + eng "The mix of handlers in the partitions is not allowed in this version of MySQL" + swe "Denna mix av lagringsmotorer är inte tillåten i denna version av MySQL" +ER_PARTITION_NOT_DEFINED_ERROR + eng "For the partitioned engine it is necessary to define all %s" + swe "För partitioneringsmotorn så är det nödvändigt att definiera alla %s" +ER_TOO_MANY_PARTITIONS_ERROR + eng "Too many partitions 
were defined" + swe "För många partitioner definierades" +ER_SUBPARTITION_ERROR + eng "It is only possible to mix RANGE/LIST partitioning with HASH/KEY partitioning for subpartitioning" + swe "Det är endast möjligt att blanda RANGE/LIST partitionering med HASH/KEY partitionering för subpartitionering" +ER_CANT_CREATE_HANDLER_FILE + eng "Failed to create specific handler file" + swe "Misslyckades med att skapa specifik fil i lagringsmotor" +ER_BLOB_FIELD_IN_PART_FUNC_ERROR + eng "A BLOB field is not allowed in partition function" + swe "Ett BLOB-fält är inte tillåtet i partitioneringsfunktioner" +ER_CHAR_SET_IN_PART_FIELD_ERROR + eng "VARCHAR only allowed if binary collation for partition functions" + swe "VARCHAR endast tillåten med binär collation för partitioneringsfunktion" +ER_UNIQUE_KEY_NEED_ALL_FIELDS_IN_PF + eng "A %s need to include all fields in the partition function" + swe "En %s behöver inkludera alla fält i partitioneringsfunktionen för denna lagringsmotor" +ER_NO_PARTS_ERROR + eng "Number of %s = 0 is not an allowed value" + swe "Antal %s = 0 är inte ett tillåten värde" +ER_PARTITION_MGMT_ON_NONPARTITIONED + eng "Partition management on a not partitioned table is not possible" + swe "Partitioneringskommando på en opartitionerad tabell är inte möjligt" +ER_DROP_PARTITION_NON_EXISTENT + eng "Error in list of partitions to change" + swe "Fel i listan av partitioner att förändra" +ER_DROP_LAST_PARTITION + eng "Cannot remove all partitions, use DROP TABLE instead" + swe "Det är inte tillåtet att ta bort alla partitioner, använd DROP TABLE istället" +ER_COALESCE_ONLY_ON_HASH_PARTITION + eng "COALESCE PARTITION can only be used on HASH/KEY partitions" + swe "COALESCE PARTITION kan bara användas på HASH/KEY partitioner" +ER_ONLY_ON_RANGE_LIST_PARTITION + eng "%s PARTITION can only be used on RANGE/LIST partitions" + swe "%s PARTITION kan bara användas på RANGE/LIST-partitioner" +ER_ADD_PARTITION_SUBPART_ERROR + eng "Trying to Add partition(s) with wrong number of subpartitions" + swe "ADD PARTITION med fel antal subpartitioner" +ER_ADD_PARTITION_NO_NEW_PARTITION + eng "At least one partition must be added" + swe "Åtminstone en partition måste läggas till vid ADD PARTITION" +ER_COALESCE_PARTITION_NO_PARTITION + eng "At least one partition must be coalesced" + swe "Åtminstone en partition måste slås ihop vid COALESCE PARTITION" +ER_REORG_PARTITION_NOT_EXIST + eng "More partitions to reorganise than there are partitions" + swe "Fler partitioner att reorganisera än det finns partitioner" +ER_SAME_NAME_PARTITION + eng "All partitions must have unique names in the table" + swe "Alla partitioner i tabellen måste ha unika namn" +ER_CONSECUTIVE_REORG_PARTITIONS + eng "When reorganising a set of partitions they must be in consecutive order" + swe "När ett antal partitioner omorganiseras måste de vara i konsekutiv ordning" +ER_REORG_OUTSIDE_RANGE + eng "The new partitions cover a bigger range then the reorganised partitions do" + swe "De nya partitionerna täcker ett större intervall än de omorganiserade partitionerna" +ER_DROP_PARTITION_FAILURE + eng "Drop partition not supported in this version for this handler" +ER_DROP_PARTITION_WHEN_FK_DEFINED + eng "Cannot drop a partition when a foreign key constraint is defined on the table" + swe "Kan inte ta bort en partition när en främmande nyckel är definierad på tabellen" +ER_PLUGIN_IS_NOT_LOADED + eng "Plugin '%-.64s' is not loaded" +ER_WRONG_VALUE + eng "Incorrect %-.32s value: '%-.128s'" +ER_NO_PARTITION_FOR_GIVEN_VALUE + eng "Table has no 
partition for value %ld" +ER_BINLOG_ROW_LOGGING_FAILED + eng "Writing one row to the row-based binary log failed" +ER_BINLOG_ROW_WRONG_TABLE_DEF + eng "Table definition on master and slave does not match" +ER_BINLOG_ROW_RBR_TO_SBR + eng "Slave running with --log-slave-updates must use row-based binary logging to be able to replicate row-based binary log events" diff --git a/sql/slave.cc b/sql/slave.cc index 5e1c838730c..99bddb7b9b0 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -16,17 +16,21 @@ #include "mysql_priv.h" -#ifdef HAVE_REPLICATION - #include <mysql.h> #include <myisam.h> +#include "rpl_rli.h" #include "slave.h" #include "sql_repl.h" +#include "rpl_filter.h" #include "repl_failsafe.h" #include <thr_alarm.h> #include <my_dir.h> #include <sql_common.h> +#ifdef HAVE_REPLICATION + +#include "rpl_tblmap.h" + #define MAX_SLAVE_RETRY_PAUSE 5 bool use_slave_mask = 0; MY_BITMAP slave_error_mask; @@ -36,11 +40,6 @@ typedef bool (*CHECK_KILLED_FUNC)(THD*,void*); volatile bool slave_sql_running = 0, slave_io_running = 0; char* slave_load_tmpdir = 0; MASTER_INFO *active_mi; -HASH replicate_do_table, replicate_ignore_table; -DYNAMIC_ARRAY replicate_wild_do_table, replicate_wild_ignore_table; -bool do_table_inited = 0, ignore_table_inited = 0; -bool wild_do_table_inited = 0, wild_ignore_table_inited = 0; -bool table_rules_on= 0; my_bool replicate_same_server_id; ulonglong relay_log_space_limit = 0; @@ -52,8 +51,6 @@ ulonglong relay_log_space_limit = 0; */ int disconnect_slave_event_count = 0, abort_slave_event_count = 0; -int events_till_abort = -1; -static int events_till_disconnect = -1; typedef enum { SLAVE_THD_IO, SLAVE_THD_SQL} SLAVE_THD_TYPE; @@ -195,20 +192,6 @@ err: } -static void free_table_ent(TABLE_RULE_ENT* e) -{ - my_free((gptr) e, MYF(0)); -} - - -static byte* get_table_key(TABLE_RULE_ENT* e, uint* len, - my_bool not_used __attribute__((unused))) -{ - *len = e->key_len; - return (byte*)e->db; -} - - /* Open the given relay log @@ -531,7 +514,7 @@ void st_relay_log_info::close_temporary_tables() Don't ask for disk deletion. For now, anyway they will be deleted when slave restarts, but it is a better intention to not delete them. */ - close_temporary(table, 0); + close_temporary(table, 1, 0); } save_temporary_tables= 0; slave_open_temp_tables= 0; @@ -824,237 +807,6 @@ int start_slave_threads(bool need_slave_mutex, bool wait_for_start, } -void init_table_rule_hash(HASH* h, bool* h_inited) -{ - hash_init(h, system_charset_info,TABLE_RULE_HASH_SIZE,0,0, - (hash_get_key) get_table_key, - (hash_free_key) free_table_ent, 0); - *h_inited = 1; -} - - -void init_table_rule_array(DYNAMIC_ARRAY* a, bool* a_inited) -{ - my_init_dynamic_array(a, sizeof(TABLE_RULE_ENT*), TABLE_RULE_ARR_SIZE, - TABLE_RULE_ARR_SIZE); - *a_inited = 1; -} - - -static TABLE_RULE_ENT* find_wild(DYNAMIC_ARRAY *a, const char* key, int len) -{ - uint i; - const char* key_end = key + len; - - for (i = 0; i < a->elements; i++) - { - TABLE_RULE_ENT* e ; - get_dynamic(a, (gptr)&e, i); - if (!my_wildcmp(system_charset_info, key, key_end, - (const char*)e->db, - (const char*)(e->db + e->key_len), - '\\',wild_one,wild_many)) - return e; - } - - return 0; -} - - -/* - Checks whether tables match some (wild_)do_table and (wild_)ignore_table - rules (for replication) - - SYNOPSIS - tables_ok() - thd thread (SQL slave thread normally). Mustn't be null. - tables list of tables to check - - NOTES - Note that changing the order of the tables in the list can lead to - different results. 
Note also the order of precedence of the do/ignore - rules (see code below). For that reason, users should not set conflicting - rules because they may get unpredicted results (precedence order is - explained in the manual). - - Thought which arose from a question of a big customer "I want to include - all tables like "abc.%" except the "%.EFG"". This can't be done now. If we - supported Perl regexps we could do it with this pattern: /^abc\.(?!EFG)/ - (I could not find an equivalent in the regex library MySQL uses). - - RETURN VALUES - 0 should not be logged/replicated - 1 should be logged/replicated -*/ - -bool tables_ok(THD* thd, TABLE_LIST* tables) -{ - bool some_tables_updating= 0; - DBUG_ENTER("tables_ok"); - - /* - In routine, can't reliably pick and choose substatements, so always - replicate. - We can't reliably know if one substatement should be executed or not: - consider the case of this substatement: a SELECT on a non-replicated - constant table; if we don't execute it maybe it was going to fill a - variable which was going to be used by the next substatement to update - a replicated table? If we execute it maybe the constant non-replicated - table does not exist (and so we'll fail) while there was no need to - execute this as this SELECT does not influence replicated tables in the - rest of the routine? In other words: users are used to replicate-*-table - specifying how to handle updates to tables, these options don't say - anything about reads to tables; we can't guess. - */ - if (thd->spcont) - DBUG_RETURN(1); - - for (; tables; tables= tables->next_global) - { - char hash_key[2*NAME_LEN+2]; - char *end; - uint len; - - if (!tables->updating) - continue; - some_tables_updating= 1; - end= strmov(hash_key, tables->db ? tables->db : thd->db); - *end++= '.'; - len= (uint) (strmov(end, tables->table_name) - hash_key); - if (do_table_inited) // if there are any do's - { - if (hash_search(&replicate_do_table, (byte*) hash_key, len)) - DBUG_RETURN(1); - } - if (ignore_table_inited) // if there are any ignores - { - if (hash_search(&replicate_ignore_table, (byte*) hash_key, len)) - DBUG_RETURN(0); - } - if (wild_do_table_inited && find_wild(&replicate_wild_do_table, - hash_key, len)) - DBUG_RETURN(1); - if (wild_ignore_table_inited && find_wild(&replicate_wild_ignore_table, - hash_key, len)) - DBUG_RETURN(0); - } - - /* - If no table was to be updated, ignore statement (no reason we play it on - slave, slave is supposed to replicate _changes_ only). - If no explicit rule found and there was a do list, do not replicate. - If there was no do list, go ahead - */ - DBUG_RETURN(some_tables_updating && - !do_table_inited && !wild_do_table_inited); -} - - -/* - Checks whether a db matches wild_do_table and wild_ignore_table - rules (for replication) - - SYNOPSIS - db_ok_with_wild_table() - db name of the db to check. - Is tested with check_db_name() before calling this function. - - NOTES - Here is the reason for this function. - We advise users who want to exclude a database 'db1' safely to do it - with replicate_wild_ignore_table='db1.%' instead of binlog_ignore_db or - replicate_ignore_db because the two lasts only check for the selected db, - which won't work in that case: - USE db2; - UPDATE db1.t SET ... #this will be replicated and should not - whereas replicate_wild_ignore_table will work in all cases. - With replicate_wild_ignore_table, we only check tables. 
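[A minimal sketch of the matching these removed comments describe. MySQL uses my_wildcmp() with the server charset and escape handling; this toy version knows only '%' (any run of bytes) and '_' (exactly one byte), which is enough to show why the key "db1." built by db_ok_with_wild_table() matches a rule like "db1.%":

    #include <cstring>

    static bool wild_match(const char *key, const char *pat)
    {
      if (*pat == '\0')
        return *key == '\0';
      if (*pat == '%')                // '%' consumes zero or more bytes
        return wild_match(key, pat + 1) ||
               (*key && wild_match(key + 1, pat));
      if (*key && (*pat == '_' || *pat == *key))
        return wild_match(key + 1, pat + 1);
      return false;
    }

    // db_ok_with_wild_table() appends '.' to the db name and runs the
    // result through the wild-table rules, so DROP DATABASE db1 is caught
    // by a rule such as "db1.%".
    bool db_matches_rule(const char *db, const char *rule)
    {
      char key[2 * 64 + 2];           // sketch-sized buffer: db + '.'
      std::strcpy(key, db);
      std::strcat(key, ".");
      return wild_match(key, rule);   // ("db1", "db1.%") -> true
    }
]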
When - one does 'DROP DATABASE db1', tables are not involved and the - statement will be replicated, while users could expect it would not (as it - rougly means 'DROP db1.first_table, DROP db1.second_table...'). - In other words, we want to interpret 'db1.%' as "everything touching db1". - That is why we want to match 'db1' against 'db1.%' wild table rules. - - RETURN VALUES - 0 should not be logged/replicated - 1 should be logged/replicated - */ - -int db_ok_with_wild_table(const char *db) -{ - char hash_key[NAME_LEN+2]; - char *end; - int len; - end= strmov(hash_key, db); - *end++= '.'; - len= end - hash_key ; - if (wild_do_table_inited && find_wild(&replicate_wild_do_table, - hash_key, len)) - return 1; - if (wild_ignore_table_inited && find_wild(&replicate_wild_ignore_table, - hash_key, len)) - return 0; - - /* - If no explicit rule found and there was a do list, do not replicate. - If there was no do list, go ahead - */ - return !wild_do_table_inited; -} - - -int add_table_rule(HASH* h, const char* table_spec) -{ - const char* dot = strchr(table_spec, '.'); - if (!dot) return 1; - // len is always > 0 because we know the there exists a '.' - uint len = (uint)strlen(table_spec); - TABLE_RULE_ENT* e = (TABLE_RULE_ENT*)my_malloc(sizeof(TABLE_RULE_ENT) - + len, MYF(MY_WME)); - if (!e) return 1; - e->db = (char*)e + sizeof(TABLE_RULE_ENT); - e->tbl_name = e->db + (dot - table_spec) + 1; - e->key_len = len; - memcpy(e->db, table_spec, len); - (void)my_hash_insert(h, (byte*)e); - return 0; -} - - -/* - Add table expression with wildcards to dynamic array -*/ - -int add_wild_table_rule(DYNAMIC_ARRAY* a, const char* table_spec) -{ - const char* dot = strchr(table_spec, '.'); - if (!dot) return 1; - uint len = (uint)strlen(table_spec); - TABLE_RULE_ENT* e = (TABLE_RULE_ENT*)my_malloc(sizeof(TABLE_RULE_ENT) - + len, MYF(MY_WME)); - if (!e) return 1; - e->db = (char*)e + sizeof(TABLE_RULE_ENT); - e->tbl_name = e->db + (dot - table_spec) + 1; - e->key_len = len; - memcpy(e->db, table_spec, len); - insert_dynamic(a, (gptr)&e); - return 0; -} - - -static void free_string_array(DYNAMIC_ARRAY *a) -{ - uint i; - for (i = 0; i < a->elements; i++) - { - char* p; - get_dynamic(a, (gptr) &p, i); - my_free(p, MYF(MY_WME)); - } - delete_dynamic(a); -} - - #ifdef NOT_USED_YET static int end_slave_on_walk(MASTER_INFO* mi, gptr /*unused*/) { @@ -1090,14 +842,6 @@ void end_slave() */ terminate_slave_threads(active_mi,SLAVE_FORCE_ALL); end_master_info(active_mi); - if (do_table_inited) - hash_free(&replicate_do_table); - if (ignore_table_inited) - hash_free(&replicate_ignore_table); - if (wild_do_table_inited) - free_string_array(&replicate_wild_do_table); - if (wild_ignore_table_inited) - free_string_array(&replicate_wild_ignore_table); delete active_mi; active_mi= 0; } @@ -1117,19 +861,48 @@ static bool sql_slave_killed(THD* thd, RELAY_LOG_INFO* rli) { DBUG_ASSERT(rli->sql_thd == thd); DBUG_ASSERT(rli->slave_running == 1);// tracking buffer overrun - return rli->abort_slave || abort_loop || thd->killed; + if (abort_loop || thd->killed || rli->abort_slave) + { + /* + If we are in an unsafe situation (stopping could corrupt replication), + we give one minute to the slave SQL thread of grace before really + terminating, in the hope that it will be able to read more events and + the unsafe situation will soon be left. Note that this one minute starts + from the last time anything happened in the slave SQL thread. 
So it's + really one minute of idleness, we don't timeout if the slave SQL thread + is actively working. + */ + if (!rli->unsafe_to_stop_at) + return 1; + DBUG_PRINT("info", ("Slave SQL thread is in an unsafe situation, giving " + "it some grace period")); + if (difftime(time(0), rli->unsafe_to_stop_at) > 60) + { + slave_print_msg(ERROR_LEVEL, rli, 0, + "SQL thread had to stop in an unsafe situation, in " + "the middle of applying updates to a " + "non-transactional table without any primary key. " + "There is a risk of duplicate updates when the slave " + "SQL thread is restarted. Please check your tables' " + "contents after restart."); + return 1; + } + } + return 0; } /* - Writes an error message to rli->last_slave_error and rli->last_slave_errno - (which will be displayed by SHOW SLAVE STATUS), and prints it to stderr. + Writes a message to stderr, and if it's an error message, to + rli->last_slave_error and rli->last_slave_errno (which will be displayed by + SHOW SLAVE STATUS). SYNOPSIS - slave_print_error() - rli + slave_print_msg() + level The severity level + rli err_code The error code - msg The error message (usually related to the error code, but can + msg The message (usually related to the error code, but can contain more information). ... (this is printf-like format, with % symbols in msg) @@ -1137,22 +910,47 @@ static bool sql_slave_killed(THD* thd, RELAY_LOG_INFO* rli) void */ -void slave_print_error(RELAY_LOG_INFO* rli, int err_code, const char* msg, ...) +void slave_print_msg(enum loglevel level, RELAY_LOG_INFO* rli, + int err_code, const char* msg, ...) { + void (*report_function)(const char *, ...); + char buff[MAX_SLAVE_ERRMSG], *pbuff= buff; + uint pbuffsize= sizeof(buff); va_list args; va_start(args,msg); - my_vsnprintf(rli->last_slave_error, - sizeof(rli->last_slave_error), msg, args); - rli->last_slave_errno = err_code; - /* If the error string ends with '.', do not add a ',' it would be ugly */ - if (rli->last_slave_error[0] && - (*(strend(rli->last_slave_error)-1) == '.')) - sql_print_error("Slave: %s Error_code: %d", rli->last_slave_error, - err_code); + switch (level) + { + case ERROR_LEVEL: + /* + This my_error call only has effect in client threads. + Slave threads do nothing in my_error(). + */ + my_error(ER_UNKNOWN_ERROR, MYF(0), msg); + /* + It's an error, it must be reported in Last_error and Last_errno in SHOW + SLAVE STATUS. 
+ */ + pbuff= rli->last_slave_error; + pbuffsize= sizeof(rli->last_slave_error); + rli->last_slave_errno = err_code; + report_function= sql_print_error; + break; + case WARNING_LEVEL: + report_function= sql_print_warning; + break; + case INFORMATION_LEVEL: + report_function= sql_print_information; + break; + default: + DBUG_ASSERT(0); // should not come here + return; // don't crash production builds, just do nothing + } + my_vsnprintf(pbuff, pbuffsize, msg, args); + /* If the msg string ends with '.', do not add a ',' it would be ugly */ + if (pbuff[0] && (*(strend(pbuff)-1) == '.')) + (*report_function)("Slave: %s Error_code: %d", pbuff, err_code); else - sql_print_error("Slave: %s, Error_code: %d", rli->last_slave_error, - err_code); - + (*report_function)("Slave: %s, Error_code: %d", pbuff, err_code); } /* @@ -1176,25 +974,6 @@ bool net_request_file(NET* net, const char* fname) DBUG_RETURN(net_write_command(net, 251, fname, strlen(fname), "", 0)); } - -const char *rewrite_db(const char* db, uint32 *new_len) -{ - if (replicate_rewrite_db.is_empty() || !db) - return db; - I_List_iterator<i_string_pair> it(replicate_rewrite_db); - i_string_pair* tmp; - - while ((tmp=it++)) - { - if (!strcmp(tmp->key, db)) - { - *new_len= (uint32)strlen(tmp->val); - return tmp->val; - } - } - return db; -} - /* From other comments and tests in code, it looks like sometimes Query_log_event and Load_log_event can have db == 0 @@ -1207,61 +986,6 @@ const char *print_slave_db_safe(const char* db) return (db ? db : ""); } -/* - Checks whether a db matches some do_db and ignore_db rules - (for logging or replication) - - SYNOPSIS - db_ok() - db name of the db to check - do_list either binlog_do_db or replicate_do_db - ignore_list either binlog_ignore_db or replicate_ignore_db - - RETURN VALUES - 0 should not be logged/replicated - 1 should be logged/replicated -*/ - -int db_ok(const char* db, I_List<i_string> &do_list, - I_List<i_string> &ignore_list ) -{ - if (do_list.is_empty() && ignore_list.is_empty()) - return 1; // ok to replicate if the user puts no constraints - - /* - If the user has specified restrictions on which databases to replicate - and db was not selected, do not replicate. 
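[Condensed, the decision order that the removed db_ok() applies (and that the rpl_filter->db_ok() calls appearing later in this patch presumably preserve): an explicit do-list wins, otherwise the ignore-list is consulted, otherwise everything replicates. A sketch with standard containers standing in for I_List<i_string>:

    #include <set>
    #include <string>

    bool db_ok_sketch(const char *db,
                      const std::set<std::string> &do_list,
                      const std::set<std::string> &ignore_list)
    {
      if (do_list.empty() && ignore_list.empty())
        return true;                     // no constraints: replicate
      if (!db)
        return false;                    // constrained, but no current db
      if (!do_list.empty())
        return do_list.count(db) != 0;   // only listed dbs replicate
      return ignore_list.count(db) == 0; // everything except the ignore-list
    }
]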
- */ - if (!db) - return 0; - - if (!do_list.is_empty()) // if the do's are not empty - { - I_List_iterator<i_string> it(do_list); - i_string* tmp; - - while ((tmp=it++)) - { - if (!strcmp(tmp->ptr, db)) - return 1; // match - } - return 0; - } - else // there are some elements in the don't, otherwise we cannot get here - { - I_List_iterator<i_string> it(ignore_list); - i_string* tmp; - - while ((tmp=it++)) - { - if (!strcmp(tmp->ptr, db)) - return 0; // match - } - return 1; - } -} - - static int init_strvar_from_file(char *var, int max_size, IO_CACHE *f, const char *default_val) { @@ -1625,7 +1349,7 @@ static int create_table_from_dump(THD* thd, MYSQL *mysql, const char* db, error=file->repair(thd,&check_opt) != 0; thd->net.vio = save_vio; if (error) - my_error(ER_INDEX_REBUILD, MYF(0), tables.table->s->table_name); + my_error(ER_INDEX_REBUILD, MYF(0), tables.table->s->table_name.str); err: close_thread_tables(thd); @@ -1708,6 +1432,7 @@ static int init_relay_log_info(RELAY_LOG_INFO* rli, const char* msg = 0; int error = 0; DBUG_ENTER("init_relay_log_info"); + DBUG_ASSERT(!rli->no_storage); // Don't init if there is no storage if (rli->inited) // Set if this function called DBUG_RETURN(0); @@ -2003,7 +1728,7 @@ static void write_ignored_events_info_to_relay_log(THD *thd, MASTER_INFO *mi) if (rli->ign_master_log_name_end[0]) { DBUG_PRINT("info",("writing a Rotate event to track down ignored events")); - Rotate_log_event *ev= new Rotate_log_event(thd, rli->ign_master_log_name_end, + Rotate_log_event *ev= new Rotate_log_event(rli->ign_master_log_name_end, 0, rli->ign_master_log_pos_end, Rotate_log_event::DUP_NAME); rli->ign_master_log_name_end[0]= 0; @@ -2337,48 +2062,6 @@ int register_slave_on_master(MYSQL* mysql) } -/* - Builds a String from a HASH of TABLE_RULE_ENT. Cannot be used for any other - hash, as it assumes that the hash entries are TABLE_RULE_ENT. - - SYNOPSIS - table_rule_ent_hash_to_str() - s pointer to the String to fill - h pointer to the HASH to read - - RETURN VALUES - none -*/ - -void table_rule_ent_hash_to_str(String* s, HASH* h) -{ - s->length(0); - for (uint i=0 ; i < h->records ; i++) - { - TABLE_RULE_ENT* e= (TABLE_RULE_ENT*) hash_element(h, i); - if (s->length()) - s->append(','); - s->append(e->db,e->key_len); - } -} - -/* - Mostly the same thing as above -*/ - -void table_rule_ent_dynamic_array_to_str(String* s, DYNAMIC_ARRAY* a) -{ - s->length(0); - for (uint i=0 ; i < a->elements ; i++) - { - TABLE_RULE_ENT* e; - get_dynamic(a, (gptr)&e, i); - if (s->length()) - s->append(','); - s->append(e->db,e->key_len); - } -} - bool show_master_info(THD* thd, MASTER_INFO* mi) { // TODO: fix this for multi-master @@ -2474,23 +2157,18 @@ bool show_master_info(THD* thd, MASTER_INFO* mi) protocol->store(mi->slave_running == MYSQL_SLAVE_RUN_CONNECT ? "Yes" : "No", &my_charset_bin); protocol->store(mi->rli.slave_running ? "Yes":"No", &my_charset_bin); - protocol->store(&replicate_do_db); - protocol->store(&replicate_ignore_db); - /* - We can't directly use some protocol->store for - replicate_*_table, - as Protocol doesn't know the TABLE_RULE_ENT struct. - We first build Strings and then pass them to protocol->store. 
- */ + protocol->store(rpl_filter->get_do_db()); + protocol->store(rpl_filter->get_ignore_db()); + char buf[256]; String tmp(buf, sizeof(buf), &my_charset_bin); - table_rule_ent_hash_to_str(&tmp, &replicate_do_table); + rpl_filter->get_do_table(&tmp); protocol->store(&tmp); - table_rule_ent_hash_to_str(&tmp, &replicate_ignore_table); + rpl_filter->get_ignore_table(&tmp); protocol->store(&tmp); - table_rule_ent_dynamic_array_to_str(&tmp, &replicate_wild_do_table); + rpl_filter->get_wild_do_table(&tmp); protocol->store(&tmp); - table_rule_ent_dynamic_array_to_str(&tmp, &replicate_wild_ignore_table); + rpl_filter->get_wild_ignore_table(&tmp); protocol->store(&tmp); protocol->store((uint32) mi->rli.last_slave_errno); @@ -2617,17 +2295,17 @@ bool flush_master_info(MASTER_INFO* mi, bool flush_relay_log_cache) st_relay_log_info::st_relay_log_info() - :info_fd(-1), cur_log_fd(-1), save_temporary_tables(0), + :no_storage(FALSE), info_fd(-1), cur_log_fd(-1), save_temporary_tables(0), cur_log_old_open_count(0), group_master_log_pos(0), log_space_total(0), ignore_log_space_limit(0), last_master_timestamp(0), slave_skip_counter(0), abort_pos_wait(0), slave_run_id(0), sql_thd(0), last_slave_errno(0), inited(0), abort_slave(0), slave_running(0), until_condition(UNTIL_NONE), - until_log_pos(0), retried_trans(0) + until_log_pos(0), retried_trans(0), m_reload_flags(RELOAD_NONE_F), + unsafe_to_stop_at(0) { group_relay_log_name[0]= event_relay_log_name[0]= group_master_log_name[0]= 0; last_slave_error[0]= until_log_name[0]= ign_master_log_name_end[0]= 0; - bzero((char*) &info_file, sizeof(info_file)); bzero((char*) &cache_buf, sizeof(cache_buf)); cached_charset_invalidate(); @@ -3047,11 +2725,9 @@ static ulong read_event(MYSQL* mysql, MASTER_INFO *mi, bool* suppress_warnings) /* my_real_read() will time us out We check if we were told to die, and if not, try reading again - - TODO: Move 'events_till_disconnect' to the MASTER_INFO structure */ #ifndef DBUG_OFF - if (disconnect_slave_event_count && !(events_till_disconnect--)) + if (disconnect_slave_event_count && !(mi->events_till_disconnect--)) return packet_error; #endif @@ -3326,7 +3002,7 @@ static int exec_relay_log_event(THD* thd, RELAY_LOG_INFO* rli) thd->lex->current_select= 0; if (!ev->when) ev->when = time(NULL); - ev->thd = thd; + ev->thd = thd; // because up to this point, ev->thd == 0 exec_res = ev->exec_event(rli); DBUG_ASSERT(rli->sql_thd==thd); /* @@ -3398,7 +3074,7 @@ static int exec_relay_log_event(THD* thd, RELAY_LOG_INFO* rli) else { pthread_mutex_unlock(&rli->data_lock); - slave_print_error(rli, 0, "\ + slave_print_msg(ERROR_LEVEL, rli, 0, "\ Could not parse relay log event entry. The possible reasons are: the master's \ binary log is corrupted (you can check this by running 'mysqlbinlog' on the \ binary log), the slave's relay log is corrupted (you can check this by running \ @@ -3427,9 +3103,6 @@ pthread_handler_t handle_slave_io(void *arg) my_thread_init(); DBUG_ENTER("handle_slave_io"); -#ifndef DBUG_OFF -slave_begin: -#endif DBUG_ASSERT(mi->inited); mysql= NULL ; retry_count= 0; @@ -3439,7 +3112,7 @@ slave_begin: mi->slave_run_id++; #ifndef DBUG_OFF - mi->events_till_abort = abort_slave_event_count; + mi->events_till_disconnect = disconnect_slave_event_count; #endif thd= new THD; // note that contructor of THD uses DBUG_ ! 
@@ -3677,14 +3350,6 @@ ignore_log_space_limit=%d", log space"); goto err; } - // TODO: check debugging abort code -#ifndef DBUG_OFF - if (abort_slave_event_count && !--events_till_abort) - { - sql_print_error("Slave I/O thread: debugging abort"); - goto err; - } -#endif } } @@ -3723,10 +3388,6 @@ err: pthread_mutex_unlock(&LOCK_thread_count); pthread_cond_broadcast(&mi->stop_cond); // tell the world we are done pthread_mutex_unlock(&mi->run_lock); -#ifndef DBUG_OFF - if (abort_slave_event_count && !events_till_abort) - goto slave_begin; -#endif my_thread_end(); pthread_exit(0); DBUG_RETURN(0); // Can't return anything here @@ -3746,10 +3407,6 @@ pthread_handler_t handle_slave_sql(void *arg) my_thread_init(); DBUG_ENTER("handle_slave_sql"); -#ifndef DBUG_OFF -slave_begin: -#endif - DBUG_ASSERT(rli->inited); pthread_mutex_lock(&rli->run_lock); DBUG_ASSERT(!rli->slave_running); @@ -3896,6 +3553,14 @@ the slave SQL thread with \"SLAVE START\". We stopped at log \ RPL_LOG_NAME, llstr(rli->group_master_log_pos,llbuff)); err: + + /* + Some events set some playgrounds, which won't be cleared because thread + stops. Stopping of this thread may not be known to these events ("stop" + request is detected only by the present function, not by events), so we + must "proactively" clear playgrounds: + */ + rli->cleanup_context(thd, 1); VOID(pthread_mutex_lock(&LOCK_thread_count)); /* Some extra safety, which should not been needed (normally, event deletion @@ -3941,10 +3606,6 @@ the slave SQL thread with \"SLAVE START\". We stopped at log \ pthread_cond_broadcast(&rli->stop_cond); // tell the world we are done pthread_mutex_unlock(&rli->run_lock); -#ifndef DBUG_OFF // TODO: reconsider the code below - if (abort_slave_event_count && !rli->events_till_abort) - goto slave_begin; -#endif my_thread_end(); pthread_exit(0); DBUG_RETURN(0); // Can't return anything here @@ -3966,10 +3627,8 @@ static int process_io_create_file(MASTER_INFO* mi, Create_file_log_event* cev) if (unlikely(!cev->is_valid())) DBUG_RETURN(1); - /* - TODO: fix to honor table rules, not only db rules - */ - if (!db_ok(cev->db, replicate_do_db, replicate_ignore_db)) + + if (!rpl_filter->db_ok(cev->db)) { skip_load_data_infile(net); DBUG_RETURN(0); @@ -4099,7 +3758,7 @@ static int process_io_rotate(MASTER_INFO *mi, Rotate_log_event *rev) rotate event forever, so we need to not disconnect after one. 
*/ if (disconnect_slave_event_count) - events_till_disconnect++; + mi->events_till_disconnect++; #endif /* @@ -4555,7 +4214,7 @@ static int connect_to_master(THD* thd, MYSQL* mysql, MASTER_INFO* mi, DBUG_ENTER("connect_to_master"); #ifndef DBUG_OFF - events_till_disconnect = disconnect_slave_event_count; + mi->events_till_disconnect = disconnect_slave_event_count; #endif ulong client_flag= CLIENT_REMEMBER_OPTIONS; if (opt_slave_compressed_protocol) @@ -4689,6 +4348,10 @@ static int safe_reconnect(THD* thd, MYSQL* mysql, MASTER_INFO* mi, bool flush_relay_log_info(RELAY_LOG_INFO* rli) { bool error=0; + + if (unlikely(rli->no_storage)) + return 0; + IO_CACHE *file = &rli->info_file; char buff[FN_REFLEN*2+22*2+4], *pos; @@ -4705,6 +4368,7 @@ bool flush_relay_log_info(RELAY_LOG_INFO* rli) error=1; if (flush_io_cache(file)) error=1; + /* Flushing the relay log is done by the slave I/O thread */ return error; } @@ -4735,7 +4399,7 @@ static IO_CACHE *reopen_relay_log(RELAY_LOG_INFO *rli, const char **errmsg) } -Log_event* next_event(RELAY_LOG_INFO* rli) +static Log_event* next_event(RELAY_LOG_INFO* rli) { Log_event* ev; IO_CACHE* cur_log = rli->cur_log; @@ -4746,6 +4410,11 @@ Log_event* next_event(RELAY_LOG_INFO* rli) DBUG_ENTER("next_event"); DBUG_ASSERT(thd != 0); +#ifndef DBUG_OFF + if (abort_slave_event_count && !rli->events_till_abort--) + DBUG_RETURN(0); +#endif + /* For most operations we need to protect rli members with data_lock, so we assume calling function acquired this mutex for us and we will @@ -4867,7 +4536,7 @@ Log_event* next_event(RELAY_LOG_INFO* rli) { /* We generate and return a Rotate, to make our positions advance */ DBUG_PRINT("info",("seeing an ignored end segment")); - ev= new Rotate_log_event(thd, rli->ign_master_log_name_end, + ev= new Rotate_log_event(rli->ign_master_log_name_end, 0, rli->ign_master_log_pos_end, Rotate_log_event::DUP_NAME); rli->ign_master_log_name_end[0]= 0; @@ -5115,11 +4784,114 @@ end: DBUG_VOID_RETURN; } +/* + Some system tables needed to be re-read by the MySQL server after it has + updated them; in statement-based replication, the GRANT and other commands + are sent verbatim to the slave which then reloads; in row-based replication, + changes to these tables are done through ordinary Rows binlog events, so + master must add some flag for the slave to know it has to reload the tables. +*/ +struct st_reload_entry +{ + char const *table; + st_relay_log_info::enum_reload_flag flag; +}; + +/* + Sorted array of table names, please keep it sorted since we are + using bsearch() on it below. 
+ */ +static st_reload_entry s_mysql_tables[] = +{ + { "columns_priv", st_relay_log_info::RELOAD_GRANT_F }, + { "db", st_relay_log_info::RELOAD_ACCESS_F }, + { "host", st_relay_log_info::RELOAD_ACCESS_F }, + { "procs_priv", st_relay_log_info::RELOAD_GRANT_F }, + { "tables_priv", st_relay_log_info::RELOAD_GRANT_F }, + { "user", st_relay_log_info::RELOAD_ACCESS_F } +}; + +static const my_size_t s_mysql_tables_size = + sizeof(s_mysql_tables)/sizeof(*s_mysql_tables); + +static int reload_entry_compare(const void *lhs, const void *rhs) +{ + const char *lstr = static_cast<const char *>(lhs); + const char *rstr = static_cast<const st_reload_entry*>(rhs)->table; + return strcmp(lstr, rstr); +} + +void st_relay_log_info::touching_table(char const* db, char const* table, + ulong table_id) +{ + if (strcmp(db,"mysql") == 0) + { +#if defined(HAVE_BSEARCH) && defined(HAVE_SIZE_T) + void *const ptr= bsearch(table, s_mysql_tables, + s_mysql_tables_size, + sizeof(*s_mysql_tables), reload_entry_compare); + st_reload_entry const *const entry= static_cast<st_reload_entry*>(ptr); +#else + /* + Fall back to full scan, there are few rows anyway and updating the + "mysql" database is rare. + */ + st_reload_entry const *entry= s_mysql_tables; + for ( ; entry < s_mysql_tables + s_mysql_tables_size ; entry++) + if (reload_entry_compare(table, entry) == 0) + break; +#endif + if (entry) + m_reload_flags|= entry->flag; + } +} + +void st_relay_log_info::transaction_end(THD* thd) +{ + if (m_reload_flags != RELOAD_NONE_F) + { + if (m_reload_flags & RELOAD_ACCESS_F) + acl_reload(thd); + + if (m_reload_flags & RELOAD_GRANT_F) + grant_reload(thd); + + m_reload_flags= RELOAD_NONE_F; + } +} + +#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) +void st_relay_log_info::cleanup_context(THD *thd, bool error) +{ + DBUG_ASSERT(sql_thd == thd); + /* + 1) Instances of Table_map_log_event, if ::exec_event() was called on them, + may have opened tables, which we cannot be sure have been closed (because + maybe the Rows_log_event have not been found or will not be, because slave + SQL thread is stopping, or relay log has a missing tail etc). So we close + all thread's tables. And so the table mappings have to be cancelled. + 2) Rows_log_event::exec_event() may even have started statements or + transactions on them, which we need to rollback in case of error. + 3) If finding a Format_description_log_event after a BEGIN, we also need + to rollback before continuing with the next events. + 4) so we need this "context cleanup" function. 
+ */ + if (error) + { + ha_autocommit_or_rollback(thd, 1); // if a "statement transaction" + end_trans(thd, ROLLBACK); // if a "real transaction" + } + m_table_map.clear_tables(); + close_thread_tables(thd); + unsafe_to_stop_at= 0; +} +#endif + #ifdef HAVE_EXPLICIT_TEMPLATE_INSTANTIATION template class I_List_iterator<i_string>; template class I_List_iterator<i_string_pair>; #endif - #endif /* HAVE_REPLICATION */ + diff --git a/sql/slave.h b/sql/slave.h index 4d3c338680d..6870aaca752 100644 --- a/sql/slave.h +++ b/sql/slave.h @@ -14,15 +14,19 @@ along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#ifdef HAVE_REPLICATION - #ifndef SLAVE_H #define SLAVE_H -#include "mysql.h" +#ifdef HAVE_REPLICATION + +#include "log.h" #include "my_list.h" +#include "rpl_filter.h" +#include "rpl_tblmap.h" +#include "rpl_rli.h" + #define SLAVE_NET_TIMEOUT 3600 -#define MAX_SLAVE_ERRMSG 1024 + #define MAX_SLAVE_ERROR 2000 /***************************************************************************** @@ -108,265 +112,7 @@ struct st_master_info; #define MYSQL_SLAVE_RUN_NOT_CONNECT 1 #define MYSQL_SLAVE_RUN_CONNECT 2 -/**************************************************************************** - - Replication SQL Thread - - st_relay_log_info contains: - - the current relay log - - the current relay log offset - - master log name - - master log sequence corresponding to the last update - - misc information specific to the SQL thread - - st_relay_log_info is initialized from the slave.info file if such exists. - Otherwise, data members are intialized with defaults. The initialization is - done with init_relay_log_info() call. - - The format of slave.info file: - - relay_log_name - relay_log_pos - master_log_name - master_log_pos - - To clean up, call end_relay_log_info() - -*****************************************************************************/ - -typedef struct st_relay_log_info -{ - /*** The following variables can only be read when protect by data lock ****/ - - /* - info_fd - file descriptor of the info file. set only during - initialization or clean up - safe to read anytime - cur_log_fd - file descriptor of the current read relay log - */ - File info_fd,cur_log_fd; - - /* - Protected with internal locks. - Must get data_lock when resetting the logs. - */ - MYSQL_LOG relay_log; - LOG_INFO linfo; - IO_CACHE cache_buf,*cur_log; - - /* The following variables are safe to read any time */ - - /* IO_CACHE of the info file - set only during init or end */ - IO_CACHE info_file; - - /* - When we restart slave thread we need to have access to the previously - created temporary tables. Modified only on init/end and by the SQL - thread, read only by SQL thread. - */ - TABLE *save_temporary_tables; - - /* - standard lock acquistion order to avoid deadlocks: - run_lock, data_lock, relay_log.LOCK_log, relay_log.LOCK_index - */ - pthread_mutex_t data_lock,run_lock; - - /* - start_cond is broadcast when SQL thread is started - stop_cond - when stopped - data_cond - when data protected by data_lock changes - */ - pthread_cond_t start_cond, stop_cond, data_cond; - - /* parent master info structure */ - struct st_master_info *mi; - - /* - Needed to deal properly with cur_log getting closed and re-opened with - a different log under our feet - */ - uint32 cur_log_old_open_count; - - /* - Let's call a group (of events) : - - a transaction - or - - an autocommiting query + its associated events (INSERT_ID, - TIMESTAMP...) 
- We need these rli coordinates : - - relay log name and position of the beginning of the group we currently are - executing. Needed to know where we have to restart when replication has - stopped in the middle of a group (which has been rolled back by the slave). - - relay log name and position just after the event we have just - executed. This event is part of the current group. - Formerly we only had the immediately above coordinates, plus a 'pending' - variable, but this dealt wrong with the case of a transaction starting on a - relay log and finishing (commiting) on another relay log. Case which can - happen when, for example, the relay log gets rotated because of - max_binlog_size. - */ - char group_relay_log_name[FN_REFLEN]; - ulonglong group_relay_log_pos; - char event_relay_log_name[FN_REFLEN]; - ulonglong event_relay_log_pos; - ulonglong future_event_relay_log_pos; - - /* - Original log name and position of the group we're currently executing - (whose coordinates are group_relay_log_name/pos in the relay log) - in the master's binlog. These concern the *group*, because in the master's - binlog the log_pos that comes with each event is the position of the - beginning of the group. - */ - char group_master_log_name[FN_REFLEN]; - volatile my_off_t group_master_log_pos; - - /* - Handling of the relay_log_space_limit optional constraint. - ignore_log_space_limit is used to resolve a deadlock between I/O and SQL - threads, the SQL thread sets it to unblock the I/O thread and make it - temporarily forget about the constraint. - */ - ulonglong log_space_limit,log_space_total; - bool ignore_log_space_limit; - - /* - When it commits, InnoDB internally stores the master log position it has - processed so far; the position to store is the one of the end of the - committing event (the COMMIT query event, or the event if in autocommit - mode). - */ -#if MYSQL_VERSION_ID < 40100 - ulonglong future_master_log_pos; -#else - ulonglong future_group_master_log_pos; -#endif - - time_t last_master_timestamp; - - /* - Needed for problems when slave stops and we want to restart it - skipping one or more events in the master log that have caused - errors, and have been manually applied by DBA already. - */ - volatile uint32 slave_skip_counter; - volatile ulong abort_pos_wait; /* Incremented on change master */ - volatile ulong slave_run_id; /* Incremented on slave start */ - pthread_mutex_t log_space_lock; - pthread_cond_t log_space_cond; - THD * sql_thd; - int last_slave_errno; -#ifndef DBUG_OFF - int events_till_abort; -#endif - char last_slave_error[MAX_SLAVE_ERRMSG]; - - /* if not set, the value of other members of the structure are undefined */ - bool inited; - volatile bool abort_slave; - volatile uint slave_running; - - /* - Condition and its parameters from START SLAVE UNTIL clause. - - UNTIL condition is tested with is_until_satisfied() method that is - called by exec_relay_log_event(). is_until_satisfied() caches the result - of the comparison of log names because log names don't change very often; - this cache is invalidated by parts of code which change log names with - notify_*_log_name_updated() methods. (They need to be called only if SQL - thread is running). 
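[The two coordinate pairs this removed comment documents (the struct and its documentation move out of slave.h, presumably to the rpl_rli.h that slave.h now includes) can be summarised as: event_* advances after every executed event, while group_* is copied from event_* only when a group completes, so a stop or crash restarts from the last group boundary rather than from the middle of a rolled-back transaction. Schematically, with illustrative names:

    #include <string>

    struct rli_coords
    {
      std::string        group_relay_log_name;  // restart point
      unsigned long long group_relay_log_pos;
      std::string        event_relay_log_name;  // read-ahead position
      unsigned long long event_relay_log_pos;

      rli_coords() : group_relay_log_pos(0), event_relay_log_pos(0) {}

      void on_event_executed(unsigned long long pos_after_event)
      {
        event_relay_log_pos= pos_after_event;   // cf. inc_event_relay_log_pos()
      }

      void on_group_done()                      // commit of a transaction, or
      {                                         // end of an autocommit group
        group_relay_log_name= event_relay_log_name;
        group_relay_log_pos=  event_relay_log_pos;
      }
    };
]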
- */ - - enum {UNTIL_NONE= 0, UNTIL_MASTER_POS, UNTIL_RELAY_POS} until_condition; - char until_log_name[FN_REFLEN]; - ulonglong until_log_pos; - /* extension extracted from log_name and converted to int */ - ulong until_log_name_extension; - /* - Cached result of comparison of until_log_name and current log name - -2 means unitialised, -1,0,1 are comarison results - */ - enum - { - UNTIL_LOG_NAMES_CMP_UNKNOWN= -2, UNTIL_LOG_NAMES_CMP_LESS= -1, - UNTIL_LOG_NAMES_CMP_EQUAL= 0, UNTIL_LOG_NAMES_CMP_GREATER= 1 - } until_log_names_cmp_result; - - char cached_charset[6]; - /* - trans_retries varies between 0 to slave_transaction_retries and counts how - many times the slave has retried the present transaction; gets reset to 0 - when the transaction finally succeeds. retried_trans is a cumulative - counter: how many times the slave has retried a transaction (any) since - slave started. - */ - ulong trans_retries, retried_trans; - - /* - If the end of the hot relay log is made of master's events ignored by the - slave I/O thread, these two keep track of the coords (in the master's - binlog) of the last of these events seen by the slave I/O thread. If not, - ign_master_log_name_end[0] == 0. - As they are like a Rotate event read/written from/to the relay log, they - are both protected by rli->relay_log.LOCK_log. - */ - char ign_master_log_name_end[FN_REFLEN]; - ulonglong ign_master_log_pos_end; - - st_relay_log_info(); - ~st_relay_log_info(); - - /* - Invalidate cached until_log_name and group_relay_log_name comparison - result. Should be called after any update of group_realy_log_name if - there chances that sql_thread is running. - */ - inline void notify_group_relay_log_name_update() - { - if (until_condition==UNTIL_RELAY_POS) - until_log_names_cmp_result= UNTIL_LOG_NAMES_CMP_UNKNOWN; - } - - /* - The same as previous but for group_master_log_name. - */ - inline void notify_group_master_log_name_update() - { - if (until_condition==UNTIL_MASTER_POS) - until_log_names_cmp_result= UNTIL_LOG_NAMES_CMP_UNKNOWN; - } - - inline void inc_event_relay_log_pos() - { - event_relay_log_pos= future_event_relay_log_pos; - } - - void inc_group_relay_log_pos(ulonglong log_pos, - bool skip_lock=0); - - int wait_for_pos(THD* thd, String* log_name, longlong log_pos, - longlong timeout); - void close_temporary_tables(); - - /* Check if UNTIL condition is satisfied. See slave.cc for more. */ - bool is_until_satisfied(); - inline ulonglong until_pos() - { - return ((until_condition == UNTIL_MASTER_POS) ? group_master_log_pos : - group_relay_log_pos); - } - /* - Last charset (6 bytes) seen by slave SQL thread is cached here; it helps - the thread save 3 get_charset() per Query_log_event if the charset is not - changing from event to event (common situation). - When the 6 bytes are equal to 0 is used to mean "cache is invalidated". 
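[The 6-byte charset cache just described, as an isolated sketch (the real methods live on st_relay_log_info; the miss behaviour — store the new value and report a change — is an assumption consistent with the comment):

    #include <cstring>

    struct charset_cache
    {
      char cached[6];

      charset_cache() { invalidate(); }
      void invalidate() { std::memset(cached, 0, sizeof(cached)); }

      // true  -> charset changed (or cache was invalid): the caller must do
      //          its get_charset() lookups, and the cache now holds the value
      // false -> same 6 bytes as the previous event: skip the lookups
      bool changed(const char *cs)
      {
        if (std::memcmp(cached, cs, sizeof(cached)) == 0)
          return false;
        std::memcpy(cached, cs, sizeof(cached));
        return true;
      }
    };

Zeroing the buffer doubles as the "invalid" marker, presumably because no real charset triple encodes as six zero bytes.]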
- */ - void cached_charset_invalidate(); - bool cached_charset_compare(char *charset); -} RELAY_LOG_INFO; - - -Log_event* next_event(RELAY_LOG_INFO* rli); +static Log_event* next_event(RELAY_LOG_INFO* rli); /***************************************************************************** @@ -425,7 +171,7 @@ typedef struct st_master_info uint port; uint connect_retry; #ifndef DBUG_OFF - int events_till_abort; + int events_till_disconnect; #endif bool inited; volatile bool abort_slave; @@ -472,26 +218,11 @@ typedef struct st_master_info int queue_event(MASTER_INFO* mi,const char* buf,ulong event_len); -typedef struct st_table_rule_ent -{ - char* db; - char* tbl_name; - uint key_len; -} TABLE_RULE_ENT; - -#define TABLE_RULE_HASH_SIZE 16 -#define TABLE_RULE_ARR_SIZE 16 -#define MAX_SLAVE_ERRMSG 1024 - #define RPL_LOG_NAME (rli->group_master_log_name[0] ? rli->group_master_log_name :\ "FIRST") #define IO_RPL_LOG_NAME (mi->master_log_name[0] ? mi->master_log_name :\ "FIRST") -/* masks for start/stop operations on io and sql slave threads */ -#define SLAVE_IO 1 -#define SLAVE_SQL 2 - /* If the following is set, if first gives an error, second will be tried. Otherwise, if first fails, we fail. @@ -534,31 +265,14 @@ int mysql_table_dump(THD* thd, const char* db, int fetch_master_table(THD* thd, const char* db_name, const char* table_name, MASTER_INFO* mi, MYSQL* mysql, bool overwrite); -void table_rule_ent_hash_to_str(String* s, HASH* h); -void table_rule_ent_dynamic_array_to_str(String* s, DYNAMIC_ARRAY* a); bool show_master_info(THD* thd, MASTER_INFO* mi); bool show_binlog_info(THD* thd); -/* See if the query uses any tables that should not be replicated */ -bool tables_ok(THD* thd, TABLE_LIST* tables); - -/* - Check to see if the database is ok to operate on with respect to the - do and ignore lists - used in replication -*/ -int db_ok(const char* db, I_List<i_string> &do_list, - I_List<i_string> &ignore_list ); -int db_ok_with_wild_table(const char *db); - -int add_table_rule(HASH* h, const char* table_spec); -int add_wild_table_rule(DYNAMIC_ARRAY* a, const char* table_spec); -void init_table_rule_hash(HASH* h, bool* h_inited); -void init_table_rule_array(DYNAMIC_ARRAY* a, bool* a_inited); -const char *rewrite_db(const char* db, uint32 *new_db_len); const char *print_slave_db_safe(const char *db); int check_expected_error(THD* thd, RELAY_LOG_INFO* rli, int error_code); void skip_load_data_infile(NET* net); -void slave_print_error(RELAY_LOG_INFO* rli, int err_code, const char* msg, ...); +void slave_print_msg(enum loglevel level, RELAY_LOG_INFO* rli, + int err_code, const char* msg, ...); void end_slave(); /* clean up */ void init_master_info_with_options(MASTER_INFO* mi); @@ -588,11 +302,6 @@ pthread_handler_t handle_slave_sql(void *arg); extern bool volatile abort_loop; extern MASTER_INFO main_mi, *active_mi; /* active_mi for multi-master */ extern LIST master_list; -extern HASH replicate_do_table, replicate_ignore_table; -extern DYNAMIC_ARRAY replicate_wild_do_table, replicate_wild_ignore_table; -extern bool do_table_inited, ignore_table_inited, - wild_do_table_inited, wild_ignore_table_inited; -extern bool table_rules_on; extern my_bool replicate_same_server_id; extern int disconnect_slave_event_count, abort_slave_event_count ; @@ -607,12 +316,14 @@ extern my_bool master_ssl; extern my_string master_ssl_ca, master_ssl_capath, master_ssl_cert, master_ssl_cipher, master_ssl_key; -extern I_List<i_string> replicate_do_db, replicate_ignore_db; -extern I_List<i_string_pair> replicate_rewrite_db; 
extern I_List<THD> threads; -#endif -#else +#endif /* HAVE_REPLICATION */ + +/* masks for start/stop operations on io and sql slave threads */ #define SLAVE_IO 1 #define SLAVE_SQL 2 -#endif /* HAVE_REPLICATION */ + +#endif + + diff --git a/sql/sp.cc b/sql/sp.cc index a9b1a462d5f..f70e150419a 100644 --- a/sql/sp.cc +++ b/sql/sp.cc @@ -14,7 +14,6 @@ along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ - #include "mysql_priv.h" #include "sp.h" #include "sp_head.h" @@ -466,10 +465,12 @@ static void sp_returns_type(THD *thd, String &result, sp_head *sp) { TABLE table; + TABLE_SHARE share; Field *field; - bzero(&table, sizeof(table)); + bzero((char*) &table, sizeof(table)); + bzero((char*) &share, sizeof(share)); table.in_use= thd; - table.s = &table.share_not_to_be_used; + table.s = &share; field= sp->create_result_field(0, 0, &table); field->sql_type(result); delete field; @@ -587,14 +588,14 @@ db_create_routine(THD *thd, int type, sp_head *sp) } ret= SP_OK; - if (table->file->write_row(table->record[0])) + if (table->file->ha_write_row(table->record[0])) ret= SP_WRITE_ROW_FAILED; else if (mysql_bin_log.is_open()) { thd->clear_error(); /* Such a statement can always go directly to binlog, no trans cache */ - Query_log_event qinfo(thd, thd->query, thd->query_length, 0, FALSE); - mysql_bin_log.write(&qinfo); + thd->binlog_query(THD::MYSQL_QUERY_TYPE, + thd->query, thd->query_length, FALSE, FALSE); } } @@ -620,7 +621,7 @@ db_drop_routine(THD *thd, int type, sp_name *name) DBUG_RETURN(SP_OPEN_TABLE_FAILED); if ((ret= db_find_routine_aux(thd, type, name, table)) == SP_OK) { - if (table->file->delete_row(table->record[0])) + if (table->file->ha_delete_row(table->record[0])) ret= SP_DELETE_ROW_FAILED; } close_thread_tables(thd); @@ -655,7 +656,7 @@ db_update_routine(THD *thd, int type, sp_name *name, st_sp_chistics *chistics) table->field[MYSQL_PROC_FIELD_COMMENT]->store(chistics->comment.str, chistics->comment.length, system_charset_info); - if ((table->file->update_row(table->record[1],table->record[0]))) + if ((table->file->ha_update_row(table->record[1],table->record[0]))) ret= SP_WRITE_ROW_FAILED; } close_thread_tables(thd); @@ -816,7 +817,7 @@ db_show_routine_status(THD *thd, int type, const char *wild) } } - table->file->ha_index_init(0); + table->file->ha_index_init(0, 1); if ((res= table->file->index_first(table->record[0]))) { res= (res == HA_ERR_END_OF_FILE) ? 0 : SP_INTERNAL_ERROR; @@ -866,7 +867,7 @@ sp_drop_db_routines(THD *thd, char *db) goto err; ret= SP_OK; - table->file->ha_index_init(0); + table->file->ha_index_init(0, 1); if (! table->file->index_read(table->record[0], key, keylen, HA_READ_KEY_EXACT)) { @@ -875,7 +876,7 @@ sp_drop_db_routines(THD *thd, char *db) do { - if (! table->file->delete_row(table->record[0])) + if (! 
table->file->ha_delete_row(table->record[0])) deleted= TRUE; /* We deleted something */ else { diff --git a/sql/sp_head.cc b/sql/sp_head.cc index 12f9260e7b1..1d171d929f3 100644 --- a/sql/sp_head.cc +++ b/sql/sp_head.cc @@ -172,11 +172,11 @@ sp_get_flags_for_command(LEX *lex) case SQLCOM_SHOW_ERRORS: case SQLCOM_SHOW_FIELDS: case SQLCOM_SHOW_GRANTS: - case SQLCOM_SHOW_INNODB_STATUS: + case SQLCOM_SHOW_ENGINE_STATUS: + case SQLCOM_SHOW_ENGINE_LOGS: + case SQLCOM_SHOW_ENGINE_MUTEX: case SQLCOM_SHOW_KEYS: - case SQLCOM_SHOW_LOGS: case SQLCOM_SHOW_MASTER_STAT: - case SQLCOM_SHOW_MUTEX_STATUS: case SQLCOM_SHOW_NEW_MASTER: case SQLCOM_SHOW_OPEN_TABLES: case SQLCOM_SHOW_PRIVILEGES: @@ -306,6 +306,9 @@ sp_eval_expr(THD *thd, Field *result_field, Item *expr_item) { DBUG_ENTER("sp_eval_expr"); + if (!expr_item) + DBUG_RETURN(TRUE); + if (!(expr_item= sp_prepare_func_item(thd, &expr_item))) DBUG_RETURN(TRUE); @@ -470,7 +473,7 @@ void sp_head::init_strings(THD *thd, LEX *lex, sp_name *name) { DBUG_ENTER("sp_head::init_strings"); - uchar *endp; /* Used to trim the end */ + const uchar *endp; /* Used to trim the end */ /* During parsing, we must use thd->mem_root */ MEM_ROOT *root= thd->mem_root; @@ -671,7 +674,8 @@ sp_head::create_result_field(uint field_max_length, const char *field_name, field_length= !m_return_field_def.length ? field_max_length : m_return_field_def.length; - field= ::make_field((char*) 0, /* field ptr */ + field= ::make_field(table->s, /* TABLE_SHARE ptr */ + (char*) 0, /* field ptr */ field_length, /* field [max] length */ (uchar*) "", /* null ptr */ 0, /* null bit */ @@ -681,8 +685,10 @@ sp_head::create_result_field(uint field_max_length, const char *field_name, m_return_field_def.geom_type, Field::NONE, /* unreg check */ m_return_field_def.interval, - field_name ? field_name : (const char *) m_name.str, - table); + field_name ? field_name : (const char *) m_name.str); + + if (field) + field->init(table); DBUG_RETURN(field); } @@ -696,6 +702,9 @@ int cmp_splocal_locations(Item_splocal * const *a, Item_splocal * const *b) /* StoredRoutinesBinlogging + This paragraph applies only to statement-based binlogging. Row-based + binlogging does not need anything special like this. + Top-down overview: 1. Statements @@ -1263,56 +1272,62 @@ sp_head::execute_function(THD *thd, Item **argp, uint argcount, thd->spcont= nctx; - binlog_save_options= thd->options; - need_binlog_call= mysql_bin_log.is_open() && (thd->options & OPTION_BIN_LOG); + /* + If row-based binlogging, we don't need to binlog the function's call, let + each substatement be binlogged its way. 
+ */ + need_binlog_call= mysql_bin_log.is_open() && + (thd->options & OPTION_BIN_LOG) && !binlog_row_based; if (need_binlog_call) { reset_dynamic(&thd->user_var_events); mysql_bin_log.start_union_events(thd); + binlog_save_options= thd->options; + thd->options&= ~OPTION_BIN_LOG; } - - thd->options&= ~OPTION_BIN_LOG; + err_status= execute(thd); - thd->options= binlog_save_options; - - if (need_binlog_call) - mysql_bin_log.stop_union_events(thd); - if (need_binlog_call && thd->binlog_evt_union.unioned_events) + if (need_binlog_call) { - char buf[256]; - String bufstr(buf, sizeof(buf), &my_charset_bin); - bufstr.length(0); - bufstr.append(STRING_WITH_LEN("DO ")); - append_identifier(thd, &bufstr, m_name.str, m_name.length); - bufstr.append('('); - for (uint i=0; i < argcount; i++) - { - String str_value_holder; - String *str_value; - - if (i) - bufstr.append(','); - - str_value= sp_get_item_value(param_values[i], &str_value_holder); - - if (str_value) - bufstr.append(*str_value); - else - bufstr.append(STRING_WITH_LEN("NULL")); - } - bufstr.append(')'); - - Query_log_event qinfo(thd, bufstr.ptr(), bufstr.length(), - thd->binlog_evt_union.unioned_events_trans, FALSE); - if (mysql_bin_log.write(&qinfo) && - thd->binlog_evt_union.unioned_events_trans) + mysql_bin_log.stop_union_events(thd); + thd->options= binlog_save_options; + if (thd->binlog_evt_union.unioned_events) { - push_warning(thd, MYSQL_ERROR::WARN_LEVEL_WARN, ER_UNKNOWN_ERROR, - "Invoked ROUTINE modified a transactional table but MySQL " - "failed to reflect this change in the binary log"); + char buf[256]; + String bufstr(buf, sizeof(buf), &my_charset_bin); + bufstr.length(0); + bufstr.append(STRING_WITH_LEN("DO ")); + append_identifier(thd, &bufstr, m_name.str, m_name.length); + bufstr.append('('); + for (uint i=0; i < argcount; i++) + { + String str_value_holder; + String *str_value; + + if (i) + bufstr.append(','); + + str_value= sp_get_item_value(param_values[i], &str_value_holder); + + if (str_value) + bufstr.append(*str_value); + else + bufstr.append(STRING_WITH_LEN("NULL")); + } + bufstr.append(')'); + + Query_log_event qinfo(thd, bufstr.ptr(), bufstr.length(), + thd->binlog_evt_union.unioned_events_trans, FALSE); + if (mysql_bin_log.write(&qinfo) && + thd->binlog_evt_union.unioned_events_trans) + { + push_warning(thd, MYSQL_ERROR::WARN_LEVEL_WARN, ER_UNKNOWN_ERROR, + "Invoked ROUTINE modified a transactional table but MySQL " + "failed to reflect this change in the binary log"); + } + reset_dynamic(&thd->user_var_events); } - reset_dynamic(&thd->user_var_events); } if (m_type == TYPE_ENUM_FUNCTION && !err_status) diff --git a/sql/sp_head.h b/sql/sp_head.h index 2eebd35f6dc..ff3cb8eac3e 100644 --- a/sql/sp_head.h +++ b/sql/sp_head.h @@ -129,7 +129,7 @@ public: create_field m_return_field_def; /* This is used for FUNCTIONs only. 
*/ - uchar *m_tmp_query; // Temporary pointer to sub query string + const uchar *m_tmp_query; // Temporary pointer to sub query string uint m_old_cmq; // Old CLIENT_MULTI_QUERIES value st_sp_chistics *m_chistics; ulong m_sql_mode; // For SHOW CREATE and execution @@ -178,7 +178,7 @@ public: */ HASH m_sroutines; // Pointers set during parsing - uchar *m_param_begin, *m_param_end, *m_body_begin; + const uchar *m_param_begin, *m_param_end, *m_body_begin; /* Security context for stored routine which should be run under diff --git a/sql/sql_acl.cc b/sql/sql_acl.cc index 061020e1952..b15320567c2 100644 --- a/sql/sql_acl.cc +++ b/sql/sql_acl.cc @@ -27,9 +27,6 @@ #include "mysql_priv.h" #include "hash_filo.h" -#ifdef HAVE_REPLICATION -#include "sql_repl.h" //for tables_ok() -#endif #include <m_ctype.h> #include <stdarg.h> #include "sp_head.h" @@ -37,6 +34,8 @@ #ifndef NO_EMBEDDED_ACCESS_CHECKS +#define FIRST_NON_YN_FIELD 26 + class acl_entry :public hash_filo_element { public: @@ -1436,7 +1435,7 @@ bool change_password(THD *thd, const char *host, const char *user, GRANT and REVOKE are applied the slave in/exclusion rules as they are some kind of updates to the mysql.% tables. */ - if (thd->slave_thread && table_rules_on) + if (thd->slave_thread && rpl_filter->is_on()) { /* The tables must be marked "updating" so that tables_ok() takes them into @@ -1444,7 +1443,7 @@ bool change_password(THD *thd, const char *host, const char *user, */ tables.updating= 1; /* Thanks to bzero, tables.next==0 */ - if (!tables_ok(thd, &tables)) + if (!(thd->spcont || rpl_filter->tables_ok(0, &tables))) DBUG_RETURN(0); } #endif @@ -1484,8 +1483,7 @@ bool change_password(THD *thd, const char *host, const char *user, acl_user->host.hostname ? acl_user->host.hostname : "", new_password)); thd->clear_error(); - Query_log_event qinfo(thd, buff, query_length, 0, FALSE); - mysql_bin_log.write(&qinfo); + thd->binlog_query(THD::MYSQL_QUERY_TYPE, buff, query_length, FALSE, FALSE); } end: close_thread_tables(thd); @@ -1659,7 +1657,7 @@ static bool update_user_table(THD *thd, TABLE *table, key_copy((byte *) user_key, table->record[0], table->key_info, table->key_info->key_length); - table->file->extra(HA_EXTRA_RETRIEVE_ALL_COLS); + table->file->ha_retrieve_all_cols(); if (table->file->index_read_idx(table->record[0], 0, (byte *) user_key, table->key_info->key_length, HA_READ_KEY_EXACT)) { @@ -1670,7 +1668,7 @@ } store_record(table,record[1]); table->field[2]->store(new_password, new_password_len, system_charset_info); - if ((error=table->file->update_row(table->record[1],table->record[0]))) + if ((error=table->file->ha_update_row(table->record[1],table->record[0]))) { table->file->print_error(error,MYF(0)); /* purecov: deadcode */ DBUG_RETURN(1); } @@ -1749,7 +1747,7 @@ static int replace_user_table(THD *thd, TABLE *table, const LEX_USER &combo, key_copy(user_key, table->record[0], table->key_info, table->key_info->key_length); - table->file->extra(HA_EXTRA_RETRIEVE_ALL_COLS); + table->file->ha_retrieve_all_cols(); if (table->file->index_read_idx(table->record[0], 0, user_key, table->key_info->key_length, HA_READ_KEY_EXACT)) @@ -1885,16 +1883,16 @@ static int replace_user_table(THD *thd, TABLE *table, const LEX_USER &combo, We should NEVER delete from the user table, as a user can still use mysqld even if he doesn't have any privileges in the user table!
*/ - table->file->extra(HA_EXTRA_RETRIEVE_ALL_COLS); + table->file->ha_retrieve_all_cols(); if (cmp_record(table,record[1]) && - (error=table->file->update_row(table->record[1],table->record[0]))) + (error=table->file->ha_update_row(table->record[1],table->record[0]))) { // This should never happen table->file->print_error(error,MYF(0)); /* purecov: deadcode */ error= -1; /* purecov: deadcode */ goto end; /* purecov: deadcode */ } } - else if ((error=table->file->write_row(table->record[0]))) // insert + else if ((error=table->file->ha_write_row(table->record[0]))) // insert { // This should never happen if (error && error != HA_ERR_FOUND_DUPP_KEY && error != HA_ERR_FOUND_DUPP_UNIQUE) /* purecov: inspected */ @@ -1967,7 +1965,7 @@ static int replace_db_table(TABLE *table, const char *db, key_copy(user_key, table->record[0], table->key_info, table->key_info->key_length); - table->file->extra(HA_EXTRA_RETRIEVE_ALL_COLS); + table->file->ha_retrieve_all_cols(); if (table->file->index_read_idx(table->record[0],0, user_key, table->key_info->key_length, HA_READ_KEY_EXACT)) @@ -2003,17 +2001,18 @@ static int replace_db_table(TABLE *table, const char *db, /* update old existing row */ if (rights) { - table->file->extra(HA_EXTRA_RETRIEVE_ALL_COLS); - if ((error=table->file->update_row(table->record[1],table->record[0]))) + table->file->ha_retrieve_all_cols(); + if ((error=table->file->ha_update_row(table->record[1], + table->record[0]))) goto table_error; /* purecov: deadcode */ } else /* must have been a revoke of all privileges */ { - if ((error = table->file->delete_row(table->record[1]))) + if ((error = table->file->ha_delete_row(table->record[1]))) goto table_error; /* purecov: deadcode */ } } - else if (rights && (error=table->file->write_row(table->record[0]))) + else if (rights && (error=table->file->ha_write_row(table->record[0]))) { if (error && error != HA_ERR_FOUND_DUPP_KEY) /* purecov: inspected */ goto table_error; /* purecov: deadcode */ @@ -2182,7 +2181,7 @@ GRANT_TABLE::GRANT_TABLE(TABLE *form, TABLE *col_privs) key_copy(key, col_privs->record[0], col_privs->key_info, key_prefix_len); col_privs->field[4]->store("",0, &my_charset_latin1); - col_privs->file->ha_index_init(0); + col_privs->file->ha_index_init(0, 1); if (col_privs->file->index_read(col_privs->record[0], (byte*) key, key_prefix_len, HA_READ_KEY_EXACT)) @@ -2236,10 +2235,10 @@ void free_grant_table(GRANT_TABLE *grant_table) /* Search after a matching grant. 
Prefer exact grants before not exact ones */ static GRANT_NAME *name_hash_search(HASH *name_hash, - const char *host,const char* ip, - const char *db, - const char *user, const char *tname, - bool exact) + const char *host,const char* ip, + const char *db, + const char *user, const char *tname, + bool exact) { char helping [NAME_LEN*2+USERNAME_LENGTH+3]; uint len; @@ -2327,7 +2326,7 @@ static int replace_column_table(GRANT_TABLE *g_t, List_iterator <LEX_COLUMN> iter(columns); class LEX_COLUMN *column; - table->file->ha_index_init(0); + table->file->ha_index_init(0, 1); while ((column= iter++)) { ulong privileges= column->rights; @@ -2342,7 +2341,7 @@ static int replace_column_table(GRANT_TABLE *g_t, key_copy(user_key, table->record[0], table->key_info, table->key_info->key_length); - table->file->extra(HA_EXTRA_RETRIEVE_ALL_COLS); + table->file->ha_retrieve_all_cols(); if (table->file->index_read(table->record[0], user_key, table->key_info->key_length, HA_READ_KEY_EXACT)) @@ -2381,9 +2380,9 @@ static int replace_column_table(GRANT_TABLE *g_t, { GRANT_COLUMN *grant_column; if (privileges) - error=table->file->update_row(table->record[1],table->record[0]); + error=table->file->ha_update_row(table->record[1],table->record[0]); else - error=table->file->delete_row(table->record[1]); + error=table->file->ha_delete_row(table->record[1]); if (error) { table->file->print_error(error,MYF(0)); /* purecov: inspected */ @@ -2398,7 +2397,7 @@ static int replace_column_table(GRANT_TABLE *g_t, else // new grant { GRANT_COLUMN *grant_column; - if ((error=table->file->write_row(table->record[0]))) + if ((error=table->file->ha_write_row(table->record[0]))) { table->file->print_error(error,MYF(0)); /* purecov: inspected */ result= -1; /* purecov: inspected */ @@ -2420,7 +2419,7 @@ static int replace_column_table(GRANT_TABLE *g_t, key_copy(user_key, table->record[0], table->key_info, key_prefix_length); - table->file->extra(HA_EXTRA_RETRIEVE_ALL_COLS); + table->file->ha_retrieve_all_cols(); if (table->file->index_read(table->record[0], user_key, key_prefix_length, HA_READ_KEY_EXACT)) @@ -2450,8 +2449,8 @@ static int replace_column_table(GRANT_TABLE *g_t, if (privileges) { int tmp_error; - if ((tmp_error=table->file->update_row(table->record[1], - table->record[0]))) + if ((tmp_error=table->file->ha_update_row(table->record[1], + table->record[0]))) { /* purecov: deadcode */ table->file->print_error(tmp_error,MYF(0)); /* purecov: deadcode */ result= -1; /* purecov: deadcode */ @@ -2463,7 +2462,7 @@ static int replace_column_table(GRANT_TABLE *g_t, else { int tmp_error; - if ((tmp_error = table->file->delete_row(table->record[1]))) + if ((tmp_error = table->file->ha_delete_row(table->record[1]))) { /* purecov: deadcode */ table->file->print_error(tmp_error,MYF(0)); /* purecov: deadcode */ result= -1; /* purecov: deadcode */ @@ -2519,7 +2518,7 @@ static int replace_table_table(THD *thd, GRANT_TABLE *grant_table, key_copy(user_key, table->record[0], table->key_info, table->key_info->key_length); - table->file->extra(HA_EXTRA_RETRIEVE_ALL_COLS); + table->file->ha_retrieve_all_cols(); if (table->file->index_read_idx(table->record[0], 0, user_key, table->key_info->key_length, HA_READ_KEY_EXACT)) @@ -2571,15 +2570,15 @@ static int replace_table_table(THD *thd, GRANT_TABLE *grant_table, { if (store_table_rights || store_col_rights) { - if ((error=table->file->update_row(table->record[1],table->record[0]))) + if ((error=table->file->ha_update_row(table->record[1],table->record[0]))) goto table_error; /* purecov: 
deadcode */ } - else if ((error = table->file->delete_row(table->record[1]))) + else if ((error = table->file->ha_delete_row(table->record[1]))) goto table_error; /* purecov: deadcode */ } else { - error=table->file->write_row(table->record[0]); + error=table->file->ha_write_row(table->record[0]); if (error && error != HA_ERR_FOUND_DUPP_KEY) goto table_error; /* purecov: deadcode */ } @@ -2688,15 +2687,15 @@ static int replace_routine_table(THD *thd, GRANT_NAME *grant_name, { if (store_proc_rights) { - if ((error=table->file->update_row(table->record[1],table->record[0]))) + if ((error=table->file->ha_update_row(table->record[1],table->record[0]))) goto table_error; } - else if ((error= table->file->delete_row(table->record[1]))) + else if ((error= table->file->ha_delete_row(table->record[1]))) goto table_error; } else { - error=table->file->write_row(table->record[0]); + error=table->file->ha_write_row(table->record[0]); if (error && error != HA_ERR_FOUND_DUPP_KEY) goto table_error; } @@ -2797,9 +2796,10 @@ bool mysql_table_grant(THD *thd, TABLE_LIST *table_list, if (!(rights & CREATE_ACL)) { char buf[FN_REFLEN]; - sprintf(buf,"%s/%s/%s.frm",mysql_data_home, table_list->db, - table_list->table_name); - fn_format(buf,buf,"","",4+16+32); + build_table_filename(buf, sizeof(buf), table_list->db, + table_list->table_name, reg_ext); + fn_format(buf, buf, "", "", MY_UNPACK_FILENAME | MY_RESOLVE_SYMLINKS | + MY_RETURN_REAL_PATH | MY_APPEND_EXT); if (access(buf,F_OK)) { my_error(ER_NO_SUCH_TABLE, MYF(0), table_list->db, table_list->alias); @@ -2840,14 +2840,14 @@ bool mysql_table_grant(THD *thd, TABLE_LIST *table_list, GRANT and REVOKE are applied the slave in/exclusion rules as they are some kind of updates to the mysql.% tables. */ - if (thd->slave_thread && table_rules_on) + if (thd->slave_thread && rpl_filter->is_on()) { /* The tables must be marked "updating" so that tables_ok() takes them into account in tests. */ tables[0].updating= tables[1].updating= tables[2].updating= 1; - if (!tables_ok(thd, tables)) + if (!(thd->spcont || rpl_filter->tables_ok(0, tables))) DBUG_RETURN(FALSE); } #endif @@ -3047,14 +3048,14 @@ bool mysql_routine_grant(THD *thd, TABLE_LIST *table_list, bool is_proc, GRANT and REVOKE are applied the slave in/exclusion rules as they are some kind of updates to the mysql.% tables. */ - if (thd->slave_thread && table_rules_on) + if (thd->slave_thread && rpl_filter->is_on()) { /* The tables must be marked "updating" so that tables_ok() takes them into account in tests. */ tables[0].updating= tables[1].updating= 1; - if (!tables_ok(thd, tables)) + if (!(thd->spcont || rpl_filter->tables_ok(0, tables))) DBUG_RETURN(FALSE); } #endif @@ -3134,6 +3135,16 @@ bool mysql_routine_grant(THD *thd, TABLE_LIST *table_list, bool is_proc, } grant_option=TRUE; thd->mem_root= old_root; + /* + This flush is here only because there is code that writes rows to + system tables after executing a binlog_query(). + + TODO: Ensure that no writes are executed after a binlog_query() by + moving the writes to before calling binlog_query(). Then remove + this line (and add an assert inside send_ok() that checks that + everything is in a consistent state).
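+ + (Concretely: the flush below forces any buffered row-based events out to + the binary log before the later system-table writes, so events cannot be + recorded out of order.)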
+ */ + thd->binlog_flush_pending_rows_event(true); rw_unlock(&LOCK_grant); if (!result && !no_error) send_ok(thd); @@ -3178,14 +3189,14 @@ bool mysql_grant(THD *thd, const char *db, List <LEX_USER> &list, GRANT and REVOKE are applied the slave in/exclusion rules as they are some kind of updates to the mysql.% tables. */ - if (thd->slave_thread && table_rules_on) + if (thd->slave_thread && rpl_filter->is_on()) { /* The tables must be marked "updating" so that tables_ok() takes them into account in tests. */ tables[0].updating= tables[1].updating= 1; - if (!tables_ok(thd, tables)) + if (!(thd->spcont || rpl_filter->tables_ok(0, tables))) DBUG_RETURN(FALSE); } #endif @@ -3329,8 +3340,8 @@ static my_bool grant_load(TABLE_LIST *tables) t_table = tables[0].table; c_table = tables[1].table; p_table= tables[2].table; - t_table->file->ha_index_init(0); - p_table->file->ha_index_init(0); + t_table->file->ha_index_init(0, 1); + p_table->file->ha_index_init(0, 1); if (!t_table->file->index_first(t_table->record[0])) { memex_ptr= &memex; @@ -3742,8 +3753,8 @@ bool check_column_grant_in_table_ref(THD *thd, TABLE_LIST * table_ref, /* Normal or temporary table. */ TABLE *table= table_ref->table; grant= &(table->grant); - db_name= table->s->db; - table_name= table->s->table_name; + db_name= table->s->db.str; + table_name= table->s->table_name.str; } if (grant->want_privilege) @@ -4601,7 +4612,7 @@ int open_grant_tables(THD *thd, TABLE_LIST *tables) GRANT and REVOKE are applied the slave in/exclusion rules as they are some kind of updates to the mysql.% tables. */ - if (thd->slave_thread && table_rules_on) + if (thd->slave_thread && rpl_filter->is_on()) { /* The tables must be marked "updating" so that tables_ok() takes them into @@ -4609,7 +4620,7 @@ int open_grant_tables(THD *thd, TABLE_LIST *tables) */ tables[0].updating=tables[1].updating=tables[2].updating= tables[3].updating=tables[4].updating=1; - if (!tables_ok(thd, tables)) + if (!(thd->spcont || rpl_filter->tables_ok(0, tables))) DBUG_RETURN(1); tables[0].updating=tables[1].updating=tables[2].updating= tables[3].updating=tables[4].updating=0;; @@ -4685,13 +4696,13 @@ static int modify_grant_table(TABLE *table, Field *host_field, system_charset_info); user_field->store(user_to->user.str, user_to->user.length, system_charset_info); - if ((error= table->file->update_row(table->record[1], table->record[0]))) + if ((error= table->file->ha_update_row(table->record[1], table->record[0]))) table->file->print_error(error, MYF(0)); } else { /* delete */ - if ((error=table->file->delete_row(table->record[0]))) + if ((error=table->file->ha_delete_row(table->record[0]))) table->file->print_error(error, MYF(0)); } @@ -4759,7 +4770,7 @@ static int handle_grant_table(TABLE_LIST *tables, uint table_no, bool drop, by the searched record, if it exists. 
*/ DBUG_PRINT("info",("read table: '%s' search: '%s'@'%s'", - table->s->table_name, user_str, host_str)); + table->s->table_name.str, user_str, host_str)); host_field->store(host_str, user_from->host.length, system_charset_info); user_field->store(user_str, user_from->user.length, system_charset_info); @@ -4771,7 +4782,7 @@ static int handle_grant_table(TABLE_LIST *tables, uint table_no, bool drop, user_key, key_prefix_length, HA_READ_KEY_EXACT))) { - if (error != HA_ERR_KEY_NOT_FOUND) + if (error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE) { table->file->print_error(error, MYF(0)); result= -1; @@ -4802,7 +4813,7 @@ static int handle_grant_table(TABLE_LIST *tables, uint table_no, bool drop, { #ifdef EXTRA_DEBUG DBUG_PRINT("info",("scan table: '%s' search: '%s'@'%s'", - table->s->table_name, user_str, host_str)); + table->s->table_name.str, user_str, host_str)); #endif while ((error= table->file->rnd_next(table->record[0])) != HA_ERR_END_OF_FILE) @@ -5704,7 +5715,7 @@ void update_schema_privilege(TABLE *table, char *buff, const char* db, table->field[i++]->store(column, col_length, cs); table->field[i++]->store(priv, priv_length, cs); table->field[i]->store(is_grantable, strlen(is_grantable), cs); - table->file->write_row(table->record[0]); + table->file->ha_write_row(table->record[0]); } diff --git a/sql/sql_acl.h b/sql/sql_acl.h index c8fadb73b0c..88704f5ff88 100644 --- a/sql/sql_acl.h +++ b/sql/sql_acl.h @@ -14,6 +14,8 @@ along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#include "slave.h" // for tables_ok(), rpl_filter + #define SELECT_ACL (1L << 0) #define INSERT_ACL (1L << 1) #define UPDATE_ACL (1L << 2) @@ -50,7 +52,6 @@ */ #define EXTRA_ACL (1L << 29) #define NO_ACCESS (1L << 30) - #define DB_ACLS \ (UPDATE_ACL | SELECT_ACL | INSERT_ACL | DELETE_ACL | CREATE_ACL | DROP_ACL | \ GRANT_ACL | REFERENCES_ACL | INDEX_ACL | ALTER_ACL | CREATE_TMP_ACL | \ diff --git a/sql/sql_base.cc b/sql/sql_base.cc index be3b64a5ef6..74a5848fa0a 100644 --- a/sql/sql_base.cc +++ b/sql/sql_base.cc @@ -31,26 +31,35 @@ TABLE *unused_tables; /* Used by mysql_test */ HASH open_cache; /* Used by mysql_test */ - -static int open_unireg_entry(THD *thd, TABLE *entry, const char *db, - const char *name, const char *alias, - TABLE_LIST *table_list, MEM_ROOT *mem_root); +static HASH table_def_cache; +static TABLE_SHARE *oldest_unused_share, end_of_unused_share; +static pthread_mutex_t LOCK_table_share; +static bool table_def_inited= 0; + +static int open_unireg_entry(THD *thd, TABLE *entry, TABLE_LIST *table_list, + const char *alias, + char *cache_key, uint cache_key_length, + MEM_ROOT *mem_root); static void free_cache_entry(TABLE *entry); static void mysql_rm_tmp_tables(void); -static bool open_new_frm(THD *thd, const char *path, const char *alias, - const char *db, const char *table_name, +static bool open_new_frm(THD *thd, TABLE_SHARE *share, const char *alias, uint db_stat, uint prgflag, uint ha_open_flags, TABLE *outparam, TABLE_LIST *table_desc, MEM_ROOT *mem_root); +static void close_old_data_files(THD *thd, TABLE *table, bool abort_locks, + bool send_refresh); +static bool reopen_table(TABLE *table); + extern "C" byte *table_cache_key(const byte *record,uint *length, my_bool not_used __attribute__((unused))) { TABLE *entry=(TABLE*) record; - *length= entry->s->key_length; - return (byte*) entry->s->table_cache_key; + *length= entry->s->table_cache_key.length; + return (byte*) entry->s->table_cache_key.str; } + 
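Both caches are mysys HASH tables keyed through callbacks like table_cache_key() above, which now hand back the TABLE_SHARE's LEX_STRING key (pointer plus length) instead of the old separate char*/uint pair. A minimal sketch of the lookup side, assuming only the mysys hash_search() primitive already used throughout this file (the helper name is illustrative, not part of the patch):

  /* Look up an open TABLE by its packed "db\0name\0" cache key. */
  static TABLE *lookup_open_table(const char *key, uint key_length)
  {
    /* hash_search() feeds each candidate entry through table_cache_key() */
    return (TABLE*) hash_search(&open_cache, (byte*) key, key_length);
  }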
bool table_cache_init(void) { mysql_rm_tmp_tables(); @@ -62,21 +71,25 @@ void table_cache_free(void) { DBUG_ENTER("table_cache_free"); - close_cached_tables((THD*) 0,0,(TABLE_LIST*) 0); - if (!open_cache.records) // Safety first - hash_free(&open_cache); + if (table_def_inited) + { + close_cached_tables((THD*) 0,0,(TABLE_LIST*) 0); + if (!open_cache.records) // Safety first + hash_free(&open_cache); + } DBUG_VOID_RETURN; } -uint cached_tables(void) +uint cached_open_tables(void) { return open_cache.records; } + #ifdef EXTRA_DEBUG static void check_unused(void) { - uint count=0,idx=0; + uint count= 0, open_files= 0, idx= 0; TABLE *cur_link,*start_link; if ((start_link=cur_link=unused_tables)) @@ -100,17 +113,544 @@ static void check_unused(void) TABLE *entry=(TABLE*) hash_element(&open_cache,idx); if (!entry->in_use) count--; + if (entry->file) + open_files++; } if (count != 0) { DBUG_PRINT("error",("Unused_links doesn't match open_cache: diff: %d", /* purecov: inspected */ count)); /* purecov: inspected */ } + +#ifdef NOT_SAFE_FOR_REPAIR + /* + check that open cache and table definition cache have the same number of + active tables + */ + count= 0; + for (idx=0 ; idx < table_def_cache.records ; idx++) + { + TABLE_SHARE *entry= (TABLE_SHARE*) hash_element(&table_def_cache,idx); + count+= entry->ref_count; + } + if (count != open_files) + { + DBUG_PRINT("error", ("table_def ref_count: %u open_cache: %u", + count, open_files)); + DBUG_ASSERT(count == open_files); + } +#endif } #else #define check_unused() #endif + +/* + Create a table cache key + + SYNOPSIS + create_table_def_key() + thd Thread handler + key Create key here (must be of size MAX_DBKEY_LENGTH) + table_list Table definition + tmp_table Set if table is a tmp table + + IMPLEMENTATION + The table cache_key is created from: + db_name + \0 + table_name + \0 + + if the table is a tmp table, we add the following to make each tmp table + unique on the slave: + + 4 bytes for master thread id + 4 bytes pseudo thread id + + RETURN + Length of key +*/ + +uint create_table_def_key(THD *thd, char *key, TABLE_LIST *table_list, + bool tmp_table) +{ + uint key_length= (uint) (strmov(strmov(key, table_list->db)+1, + table_list->table_name)-key)+1; + if (tmp_table) + { + int4store(key + key_length, thd->server_id); + int4store(key + key_length + 4, thd->variables.pseudo_thread_id); + key_length+= TMP_TABLE_KEY_EXTRA; + } + return key_length; +} + + + +/***************************************************************************** + Functions to handle table definition cache (TABLE_SHARE) +*****************************************************************************/ + +extern "C" byte *table_def_key(const byte *record, uint *length, + my_bool not_used __attribute__((unused))) +{ + TABLE_SHARE *entry=(TABLE_SHARE*) record; + *length= entry->table_cache_key.length; + return (byte*) entry->table_cache_key.str; +} + + +static void table_def_free_entry(TABLE_SHARE *share) +{ + DBUG_ENTER("table_def_free_entry"); + if (share->prev) + { + /* remove from old_unused_share list */ + pthread_mutex_lock(&LOCK_table_share); + *share->prev= share->next; + share->next->prev= share->prev; + pthread_mutex_unlock(&LOCK_table_share); + } + free_table_share(share); + DBUG_VOID_RETURN; +} + + +bool table_def_init(void) +{ + table_def_inited= 1; + pthread_mutex_init(&LOCK_table_share, MY_MUTEX_INIT_FAST); + oldest_unused_share= &end_of_unused_share; + end_of_unused_share.prev= &oldest_unused_share; + + return hash_init(&table_def_cache,
&my_charset_bin, table_def_size, + 0, 0, table_def_key, + (hash_free_key) table_def_free_entry, 0) != 0; +} + + +void table_def_free(void) +{ + DBUG_ENTER("table_def_free"); + if (table_def_inited) + { + table_def_inited= 0; + pthread_mutex_destroy(&LOCK_table_share); + hash_free(&table_def_cache); + } + DBUG_VOID_RETURN; +} + + +uint cached_table_definitions(void) +{ + return table_def_cache.records; +} + + +/* + Get TABLE_SHARE for a table. + + get_table_share() + thd Table share + table_list Table that should be opened + key Table cache key + key_length Length of key + db_flags Flags to open_table_def(): + OPEN_VIEW + error out: Error code from open_table_def() + + IMPLEMENTATION + Get a table definition from the table definition cache. + If it doesn't exist, create a new from the table definition file. + + NOTES + We must have wrlock on LOCK_open when we come here + (To be changed later) + + RETURN + 0 Error + # Share for table +*/ + +TABLE_SHARE *get_table_share(THD *thd, TABLE_LIST *table_list, char *key, + uint key_length, uint db_flags, int *error) +{ + TABLE_SHARE *share; + DBUG_ENTER("get_table_share"); + + *error= 0; + + /* Read table definition from cache */ + if ((share= (TABLE_SHARE*) hash_search(&table_def_cache,(byte*) key, + key_length))) + goto found; + + if (!(share= alloc_table_share(table_list, key, key_length))) + { +#ifdef NOT_YET + pthread_mutex_unlock(&LOCK_open); +#endif + DBUG_RETURN(0); + } + +#ifdef NOT_YET + // We need a write lock to be able to add a new entry + pthread_mutex_unlock(&LOCK_open); + pthread_mutex_lock(&LOCK_open); + /* Check that another thread didn't insert the same table in between */ + if ((old_share= hash_search(&table_def_cache, (byte*) key, key_length))) + { + (void) pthread_mutex_lock(&share->mutex); + free_table_share(share); + share= old_share; + goto found; + } +#endif + + /* + Lock mutex to be able to read table definition from file without + conflicts + */ + (void) pthread_mutex_lock(&share->mutex); + if (my_hash_insert(&table_def_cache, (byte*) share)) + { +#ifdef NOT_YET + pthread_mutex_unlock(&LOCK_open); + (void) pthread_mutex_unlock(&share->mutex); +#endif + free_table_share(share); + DBUG_RETURN(0); // return error + } +#ifdef NOT_YET + pthread_mutex_unlock(&LOCK_open); +#endif + if (open_table_def(thd, share, db_flags)) + { +#ifdef NOT_YET + /* + No such table or wrong table definition file + Lock first the table cache and then the mutex. + This will ensure that no other thread is using the share + structure. + */ + (void) pthread_mutex_unlock(&share->mutex); + (void) pthread_mutex_lock(&LOCK_open); + (void) pthread_mutex_lock(&share->mutex); +#endif + *error= share->error; + (void) hash_delete(&table_def_cache, (byte*) share); + DBUG_RETURN(0); + } + share->ref_count++; // Mark in use + DBUG_PRINT("exit", ("share: 0x%lx ref_count: %u", + (ulong) share, share->ref_count)); + (void) pthread_mutex_unlock(&share->mutex); + DBUG_RETURN(share); + +found: + /* + We found an existing table definition. Return it if we didn't get + an error when reading the table definition from file. 
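+ (share->error, set by the earlier open_table_def() call, is what the + check below inspects.)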
+ */ + + /* We must do a lock to ensure that the structure is initialized */ + (void) pthread_mutex_lock(&share->mutex); +#ifdef NOT_YET + pthread_mutex_unlock(&LOCK_open); +#endif + if (share->error) + { + /* Table definition contained an error */ + open_table_error(share, share->error, share->open_errno, share->errarg); + (void) pthread_mutex_unlock(&share->mutex); + DBUG_RETURN(0); + } + if (share->is_view && !(db_flags & OPEN_VIEW)) + { + open_table_error(share, 1, ENOENT, 0); + (void) pthread_mutex_unlock(&share->mutex); + DBUG_RETURN(0); + } + + if (!share->ref_count++ && share->prev) + { + /* + Share was not used before and it was in the old_unused_share list + Unlink share from this list + */ + DBUG_PRINT("info", ("Unlinking from not used list")); + pthread_mutex_lock(&LOCK_table_share); + *share->prev= share->next; + share->next->prev= share->prev; + share->next= 0; + share->prev= 0; + pthread_mutex_unlock(&LOCK_table_share); + } + (void) pthread_mutex_unlock(&share->mutex); + + /* Free cache if too big */ + while (table_def_cache.records > table_def_size && + oldest_unused_share->next) + { + pthread_mutex_lock(&oldest_unused_share->mutex); + VOID(hash_delete(&table_def_cache, (byte*) oldest_unused_share)); + } + + DBUG_PRINT("exit", ("share: 0x%lx ref_count: %u", + (ulong) share, share->ref_count)); + DBUG_RETURN(share); +} + + +/* + Get a table share. If it didn't exist, try creating it from engine + + For arguments and return values, see get_table_from_share() +*/ + +static TABLE_SHARE +*get_table_share_with_create(THD *thd, TABLE_LIST *table_list, + char *key, uint key_length, + uint db_flags, int *error) +{ + TABLE_SHARE *share; + int tmp; + DBUG_ENTER("get_table_share_with_create"); + + if ((share= get_table_share(thd, table_list, key, key_length, + db_flags, error)) || + thd->net.last_errno != ER_NO_SUCH_TABLE) + DBUG_RETURN(share); + + /* Table didn't exist. Check if some engine can provide it */ + if ((tmp= ha_create_table_from_engine(thd, table_list->db, + table_list->table_name)) < 0) + { + /* + No such table in any engine. + Hide "Table doesn't exist" errors if table belong to view + */ + if (table_list->belong_to_view) + { + TABLE_LIST *view= table_list->belong_to_view; + thd->clear_error(); + my_error(ER_VIEW_INVALID, MYF(0), + view->view_db.str, view->view_name.str); + } + DBUG_RETURN(0); + } + if (tmp) + { + /* Give right error message */ + thd->clear_error(); + DBUG_PRINT("error", ("Discovery of %s/%s failed", table_list->db, + table_list->table_name)); + my_printf_error(ER_UNKNOWN_ERROR, + "Failed to open '%-.64s', error while " + "unpacking from engine", + MYF(0), table_list->table_name); + DBUG_RETURN(0); + } + /* Table existed in engine. Let's open it */ + mysql_reset_errors(thd, 1); // Clear warnings + thd->clear_error(); // Clear error message + DBUG_RETURN(get_table_share(thd, table_list, key, key_length, + db_flags, error)); +} + + +/* + Mark that we are not using table share anymore. + + SYNOPSIS + release_table_share() + share Table share + release_type How the release should be done: + RELEASE_NORMAL + - Release without checking + RELEASE_WAIT_FOR_DROP + - Don't return until we get a signal that the + table is deleted or the thread is killed. + + IMPLEMENTATION + If ref_count goes to zero and (we have done a refresh or if we have + already too many open table shares) then delete the definition. + + If type == RELEASE_WAIT_FOR_DROP then don't return until we get a signal + that the table is deleted or the thread is killed. 
+*/ + +void release_table_share(TABLE_SHARE *share, enum release_type type) +{ + bool to_be_deleted= 0; + DBUG_ENTER("release_table_share"); + DBUG_PRINT("enter", + ("share: 0x%lx table: %s.%s ref_count: %u version: %lu", + (ulong) share, share->db.str, share->table_name.str, + share->ref_count, share->version)); + + safe_mutex_assert_owner(&LOCK_open); + + pthread_mutex_lock(&share->mutex); + if (!--share->ref_count) + { + if (share->version != refresh_version) + to_be_deleted=1; + else + { + /* Link share last in used_table_share list */ + DBUG_PRINT("info",("moving share to unused list")); + + DBUG_ASSERT(share->next == 0); + pthread_mutex_lock(&LOCK_table_share); + share->prev= end_of_unused_share.prev; + *end_of_unused_share.prev= share; + end_of_unused_share.prev= &share->next; + share->next= &end_of_unused_share; + pthread_mutex_unlock(&LOCK_table_share); + + to_be_deleted= (table_def_cache.records > table_def_size); + } + } + + if (to_be_deleted) + { + DBUG_PRINT("info", ("Deleting share")); + hash_delete(&table_def_cache, (byte*) share); + DBUG_VOID_RETURN; + } + pthread_mutex_unlock(&share->mutex); + DBUG_VOID_RETURN; + + +#ifdef NOT_YET + if (to_be_deleted) + { + /* + We must try again with new locks as we must get LOCK_open + before share->mutex + */ + pthread_mutex_unlock(&share->mutex); + pthread_mutex_lock(&LOCK_open); + pthread_mutex_lock(&share->mutex); + if (!share->ref_count) + { // No one is using this now + TABLE_SHARE *name_lock; + if (share->replace_with_name_lock && (name_lock=get_name_lock(share))) + { + /* + This code is executed when someone does FLUSH TABLES while one has + locked tables. + */ + (void) hash_search(&def_cache,(byte*) key,key_length); + hash_replace(&def_cache, def_cache.current_record,(byte*) name_lock); + } + else + { + /* Remove table definition */ + hash_delete(&def_cache,(byte*) share); + } + pthread_mutex_unlock(&LOCK_open); + free_table_share(share); + } + else + { + pthread_mutex_unlock(&LOCK_open); + if (type == RELEASE_WAIT_FOR_DROP) + wait_for_table(share, "Waiting for close"); + else + pthread_mutex_unlock(&share->mutex); + } + } + else if (type == RELEASE_WAIT_FOR_DROP) + wait_for_table(share, "Waiting for close"); + else + pthread_mutex_unlock(&share->mutex); +#endif +} + + +/* + Check if table definition exists in cache + + SYNOPSIS + get_cached_table_share() + db Database name + table_name Table name + + RETURN + 0 Not cached + # TABLE_SHARE for table +*/ + +TABLE_SHARE *get_cached_table_share(const char *db, const char *table_name) +{ + char key[NAME_LEN*2+2]; + TABLE_LIST table_list; + uint key_length; + safe_mutex_assert_owner(&LOCK_open); + + table_list.db= (char*) db; + table_list.table_name= (char*) table_name; + key_length= create_table_def_key((THD*) 0, key, &table_list, 0); + return (TABLE_SHARE*) hash_search(&table_def_cache,(byte*) key, key_length); +} + + +/* + Close file handle, but leave the table in the table cache + + SYNOPSIS + close_handle_and_leave_table_as_lock() + table Table handler + + NOTES + By leaving the table in the table cache, it prevents any other thread + from opening the table + + thd->killed will be set if we run out of memory +*/ + + +static void close_handle_and_leave_table_as_lock(TABLE *table) +{ + TABLE_SHARE *share, *old_share= table->s; + MEM_ROOT *mem_root= &table->mem_root; + DBUG_ENTER("close_handle_and_leave_table_as_lock"); + + /* + Make a local copy of the table share and free the current one.
+ This has to be done to ensure that the table share is removed from + the table definition cache as soon as the last instance is removed + */ + if ((share= (TABLE_SHARE*) alloc_root(mem_root, sizeof(*share)))) + { + bzero((char*) share, sizeof(*share)); + share->db.str= memdup_root(mem_root, old_share->db.str, + old_share->db.length+1); + share->db.length= old_share->db.length; + share->table_name.str= memdup_root(mem_root, + old_share->table_name.str, + old_share->table_name.length+1); + share->table_name.length= old_share->table_name.length; + share->table_cache_key.str= memdup_root(mem_root, + old_share->table_cache_key.str, + old_share->table_cache_key.length); + share->table_cache_key.length= old_share->table_cache_key.length; + share->tmp_table= INTERNAL_TMP_TABLE; // for intern_close_table() + } + + table->file->close(); + table->db_stat= 0; // Mark file closed + release_table_share(table->s, RELEASE_NORMAL); + table->s= share; + + DBUG_VOID_RETURN; +} + + + /* Create a list for all open tables matching SQL expression @@ -147,17 +687,14 @@ OPEN_TABLE_LIST *list_open_tables(THD *thd, const char *db, const char *wild) TABLE *entry=(TABLE*) hash_element(&open_cache,idx); TABLE_SHARE *share= entry->s; - DBUG_ASSERT(share->table_name != 0); - if ((!share->table_name)) // To be removed - continue; // Shouldn't happen - if (db && my_strcasecmp(system_charset_info, db, share->db)) + if (db && my_strcasecmp(system_charset_info, db, share->db.str)) continue; - if (wild && wild_compare(share->table_name,wild,0)) + if (wild && wild_compare(share->table_name.str, wild, 0)) continue; /* Check if user has SELECT privilege for any column in the table */ - table_list.db= (char*) share->db; - table_list.table_name= (char*) share->table_name; + table_list.db= share->db.str; + table_list.table_name= share->table_name.str; table_list.grant.privilege=0; if (check_table_access(thd,SELECT_ACL | EXTRA_ACL,&table_list,1)) @@ -165,8 +702,8 @@ OPEN_TABLE_LIST *list_open_tables(THD *thd, const char *db, const char *wild) /* need to check if we haven't already listed it */ for (table= open_list ; table ; table=table->next) { - if (!strcmp(table->table,share->table_name) && - !strcmp(table->db,entry->s->db)) + if (!strcmp(table->table, share->table_name.str) && + !strcmp(table->db, share->db.str)) { if (entry->in_use) table->in_use++; @@ -178,15 +715,15 @@ OPEN_TABLE_LIST *list_open_tables(THD *thd, const char *db, const char *wild) if (table) continue; if (!(*start_list = (OPEN_TABLE_LIST *) - sql_alloc(sizeof(**start_list)+share->key_length))) + sql_alloc(sizeof(**start_list)+share->table_cache_key.length))) { open_list=0; // Out of memory break; } strmov((*start_list)->table= strmov(((*start_list)->db= (char*) ((*start_list)+1)), - entry->s->db)+1, - entry->s->table_name); + share->db.str)+1, + share->table_name.str); (*start_list)->in_use= entry->in_use ? 1 : 0; (*start_list)->locked= entry->locked_by_name ?
1 : 0; start_list= &(*start_list)->next; @@ -203,10 +740,13 @@ OPEN_TABLE_LIST *list_open_tables(THD *thd, const char *db, const char *wild) void intern_close_table(TABLE *table) { // Free all structures + DBUG_ENTER("intern_close_table"); + free_io_cache(table); delete table->triggers; - if (table->file) - VOID(closefrm(table)); // close file + if (table->file) // Not true if name lock + VOID(closefrm(table, 1)); // close file + DBUG_VOID_RETURN; } /* @@ -223,7 +763,6 @@ void intern_close_table(TABLE *table) static void free_cache_entry(TABLE *table) { DBUG_ENTER("free_cache_entry"); - safe_mutex_assert_owner(&LOCK_open); intern_close_table(table); if (!table->in_use) @@ -273,6 +812,7 @@ bool close_cached_tables(THD *thd, bool if_wait_for_refresh, VOID(pthread_mutex_lock(&LOCK_open)); if (!tables) { + refresh_version++; // Force close of open tables while (unused_tables) { #ifdef EXTRA_DEBUG @@ -282,7 +822,12 @@ bool close_cached_tables(THD *thd, bool if_wait_for_refresh, VOID(hash_delete(&open_cache,(byte*) unused_tables)); #endif } - refresh_version++; // Force close of open tables + /* Free table shares */ + while (oldest_unused_share->next) + { + pthread_mutex_lock(&oldest_unused_share->mutex); + VOID(hash_delete(&table_def_cache, (byte*) oldest_unused_share)); + } } else { @@ -484,6 +1029,19 @@ void close_thread_tables(THD *thd, bool lock_in_use, bool skip_derived) /* Fallthrough */ } + /* + For RBR: before calling close_thread_tables(), storage engines + should autocommit. Hence if there is a a pending event, it belongs + to a non-transactional engine, which writes directly to the table, + and should therefore be flushed before unlocking and closing the + tables. The test above for locked tables will not be triggered + since RBR locks and unlocks tables on a per-event basis. + + TODO (WL#3023): Change the semantics so that RBR does not lock and + unlock tables on a per-event basis. + */ + thd->binlog_flush_pending_rows_event(true); + if (thd->lock) { mysql_unlock_tables(thd, thd->lock); @@ -504,11 +1062,10 @@ void close_thread_tables(THD *thd, bool lock_in_use, bool skip_derived) /* VOID(pthread_sigmask(SIG_SETMASK,&thd->block_signals,NULL)); */ if (!lock_in_use) VOID(pthread_mutex_lock(&LOCK_open)); - safe_mutex_assert_owner(&LOCK_open); DBUG_PRINT("info", ("thd->open_tables: %p", thd->open_tables)); - found_old_table= 0; + found_old_table= 0; while (thd->open_tables) found_old_table|=close_thread_table(thd, &thd->open_tables); thd->some_tables_deleted=0; @@ -566,7 +1123,7 @@ bool close_thread_table(THD *thd, TABLE **table_ptr) else { // Free memory and reset for next loop - table->file->reset(); + table->file->ha_reset(); } table->in_use=0; if (unused_tables) @@ -582,22 +1139,10 @@ bool close_thread_table(THD *thd, TABLE **table_ptr) DBUG_RETURN(found_old_table); } - /* Close and delete temporary tables */ - -void close_temporary(TABLE *table,bool delete_table) -{ - DBUG_ENTER("close_temporary"); - char path[FN_REFLEN]; - db_type table_type=table->s->db_type; - strmov(path,table->s->path); - free_io_cache(table); - closefrm(table); - my_free((char*) table,MYF(0)); - if (delete_table) - rm_temporary_table(table_type, path); - DBUG_VOID_RETURN; -} +/* + Close all temporary tables created by 'CREATE TEMPORARY TABLE' for thread +*/ void close_temporary_tables(THD *thd) { @@ -613,12 +1158,14 @@ void close_temporary_tables(THD *thd) query_buf_size= 50; // Enough for DROP ... 
TABLE IF EXISTS for (table=thd->temporary_tables ; table ; table=table->next) + { /* We are going to add 4 ` around the db/table names, so 1 does not look - enough; indeed it is enough, because table->key_length is greater (by 8, - because of server_id and thread_id) than db||table. + enough; indeed it is enough, because table->table_cache_key.length is + greater (by 8, because of server_id and thread_id) than db||table. */ - query_buf_size+= table->s->key_length+1; + query_buf_size+= table->s->table_cache_key.length+1; + } if ((query = alloc_root(thd->mem_root, query_buf_size))) // Better add "if exists", in case a RESET MASTER has been done @@ -629,23 +1176,24 @@ void close_temporary_tables(THD *thd) if (query) // we might be out of memory, but this is not fatal { // skip temporary tables not created directly by the user - if (table->s->table_name[0] != '#') + if (table->s->table_name.str[0] != '#') found_user_tables = 1; - end = strxmov(end,"`",table->s->db,"`.`", - table->s->table_name,"`,", NullS); + end= strxmov(end, "`",table->s->db.str, "`.`", + table->s->table_name.str, "`,", NullS); } next=table->next; - close_temporary(table, 1); + close_temporary(table, 1, 1); } - if (query && found_user_tables && mysql_bin_log.is_open()) + if (query && found_user_tables && mysql_bin_log.is_open() && + !binlog_row_based) // CREATE TEMP TABLE not binlogged if row-based { /* The -1 is to remove last ',' */ thd->clear_error(); Query_log_event qinfo(thd, query, (ulong)(end-query)-1, 0, FALSE); /* Imagine the thread had created a temp table, then was doing a SELECT, and - the SELECT was killed. Then it's not clever to mark the statement above as - "killed", because it's not really a statement updating data, and there + the SELECT was killed. Then it's not clever to mark the statement above + as "killed", because it's not really a statement updating data, and there are 99.99% chances it will succeed on slave. 
If a real update (one updating a persistent table) was killed on the master, then this real update will be logged with error_code=killed, @@ -831,43 +1379,93 @@ void update_non_unique_table_error(TABLE_LIST *update, } -TABLE **find_temporary_table(THD *thd, const char *db, const char *table_name) +TABLE *find_temporary_table(THD *thd, const char *db, const char *table_name) { char key[MAX_DBKEY_LENGTH]; - uint key_length= (uint) (strmov(strmov(key,db)+1,table_name)-key)+1; - TABLE *table,**prev; + uint key_length; + TABLE_LIST table_list; + TABLE *table; + + table_list.db= (char*) db; + table_list.table_name= (char*) table_name; + return find_temporary_table(thd, &table_list); +} - int4store(key+key_length,thd->server_id); - key_length += 4; - int4store(key+key_length,thd->variables.pseudo_thread_id); - key_length += 4; - prev= &thd->temporary_tables; - for (table=thd->temporary_tables ; table ; table=table->next) +TABLE *find_temporary_table(THD *thd, TABLE_LIST *table_list) +{ + char key[MAX_DBKEY_LENGTH]; + uint key_length; + TABLE *table; + + key_length= create_table_def_key(thd, key, table_list, 1); + for (table=thd->temporary_tables ; table ; table= table->next) { - if (table->s->key_length == key_length && - !memcmp(table->s->table_cache_key,key,key_length)) - return prev; - prev= &table->next; + if (table->s->table_cache_key.length == key_length && + !memcmp(table->s->table_cache_key.str, key, key_length)) + return table; } return 0; // Not a temporary table } -bool close_temporary_table(THD *thd, const char *db, const char *table_name) + +/* + Close temporary table and unlink from thd->temporary tables +*/ + +bool close_temporary_table(THD *thd, TABLE_LIST *table_list) { - TABLE *table,**prev; + TABLE *table; - if (!(prev=find_temporary_table(thd,db,table_name))) + if (!(table= find_temporary_table(thd, table_list))) return 1; - table= *prev; - *prev= table->next; - close_temporary(table, 1); - if (thd->slave_thread) - --slave_open_temp_tables; + close_temporary_table(thd, table, 1, 1); return 0; } /* + Close temporary table and unlink from thd->temporary tables +*/ + +void close_temporary_table(THD *thd, TABLE *table, + bool free_share, bool delete_table) +{ + TABLE **prev= table->open_prev; + if ((*table->open_prev= table->next)) + table->next->open_prev= prev; + if (thd->slave_thread) + slave_open_temp_tables--; + close_temporary(table, free_share, delete_table); +} + + +/* + Close and delete a temporary table + + NOTE + This doesn't unlink the table from thd->temporary_tables. + If this is needed, use close_temporary_table() +*/ + +void close_temporary(TABLE *table, bool free_share, bool delete_table) +{ + handlerton *table_type= table->s->db_type; + DBUG_ENTER("close_temporary"); + + free_io_cache(table); + closefrm(table, 0); + if (delete_table) + rm_temporary_table(table_type, table->s->path.str); + if (free_share) + { + free_table_share(table->s); + my_free((char*) table,MYF(0)); + } + DBUG_VOID_RETURN; +} + + +/* Used by ALTER TABLE when the table is a temporary one. It changes something only if the ALTER contained a RENAME clause (otherwise, table_name is the old name).
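rename_temporary_table() in the hunk below rebuilds this key through create_table_def_key(). For reference, the tmp-table key layout that function produces (its body appears earlier in this diff): db name and table name, each NUL-terminated, followed by eight bytes that keep the key unique per master server and per connection:

  char key[MAX_DBKEY_LENGTH];
  uint length= (uint) (strmov(strmov(key, db)+1, table_name)-key)+1;
  int4store(key + length, thd->server_id);                       /* 4 bytes */
  int4store(key + length + 4, thd->variables.pseudo_thread_id);  /* 4 bytes */
  length+= TMP_TABLE_KEY_EXTRA;                                  /* i.e. 8 */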
@@ -880,21 +1478,28 @@ bool rename_temporary_table(THD* thd, TABLE *table, const char *db, { char *key; TABLE_SHARE *share= table->s; - - if (!(key=(char*) alloc_root(&table->mem_root, - (uint) strlen(db)+ - (uint) strlen(table_name)+6+4))) - return 1; /* purecov: inspected */ - share->key_length= (uint) - (strmov((char*) (share->table_name= strmov(share->table_cache_key= key, - db)+1), - table_name) - share->table_cache_key)+1; - share->db= share->table_cache_key; - int4store(key+share->key_length, thd->server_id); - share->key_length+= 4; - int4store(key+share->key_length, thd->variables.pseudo_thread_id); - share->key_length+= 4; - return 0; + TABLE_LIST table_list; + uint db_length, table_length; + DBUG_ENTER("rename_temporary_table"); + + if (!(key=(char*) alloc_root(&share->mem_root, + (uint) (db_length= strlen(db))+ + (uint) (table_length= strlen(table_name))+6+4))) + DBUG_RETURN(1); /* purecov: inspected */ + + table_list.db= (char*) db; + table_list.table_name= (char*) table_name; + share->db.str= share->table_cache_key.str= key; + share->db.length= db_length; + share->table_cache_key.length= create_table_def_key(thd, key, + &table_list, 1); + /* + Here we use the fact that table_name is stored as the second component + in the 'key' (after db_name), where components are separated with \0 + */ + share->table_name.str= key+db_length+1; + share->table_name.length= table_length; + DBUG_RETURN(0); } @@ -924,16 +1529,16 @@ static void relink_unused(TABLE *table) TABLE *unlink_open_table(THD *thd, TABLE *list, TABLE *find) { char key[MAX_DBKEY_LENGTH]; - uint key_length= find->s->key_length; + uint key_length= find->s->table_cache_key.length; TABLE *start=list,**prev,*next; prev= &start; - memcpy(key, find->s->table_cache_key, key_length); + memcpy(key, find->s->table_cache_key.str, key_length); for (; list ; list=next) { next=list->next; - if (list->s->key_length == key_length && - !memcmp(list->s->table_cache_key, key, key_length)) + if (list->s->table_cache_key.length == key_length && + !memcmp(list->s->table_cache_key.str, key, key_length)) { if (thd->locked_tables) mysql_lock_remove(thd, thd->locked_tables,list); @@ -953,24 +1558,39 @@ TABLE *unlink_open_table(THD *thd, TABLE *list, TABLE *find) /* - When we call the following function we must have a lock on - LOCK_open ; This lock will be unlocked on return. 
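The hunk below generalizes this into wait_for_condition(), which takes the mutex/condition pair as arguments; the old behaviour is recovered by passing the previously hard-coded pair, exactly the call that open_table() makes further down in this diff:

  /* Old wait_for_refresh(thd), expressed through the new interface */
  wait_for_condition(thd, &LOCK_open, &COND_refresh);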
+ Wait for condition but allow the user to send a kill to mysqld + + SYNOPSIS + wait_for_condition() + thd Thread handler + mutex mutex that is currently held and is associated with the condition + Will be unlocked on return + cond Condition to wait for */ -void wait_for_refresh(THD *thd) +void wait_for_condition(THD *thd, pthread_mutex_t *mutex, pthread_cond_t *cond) { - safe_mutex_assert_owner(&LOCK_open); - /* Wait until the current table is up to date */ const char *proc_info; - thd->mysys_var->current_mutex= &LOCK_open; - thd->mysys_var->current_cond= &COND_refresh; + thd->mysys_var->current_mutex= mutex; + thd->mysys_var->current_cond= cond; proc_info=thd->proc_info; thd->proc_info="Waiting for table"; if (!thd->killed) - (void) pthread_cond_wait(&COND_refresh,&LOCK_open); + (void) pthread_cond_wait(cond, mutex); - pthread_mutex_unlock(&LOCK_open); // Must be unlocked first + /* + We must unlock mutex first to avoid deadlock because conditions are + sent to this thread by doing locks in the following order: + lock(mysys_var->mutex) + lock(mysys_var->current_mutex) + + One side effect of this is that one can only use wait_for_condition with + condition variables that are guaranteed not to disappear (be freed) even + if this mutex is unlocked + */ + + pthread_mutex_unlock(mutex); pthread_mutex_lock(&thd->mysys_var->mutex); thd->mysys_var->current_mutex= 0; thd->mysys_var->current_cond= 0; @@ -1016,10 +1636,9 @@ bool reopen_name_locked_table(THD* thd, TABLE_LIST* table_list) orig_table= *table; key_length=(uint) (strmov(strmov(key,db)+1,table_name)-key)+1; - if (open_unireg_entry(thd, table, db, table_name, table_name, 0, - thd->mem_root) || - !(table->s->table_cache_key= memdup_root(&table->mem_root, (char*) key, - key_length))) + if (open_unireg_entry(thd, table, table_list, table_name, + table->s->table_cache_key.str, + table->s->table_cache_key.length, thd->mem_root)) { intern_close_table(table); /* @@ -1033,8 +1652,6 @@ bool reopen_name_locked_table(THD* thd, TABLE_LIST* table_list) } share= table->s; - share->db= share->table_cache_key; - share->key_length=key_length; share->version=0; share->flush_version=0; table->in_use = thd; @@ -1098,17 +1715,17 @@ TABLE *open_table(THD *thd, TABLE_LIST *table_list, MEM_ROOT
*mem_root, is not already open by some calling statement. */ my_error(ER_CANT_UPDATE_USED_TABLE_IN_SF_OR_TRG, MYF(0), - table->s->table_name); + table->s->table_name.str); DBUG_RETURN(0); } if (!my_strcasecmp(system_charset_info, table->alias, alias) && @@ -1202,10 +1819,9 @@ TABLE *open_table(THD *thd, TABLE_LIST *table_list, MEM_ROOT *mem_root, */ { char path[FN_REFLEN]; - db_type not_used; - strxnmov(path, FN_REFLEN, mysql_data_home, "/", table_list->db, "/", - table_list->table_name, reg_ext, NullS); - (void) unpack_filename(path, path); + enum legacy_db_type not_used; + build_table_filename(path, sizeof(path) - 1, + table_list->db, table_list->table_name, reg_ext); if (mysql_frm_type(thd, path, &not_used) == FRMTYPE_VIEW) { /* @@ -1215,9 +1831,8 @@ TABLE *open_table(THD *thd, TABLE_LIST *table_list, MEM_ROOT *mem_root, TABLE tab; table= &tab; VOID(pthread_mutex_lock(&LOCK_open)); - if (!open_unireg_entry(thd, table, table_list->db, - table_list->table_name, - alias, table_list, mem_root)) + if (!open_unireg_entry(thd, table, table_list, alias, + key, key_length, mem_root)) { DBUG_ASSERT(table_list->view != 0); VOID(pthread_mutex_unlock(&LOCK_open)); @@ -1269,7 +1884,7 @@ TABLE *open_table(THD *thd, TABLE_LIST *table_list, MEM_ROOT *mem_root, */ close_old_data_files(thd,thd->open_tables,0,0); if (table->in_use != thd) - wait_for_refresh(thd); + wait_for_condition(thd, &LOCK_open, &COND_refresh); else { VOID(pthread_mutex_unlock(&LOCK_open)); @@ -1304,15 +1919,11 @@ TABLE *open_table(THD *thd, TABLE_LIST *table_list, MEM_ROOT *mem_root, VOID(pthread_mutex_unlock(&LOCK_open)); DBUG_RETURN(NULL); } - if (open_unireg_entry(thd, table, table_list->db, table_list->table_name, - alias, table_list, mem_root) || - (!table_list->view && - !(table->s->table_cache_key= memdup_root(&table->mem_root, - (char*) key, - key_length)))) + + if (open_unireg_entry(thd, table, table_list, alias, key, key_length, + mem_root)) { - table->next=table->prev=table; - free_cache_entry(table); + my_free((gptr)table, MYF(0)); VOID(pthread_mutex_unlock(&LOCK_open)); DBUG_RETURN(NULL); } @@ -1322,11 +1933,6 @@ TABLE *open_table(THD *thd, TABLE_LIST *table_list, MEM_ROOT *mem_root, VOID(pthread_mutex_unlock(&LOCK_open)); DBUG_RETURN(0); // VIEW } - share= table->s; - share->db= share->table_cache_key; - share->key_length= key_length; - share->version= refresh_version; - share->flush_version= flush_version; DBUG_PRINT("info", ("inserting table %p into the cache", table)); VOID(my_hash_insert(&open_cache,(byte*) table)); } @@ -1342,9 +1948,11 @@ TABLE *open_table(THD *thd, TABLE_LIST *table_list, MEM_ROOT *mem_root, table->reginfo.lock_type=TL_READ; /* Assume read */ reset: + DBUG_ASSERT(table->s->ref_count > 0 || table->s->tmp_table != NO_TMP_TABLE); + if (thd->lex->need_correct_ident()) table->alias_name_used= my_strcasecmp(table_alias_charset, - table->s->table_name, alias); + table->s->table_name.str, alias); /* Fix alias if table name changes */ if (strcmp(table->alias, alias)) { @@ -1379,53 +1987,64 @@ TABLE *find_locked_table(THD *thd, const char *db,const char *table_name) for (TABLE *table=thd->open_tables; table ; table=table->next) { - if (table->s->key_length == key_length && - !memcmp(table->s->table_cache_key,key,key_length)) + if (table->s->table_cache_key.length == key_length && + !memcmp(table->s->table_cache_key.str, key, key_length)) return table; } return(0); } -/**************************************************************************** -** Reopen an table because the definition has changed.
The date file for the -** table is already closed. -** Returns 0 if ok. -** If table can't be reopened, the entry is unchanged. -****************************************************************************/ +/* + Reopen an table because the definition has changed. + + SYNOPSIS + reopen_table() + table Table object + + NOTES + The data file for the table is already closed and the share is released + The table has a 'dummy' share that mainly contains database and table name. + + RETURN + 0 ok + 1 error. The old table object is not changed. +*/ -bool reopen_table(TABLE *table,bool locked) +static bool reopen_table(TABLE *table) { TABLE tmp; - char *db= table->s->table_cache_key; - const char *table_name= table->s->table_name; bool error= 1; Field **field; uint key,part; + TABLE_LIST table_list; + THD *thd= table->in_use; DBUG_ENTER("reopen_table"); + DBUG_ASSERT(table->s->ref_count == 0); + DBUG_ASSERT(!table->sort.io_cache); + #ifdef EXTRA_DEBUG if (table->db_stat) sql_print_error("Table %s had a open data handler in reopen_table", table->alias); #endif - if (!locked) - VOID(pthread_mutex_lock(&LOCK_open)); - safe_mutex_assert_owner(&LOCK_open); - if (open_unireg_entry(table->in_use, &tmp, db, table_name, - table->alias, 0, table->in_use->mem_root)) - goto end; - free_io_cache(table); + table_list.db= table->s->db.str; + table_list.table_name= table->s->table_name.str; + table_list.table= table; + table_list.belong_to_view= 0; + table_list.next_local= 0; - if (!(tmp.s->table_cache_key= memdup_root(&tmp.mem_root,db, - table->s->key_length))) - { - delete tmp.triggers; - closefrm(&tmp); // End of memory + if (wait_for_locked_table_names(thd, &table_list)) + DBUG_RETURN(1); // Thread was killed + + if (open_unireg_entry(thd, &tmp, &table_list, + table->alias, + table->s->table_cache_key.str, + table->s->table_cache_key.length, + thd->mem_root)) goto end; - } - tmp.s->db= tmp.s->table_cache_key; /* This list copies variables set by open_table */ tmp.tablenr= table->tablenr; @@ -1437,12 +2056,11 @@ bool reopen_table(TABLE *table,bool locked) tmp.keys_in_use_for_query= tmp.s->keys_in_use; tmp.used_keys= tmp.s->keys_for_keyread; + tmp.s->table_map_id= table->s->table_map_id; + /* Get state */ - tmp.s->key_length= table->s->key_length; - tmp.in_use= table->in_use; + tmp.in_use= thd; tmp.reginfo.lock_type=table->reginfo.lock_type; - tmp.s->version= refresh_version; - tmp.s->tmp_table= table->s->tmp_table; tmp.grant= table->grant; /* Replace table in open list */ @@ -1451,11 +2069,10 @@ bool reopen_table(TABLE *table,bool locked) delete table->triggers; if (table->file) - VOID(closefrm(table)); // close file, free everything + VOID(closefrm(table, 1)); // close file, free everything *table= tmp; - table->s= &table->share_not_to_be_used; - table->file->change_table_ptr(table); + table->file->change_table_ptr(table, table->s); DBUG_ASSERT(table->alias != 0); for (field=table->field ; *field ; field++) @@ -1473,8 +2090,6 @@ bool reopen_table(TABLE *table,bool locked) error=0; end: - if (!locked) - VOID(pthread_mutex_unlock(&LOCK_open)); DBUG_RETURN(error); } @@ -1483,22 +2098,23 @@ bool reopen_table(TABLE *table,bool locked) Used with ALTER TABLE: Close all instanses of table when LOCK TABLES is in used; Close first all instances of table and then reopen them - */ +*/ bool close_data_tables(THD *thd,const char *db, const char *table_name) { TABLE *table; + DBUG_ENTER("close_data_tables"); + for (table=thd->open_tables; table ; table=table->next) { - if (!strcmp(table->s->table_name, table_name) && - 
!strcmp(table->s->db, db)) + if (!strcmp(table->s->table_name.str, table_name) && + !strcmp(table->s->db.str, db)) { mysql_lock_remove(thd, thd->locked_tables,table); - table->file->close(); - table->db_stat=0; + close_handle_and_leave_table_as_lock(table); } } - return 0; // For the future + DBUG_RETURN(0); // For the future } @@ -1509,20 +2125,21 @@ bool close_data_tables(THD *thd,const char *db, const char *table_name) bool reopen_tables(THD *thd,bool get_locks,bool in_refresh) { + TABLE *table,*next,**prev; + TABLE **tables,**tables_ptr; // For locks + bool error=0, not_used; DBUG_ENTER("reopen_tables"); - safe_mutex_assert_owner(&LOCK_open); if (!thd->open_tables) DBUG_RETURN(0); - TABLE *table,*next,**prev; - TABLE **tables,**tables_ptr; // For locks - bool error=0, not_used; + safe_mutex_assert_owner(&LOCK_open); if (get_locks) { /* The ptr is checked later */ uint opens=0; - for (table=thd->open_tables; table ; table=table->next) opens++; + for (table= thd->open_tables; table ; table=table->next) + opens++; tables= (TABLE**) my_alloca(sizeof(TABLE*)*opens); } else @@ -1534,7 +2151,7 @@ bool reopen_tables(THD *thd,bool get_locks,bool in_refresh) { uint db_stat=table->db_stat; next=table->next; - if (!tables || (!db_stat && reopen_table(table,1))) + if (!tables || (!db_stat && reopen_table(table))) { my_error(ER_CANT_REOPEN_TABLE, MYF(0), table->alias); VOID(hash_delete(&open_cache,(byte*) table)); @@ -1575,6 +2192,7 @@ bool reopen_tables(THD *thd,bool get_locks,bool in_refresh) DBUG_RETURN(error); } + /* Close handlers for tables in list, but leave the TABLE structure intact so that we can re-open these quickly @@ -1584,15 +2202,14 @@ bool reopen_tables(THD *thd,bool get_locks,bool in_refresh) void close_old_data_files(THD *thd, TABLE *table, bool abort_locks, bool send_refresh) { + bool found= send_refresh; DBUG_ENTER("close_old_data_files"); - bool found=send_refresh; + for (; table ; table=table->next) { if (table->s->version != refresh_version) { found=1; - if (!abort_locks) // If not from flush tables - table->s->version= refresh_version; // Let other threads use table if (table->db_stat) { if (abort_locks) @@ -1601,8 +2218,7 @@ void close_old_data_files(THD *thd, TABLE *table, bool abort_locks, mysql_lock_remove(thd, thd->locked_tables,table); table->locked_by_flush=1; // Will be reopened with locks } - table->file->close(); - table->db_stat=0; + close_handle_and_leave_table_as_lock(table); } } } @@ -1620,10 +2236,13 @@ void close_old_data_files(THD *thd, TABLE *table, bool abort_locks, bool table_is_used(TABLE *table, bool wait_for_name_lock) { + DBUG_ENTER("table_is_used"); do { - char *key= table->s->table_cache_key; - uint key_length= table->s->key_length; + char *key= table->s->table_cache_key.str; + uint key_length= table->s->table_cache_key.length; + + DBUG_PRINT("loop", ("table_name: %s", table->alias)); HASH_SEARCH_STATE state; for (TABLE *search= (TABLE*) hash_first(&open_cache, (byte*) key, key_length, &state); @@ -1631,13 +2250,28 @@ bool table_is_used(TABLE *table, bool wait_for_name_lock) search= (TABLE*) hash_next(&open_cache, (byte*) key, key_length, &state)) { - if (search->locked_by_flush || - search->locked_by_name && wait_for_name_lock || - search->db_stat && search->s->version < refresh_version) - return 1; // Table is used + DBUG_PRINT("info", ("share: 0x%lx locked_by_flush: %d " + "locked_by_name: %d db_stat: %u version: %u", + (ulong) search->s, + search->locked_by_flush, search->locked_by_name, + search->db_stat, + search->s->version)); + if 
(search->in_use == table->in_use)
+        continue;                            // Name locked by this thread
+      /*
+        We can't use the table under any of the following conditions:
+        - There is a name lock on it (table is to be deleted or altered)
+        - If we are in flush table and we didn't execute the flush
+        - If the table engine is open and it's an old version
+          (we must wait until all engines are shut down to use the table)
+      */
+      if (search->locked_by_name && wait_for_name_lock ||
+          search->locked_by_flush ||
+          (search->db_stat && search->s->version < refresh_version))
+        return 1;
     }
   } while ((table=table->next));
-  return 0;
+  DBUG_RETURN(0);
 }
@@ -1684,8 +2318,8 @@ bool drop_locked_tables(THD *thd,const char *db, const char *table_name)
   for (table= thd->open_tables; table ; table=next)
   {
     next=table->next;
-    if (!strcmp(table->s->table_name, table_name) &&
-        !strcmp(table->s->db, db))
+    if (!strcmp(table->s->table_name.str, table_name) &&
+        !strcmp(table->s->db.str, db))
     {
       mysql_lock_remove(thd, thd->locked_tables,table);
       VOID(hash_delete(&open_cache,(byte*) table));
@@ -1720,8 +2354,8 @@ void abort_locked_tables(THD *thd,const char *db, const char *table_name)
   TABLE *table;
   for (table= thd->open_tables; table ; table= table->next)
   {
-    if (!strcmp(table->s->table_name,table_name) &&
-        !strcmp(table->s->db, db))
+    if (!strcmp(table->s->table_name.str, table_name) &&
+        !strcmp(table->s->db.str, db))
     {
       mysql_lock_abort(thd,table);
       break;
     }
@@ -1731,141 +2365,216 @@ void abort_locked_tables(THD *thd,const char *db, const char *table_name)
 /*
+  Function to assign a new table map id to a table.
+
+  PARAMETERS
+
+    table - Pointer to table structure
+
+  PRE-CONDITION(S)
+
+    table is non-NULL
+    The LOCK_open mutex is locked
+
+  POST-CONDITION(S)
+
+    table->s->table_map_id is given a value that with high certainty is
+    not used by any other table.
+
+    table->s->table_map_id is not ULONG_MAX.
+ */
+static void assign_new_table_id(TABLE *table)
+{
+  static ulong last_table_id= ULONG_MAX;
+
+  DBUG_ENTER("assign_new_table_id(TABLE*)");
+
+  /* Preconditions */
+  DBUG_ASSERT(table != NULL);
+  safe_mutex_assert_owner(&LOCK_open);
+
+  ulong tid= ++last_table_id;                   /* get next id */
+  /* There is one reserved number that cannot be used.
*/ + if (unlikely(tid == ULONG_MAX)) + tid= ++last_table_id; + table->s->table_map_id= tid; + DBUG_PRINT("info", ("table_id=%lu", tid)); + + /* Post conditions */ + DBUG_ASSERT(table->s->table_map_id != ULONG_MAX); + + DBUG_VOID_RETURN; +} + +/* Load a table definition from file and open unireg table SYNOPSIS open_unireg_entry() thd Thread handle entry Store open table definition here - db Database name - name Table name + table_list TABLE_LIST with db, table_name & belong_to_view alias Alias name - table_desc TABLE_LIST descriptor (used with views) + cache_key Key for share_cache + cache_key_length length of cache_key mem_root temporary mem_root for parsing NOTES Extra argument for open is taken from thd->open_options + One must have a lock on LOCK_open when calling this function RETURN 0 ok # Error */ -static int open_unireg_entry(THD *thd, TABLE *entry, const char *db, - const char *name, const char *alias, - TABLE_LIST *table_desc, MEM_ROOT *mem_root) + +static int open_unireg_entry(THD *thd, TABLE *entry, TABLE_LIST *table_list, + const char *alias, + char *cache_key, uint cache_key_length, + MEM_ROOT *mem_root) { - char path[FN_REFLEN]; int error; + TABLE_SHARE *share; uint discover_retry_count= 0; DBUG_ENTER("open_unireg_entry"); - strxmov(path, mysql_data_home, "/", db, "/", name, NullS); - while ((error= openfrm(thd, path, alias, - (uint) (HA_OPEN_KEYFILE | HA_OPEN_RNDFILE | - HA_GET_INDEX | HA_TRY_READ_ONLY | - NO_ERR_ON_NEW_FRM), - READ_KEYINFO | COMPUTE_TYPES | EXTRA_RECORD, - thd->open_options, entry)) && - (error != 5 || - (fn_format(path, path, 0, reg_ext, MY_UNPACK_FILENAME), - open_new_frm(thd, path, alias, db, name, - (uint) (HA_OPEN_KEYFILE | HA_OPEN_RNDFILE | - HA_GET_INDEX | HA_TRY_READ_ONLY), - READ_KEYINFO | COMPUTE_TYPES | EXTRA_RECORD, - thd->open_options, entry, table_desc, mem_root)))) + safe_mutex_assert_owner(&LOCK_open); + +retry: + if (!(share= get_table_share_with_create(thd, table_list, cache_key, + cache_key_length, + OPEN_VIEW, &error))) + DBUG_RETURN(1); + if (share->is_view) { - if (!entry->s || !entry->s->crashed) + /* Open view */ + error= (int) open_new_frm(thd, share, alias, + (uint) (HA_OPEN_KEYFILE | HA_OPEN_RNDFILE | + HA_GET_INDEX | HA_TRY_READ_ONLY), + READ_KEYINFO | COMPUTE_TYPES | EXTRA_RECORD, + thd->open_options, entry, table_list, + mem_root); + if (error) + goto err; + /* TODO: Don't free this */ + release_table_share(share, RELEASE_NORMAL); + DBUG_RETURN(0); + } + + while ((error= open_table_from_share(thd, share, alias, + (uint) (HA_OPEN_KEYFILE | + HA_OPEN_RNDFILE | + HA_GET_INDEX | + HA_TRY_READ_ONLY), + (READ_KEYINFO | COMPUTE_TYPES | + EXTRA_RECORD), + thd->open_options, entry))) + { + if (error == 7) // Table def changed { + share->version= 0; // Mark share as old + if (discover_retry_count++) // Retry once + goto err; + /* - Frm file could not be found on disk - Since it does not exist, no one can be using it - LOCK_open has been locked to protect from someone else - trying to discover the table at the same time. + TODO: + Here we should wait until all threads has released the table. + For now we do one retry. This may cause a deadlock if there + is other threads waiting for other tables used by this thread. 
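assign_new_table_id() above is a wrap-around counter with a single reserved sentinel: ULONG_MAX means "no table mapped", so the counter skips it. A minimal sketch of the same idea, assuming callers serialize on a lock the way the hunk relies on LOCK_open (next_table_id is an illustrative name):

```cpp
#include <climits>

/* Starts at ULONG_MAX so the first increment wraps to 0; unsigned
   overflow is well defined in C++. */
static unsigned long last_table_id= ULONG_MAX;

unsigned long next_table_id()      // caller holds the serializing lock
{
  unsigned long id= ++last_table_id;
  if (id == ULONG_MAX)             // reserved value: "no table mapped"
    id= ++last_table_id;           // skip it; wraps to 0 and goes on
  return id;
}
```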
+ + Proper fix would be to if the second retry failed: + - Mark that table def changed + - Return from open table + - Close all tables used by this thread + - Start waiting that the share is released + - Retry by opening all tables again */ - if (discover_retry_count++ != 0) + if (ha_create_table_from_engine(thd, table_list->db, + table_list->table_name)) goto err; - if (ha_create_table_from_engine(thd, db, name) > 0) - { - /* Give right error message */ - thd->clear_error(); - DBUG_PRINT("error", ("Discovery of %s/%s failed", db, name)); - my_printf_error(ER_UNKNOWN_ERROR, - "Failed to open '%-.64s', error while " - "unpacking from engine", - MYF(0), name); - + /* + TO BE FIXED + To avoid deadlock, only wait for release if no one else is + using the share. + */ + if (share->ref_count != 1) goto err; - } - - mysql_reset_errors(thd, 1); // Clear warnings - thd->clear_error(); // Clear error message - continue; - } - - // Code below is for repairing a crashed file - TABLE_LIST table_list; - bzero((char*) &table_list, sizeof(table_list)); // just for safe - table_list.db=(char*) db; - table_list.table_name=(char*) name; - - safe_mutex_assert_owner(&LOCK_open); - - if ((error=lock_table_name(thd,&table_list))) - { - if (error < 0) - { - goto err; - } - if (wait_for_locked_table_names(thd,&table_list)) + /* Free share and wait until it's released by all threads */ + release_table_share(share, RELEASE_WAIT_FOR_DROP); + if (!thd->killed) { - unlock_table_name(thd,&table_list); - goto err; + mysql_reset_errors(thd, 1); // Clear warnings + thd->clear_error(); // Clear error message + goto retry; } + DBUG_RETURN(1); } - pthread_mutex_unlock(&LOCK_open); - thd->clear_error(); // Clear error message - error= 0; - if (openfrm(thd, path, alias, - (uint) (HA_OPEN_KEYFILE | HA_OPEN_RNDFILE | HA_GET_INDEX | - HA_TRY_READ_ONLY), - READ_KEYINFO | COMPUTE_TYPES | EXTRA_RECORD, - ha_open_options | HA_OPEN_FOR_REPAIR, - entry) || ! entry->file || - (entry->file->is_crashed() && entry->file->check_and_repair(thd))) - { - /* Give right error message */ - thd->clear_error(); - my_error(ER_NOT_KEYFILE, MYF(0), name, my_errno); - sql_print_error("Couldn't repair table: %s.%s",db,name); - if (entry->file) - closefrm(entry); - error=1; - } - else - thd->clear_error(); // Clear error message - pthread_mutex_lock(&LOCK_open); - unlock_table_name(thd,&table_list); - - if (error) + if (!entry->s || !entry->s->crashed) goto err; - break; - } - if (error == 5) - DBUG_RETURN(0); // we have just opened VIEW + // Code below is for repairing a crashed file + if ((error= lock_table_name(thd, table_list))) + { + if (error < 0) + goto err; + if (wait_for_locked_table_names(thd, table_list)) + { + unlock_table_name(thd, table_list); + goto err; + } + } + pthread_mutex_unlock(&LOCK_open); + thd->clear_error(); // Clear error message + error= 0; + if (open_table_from_share(thd, share, alias, + (uint) (HA_OPEN_KEYFILE | HA_OPEN_RNDFILE | + HA_GET_INDEX | + HA_TRY_READ_ONLY), + READ_KEYINFO | COMPUTE_TYPES | EXTRA_RECORD, + ha_open_options | HA_OPEN_FOR_REPAIR, + entry) || ! 
entry->file || + (entry->file->is_crashed() && entry->file->check_and_repair(thd))) + { + /* Give right error message */ + thd->clear_error(); + my_error(ER_NOT_KEYFILE, MYF(0), share->table_name.str, my_errno); + sql_print_error("Couldn't repair table: %s.%s", share->db.str, + share->table_name.str); + if (entry->file) + closefrm(entry, 0); + error=1; + } + else + thd->clear_error(); // Clear error message + pthread_mutex_lock(&LOCK_open); + unlock_table_name(thd, table_list); + + if (error) + goto err; + break; + } /* - We can't mark all tables in 'mysql' database as system since we don't - allow to lock such tables for writing with any other tables (even with - other system tables) and some privilege tables need this. - */ - if (!my_strcasecmp(system_charset_info, db, "mysql") && - !my_strcasecmp(system_charset_info, name, "proc")) - entry->s->system_table= 1; - - if (Table_triggers_list::check_n_load(thd, db, name, entry, 0)) + We assign a new table id under the protection of the LOCK_open + mutex. We assign a new table id here instead of inside openfrm() + since that function can be used without acquiring any lock (e.g., + inside ha_create_table()). Insted of creatint a new mutex and + using it for the sole purpose of serializing accesses to a static + variable, we assign the table id here. + + CAVEAT. This means that the table cannot be used for + binlogging/replication purposes, unless open_table() has been called + directly or indirectly. + */ + assign_new_table_id(entry); + + if (Table_triggers_list::check_n_load(thd, share->db.str, + share->table_name.str, entry, 0)) + { + closefrm(entry, 0); goto err; + } /* If we are here, there was no fatal error (but error may be still @@ -1877,13 +2586,14 @@ static int open_unireg_entry(THD *thd, TABLE *entry, const char *db, if (mysql_bin_log.is_open()) { char *query, *end; - uint query_buf_size= 20 + 2*NAME_LEN + 1; - if ((query= (char*)my_malloc(query_buf_size,MYF(MY_WME)))) + uint query_buf_size= 20 + share->db.length + share->table_name.length +1; + if ((query= (char*) my_malloc(query_buf_size,MYF(MY_WME)))) { + /* this DELETE FROM is needed even with row-based binlogging */ end = strxmov(strmov(query, "DELETE FROM `"), - db,"`.`",name,"`", NullS); - Query_log_event qinfo(thd, query, (ulong)(end-query), 0, FALSE); - mysql_bin_log.write(&qinfo); + share->db.str,"`.`",share->table_name.str,"`", NullS); + thd->binlog_query(THD::STMT_QUERY_TYPE, + query, (ulong)(end-query), FALSE, FALSE); my_free(query, MYF(0)); } else @@ -1893,25 +2603,19 @@ static int open_unireg_entry(THD *thd, TABLE *entry, const char *db, DBA on top of warning the client (which will automatically be done because of MYF(MY_WME) in my_malloc() above). 
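The binlog block above sizes the query buffer from the actual database and table name lengths (20 bytes of fixed text plus the names) instead of the former fixed 2*NAME_LEN estimate. A sketch of that sizing with standard library calls standing in for strxmov/my_malloc (make_heap_reset_query is a hypothetical helper):

```cpp
#include <cstdio>
#include <cstdlib>
#include <cstring>

char *make_heap_reset_query(const char *db, const char *table)
{
  /* "DELETE FROM `" + "`.`" + "`" is 17 bytes of fixed text; 20 mirrors
     the upper bound used in the hunk (20 + db len + table len + 1). */
  size_t n= 20 + strlen(db) + strlen(table) + 1;
  char *q= (char*) malloc(n);
  if (q)
    snprintf(q, n, "DELETE FROM `%s`.`%s`", db, table);
  return q;                 // caller frees; NULL on allocation failure
}
```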
*/ - sql_print_error("When opening HEAP table, could not allocate \ -memory to write 'DELETE FROM `%s`.`%s`' to the binary log",db,name); + sql_print_error("When opening HEAP table, could not allocate memory " + "to write 'DELETE FROM `%s`.`%s`' to the binary log", + table_list->db, table_list->table_name); delete entry->triggers; - if (entry->file) - closefrm(entry); + closefrm(entry, 0); goto err; } } } DBUG_RETURN(0); + err: - /* Hide "Table doesn't exist" errors if table belong to view */ - if (thd->net.last_errno == ER_NO_SUCH_TABLE && - table_desc && table_desc->belong_to_view) - { - TABLE_LIST *view= table_desc->belong_to_view; - thd->clear_error(); - my_error(ER_VIEW_INVALID, MYF(0), view->view_db.str, view->view_name.str); - } + release_table_share(share, RELEASE_NORMAL); DBUG_RETURN(1); } @@ -2081,7 +2785,7 @@ int open_tables(THD *thd, TABLE_LIST **start, uint *counter, uint flags) for (TABLE_LIST *tmp= *start; tmp; tmp= tmp->next_global) { /* Close normal (not temporary) changed tables */ - if (tmp->table && ! tmp->table->s->tmp_table) + if (tmp->table && ! tmp->table->s->tmp_table != NO_TMP_TABLE) { if (tmp->table->s->version != refresh_version || ! tmp->table->db_stat) @@ -2606,8 +3310,22 @@ void close_tables_for_reopen(THD *thd, TABLE_LIST *tables) /* Open a single table without table caching and don't set it in open_list - Used by alter_table to open a temporary table and when creating - a temporary table with CREATE TEMPORARY ... + + SYNPOSIS + open_temporary_table() + thd Thread object + path Path (without .frm) + db database + table_name Table name + link_in_list 1 if table should be linked into thd->temporary_tables + + NOTES: + Used by alter_table to open a temporary table and when creating + a temporary table with CREATE TEMPORARY ... 
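open_temporary_table(), continued below, builds its cache key through create_table_def_key(): the base key is the database and table name, each NUL-terminated, and temporary tables append eight extra bytes (TMP_TABLE_KEY_EXTRA) carrying the server id and pseudo thread id so different client and slave threads never collide. A hypothetical sketch of that layout (make_table_key and store4 are illustrative; the server's int4store is specifically little-endian):

```cpp
#include <cstdint>
#include <cstring>

static const size_t TMP_KEY_EXTRA= 8;          // two 4-byte trailers

static void store4(char *p, uint32_t v) { memcpy(p, &v, 4); }

size_t make_table_key(char *key, const char *db, const char *table,
                      bool tmp, uint32_t server_id, uint32_t pseudo_thread_id)
{
  size_t dlen= strlen(db), tlen= strlen(table);
  memcpy(key, db, dlen + 1);                   // "db\0"
  memcpy(key + dlen + 1, table, tlen + 1);     // "table\0"
  size_t key_length= dlen + 1 + tlen + 1;
  if (tmp)                                     // temp tables get 8 more bytes
  {
    store4(key + key_length, server_id);
    store4(key + key_length + 4, pseudo_thread_id);
    key_length+= TMP_KEY_EXTRA;
  }
  return key_length;
}
```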
+ + RETURN + 0 Error + # TABLE object */ TABLE *open_temporary_table(THD *thd, const char *path, const char *db, @@ -2615,51 +3333,53 @@ TABLE *open_temporary_table(THD *thd, const char *path, const char *db, { TABLE *tmp_table; TABLE_SHARE *share; + char cache_key[MAX_DBKEY_LENGTH], *saved_cache_key, *tmp_path; + uint key_length; + TABLE_LIST table_list; DBUG_ENTER("open_temporary_table"); - /* - The extra size in my_malloc() is for table_cache_key - 4 bytes for master thread id if we are in the slave - 1 byte to terminate db - 1 byte to terminate table_name - total of 6 extra bytes in my_malloc in addition to table/db stuff - */ - if (!(tmp_table=(TABLE*) my_malloc(sizeof(*tmp_table)+(uint) strlen(db)+ - (uint) strlen(table_name)+6+4, - MYF(MY_WME)))) + table_list.db= (char*) db; + table_list.table_name= (char*) table_name; + /* Create the cache_key for temporary tables */ + key_length= create_table_def_key(thd, cache_key, &table_list, 1); + + if (!(tmp_table= (TABLE*) my_malloc(sizeof(*tmp_table) + sizeof(*share) + + strlen(path)+1 + key_length, + MYF(MY_WME)))) DBUG_RETURN(0); /* purecov: inspected */ - if (openfrm(thd, path, table_name, - (uint) (HA_OPEN_KEYFILE | HA_OPEN_RNDFILE | HA_GET_INDEX), - READ_KEYINFO | COMPUTE_TYPES | EXTRA_RECORD, - ha_open_options, - tmp_table)) + share= (TABLE_SHARE*) (tmp_table+1); + tmp_path= (char*) (share+1); + saved_cache_key= strmov(tmp_path, path)+1; + memcpy(saved_cache_key, cache_key, key_length); + + init_tmp_table_share(share, saved_cache_key, key_length, + strend(saved_cache_key)+1, tmp_path); + + if (open_table_def(thd, share, 0) || + open_table_from_share(thd, share, table_name, + (uint) (HA_OPEN_KEYFILE | HA_OPEN_RNDFILE | + HA_GET_INDEX), + READ_KEYINFO | COMPUTE_TYPES | EXTRA_RECORD, + ha_open_options, + tmp_table)) { + /* No need to lock share->mutex as this is not needed for tmp tables */ + free_table_share(share); my_free((char*) tmp_table,MYF(0)); DBUG_RETURN(0); } - share= tmp_table->s; - tmp_table->reginfo.lock_type=TL_WRITE; // Simulate locked + tmp_table->reginfo.lock_type= TL_WRITE; // Simulate locked share->tmp_table= (tmp_table->file->has_transactions() ? 
TRANSACTIONAL_TMP_TABLE : TMP_TABLE); - share->table_cache_key= (char*) (tmp_table+1); - share->db= share->table_cache_key; - share->key_length= (uint) (strmov(((char*) (share->table_name= - strmov(share->table_cache_key, - db)+1)), - table_name) - - share->table_cache_key) +1; - int4store(share->table_cache_key + share->key_length, thd->server_id); - share->key_length+= 4; - int4store(share->table_cache_key + share->key_length, - thd->variables.pseudo_thread_id); - share->key_length+= 4; if (link_in_list) { - tmp_table->next=thd->temporary_tables; - thd->temporary_tables=tmp_table; + tmp_table->open_prev= &thd->temporary_tables; + if ((tmp_table->next= thd->temporary_tables)) + thd->temporary_tables->open_prev= &tmp_table->next; + thd->temporary_tables= tmp_table; if (thd->slave_thread) slave_open_temp_tables++; } @@ -2667,21 +3387,22 @@ TABLE *open_temporary_table(THD *thd, const char *path, const char *db, } -bool rm_temporary_table(enum db_type base, char *path) +bool rm_temporary_table(handlerton *base, char *path) { bool error=0; + handler *file; + char *ext; DBUG_ENTER("rm_temporary_table"); - fn_format(path, path,"",reg_ext,4); - unpack_filename(path,path); + strmov(ext= strend(path), reg_ext); if (my_delete(path,MYF(0))) error=1; /* purecov: inspected */ - *fn_ext(path)='\0'; // remove extension - handler *file= get_new_handler((TABLE*) 0, current_thd->mem_root, base); + *ext= 0; // remove extension + file= get_new_handler((TABLE_SHARE*) 0, current_thd->mem_root, base); if (file && file->delete_table(path)) { error=1; - sql_print_warning("Could not remove tmp table: '%s', error: %d", + sql_print_warning("Could not remove temporary table: '%s', error: %d", path, my_errno); } delete file; @@ -2708,15 +3429,20 @@ static void update_field_dependencies(THD *thd, Field *field, TABLE *table) { if (thd->set_query_id) { + table->file->ha_set_bit_in_rw_set(field->fieldnr, + (bool)(thd->set_query_id-1)); if (field->query_id != thd->query_id) { + if (table->get_fields_in_item_tree) + field->flags|= GET_FIXED_FIELDS_FLAG; field->query_id= thd->query_id; table->used_fields++; table->used_keys.intersect(field->part_of_key); } else thd->dupp_field= field; - } + } else if (table->get_fields_in_item_tree) + field->flags|= GET_FIXED_FIELDS_FLAG; } @@ -2933,8 +3659,18 @@ find_field_in_table(THD *thd, TABLE *table, const char *name, uint length, table->field[cached_field_index]->field_name, name)) field_ptr= table->field + cached_field_index; else if (table->s->name_hash.records) + { field_ptr= (Field**) hash_search(&table->s->name_hash, (byte*) name, length); + if (field_ptr) + { + /* + field_ptr points to field in TABLE_SHARE. Convert it to the matching + field in table + */ + field_ptr= (table->field + (field_ptr - table->s->field)); + } + } else { if (!(field_ptr= table->field)) @@ -2953,8 +3689,9 @@ find_field_in_table(THD *thd, TABLE *table, const char *name, uint length, { if (!allow_rowid || my_strcasecmp(system_charset_info, name, "_rowid") || - !(field=table->rowid_field)) + table->s->rowid_field_offset == 0) DBUG_RETURN((Field*) 0); + field= table->field[table->s->rowid_field_offset-1]; } update_field_dependencies(thd, field, table); @@ -3115,6 +3852,42 @@ find_field_in_table_ref(THD *thd, TABLE_LIST *table_list, /* + Find field in table, no side effects, only purpose is to check for field + in table object and get reference to the field if found. 
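open_temporary_table() above now carves the TABLE, its TABLE_SHARE, the path string and the cache key out of one my_malloc() block, so a single free releases all of them. A simplified sketch of that layout with stand-in types (Table/Share here are not the server's structures, and the share is assumed to need no stricter alignment than the block provides):

```cpp
#include <cstdlib>
#include <cstring>

struct Share { const char *path, *key; size_t key_length; };
struct Table { Share *s; };

Table *alloc_tmp_table(const char *path, const char *key, size_t key_length)
{
  size_t path_len= strlen(path) + 1;
  char *block= (char*) malloc(sizeof(Table) + sizeof(Share) +
                              path_len + key_length);
  if (!block)
    return 0;
  Table *t= (Table*) block;
  Share *s= (Share*) (t + 1);            // share sits right behind TABLE
  char  *p= (char*)  (s + 1);            // then the path, then the key
  memcpy(p, path, path_len);
  memcpy(p + path_len, key, key_length);
  t->s= s;
  s->path= p;
  s->key= p + path_len;
  s->key_length= key_length;
  return t;                              // free(t) releases all four parts
}
```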
+ + SYNOPSIS + find_field_in_table_sef() + + table table where to find + name Name of field searched for + + RETURN + 0 field is not found + # pointer to field +*/ + +Field *find_field_in_table_sef(TABLE *table, const char *name) +{ + Field **field_ptr; + if (table->s->name_hash.records) + field_ptr= (Field**)hash_search(&table->s->name_hash,(byte*) name, + strlen(name)); + else + { + if (!(field_ptr= table->field)) + return (Field *)0; + for (; *field_ptr; ++field_ptr) + if (!my_strcasecmp(system_charset_info, (*field_ptr)->field_name, name)) + break; + } + if (field_ptr) + return *field_ptr; + else + return (Field *)0; +} + + +/* Find field in table list. SYNOPSIS @@ -3763,15 +4536,19 @@ mark_common_columns(THD *thd, TABLE_LIST *table_ref_1, TABLE_LIST *table_ref_2, if (field_1) { + TABLE *table_1= nj_col_1->table_ref->table; /* Mark field_1 used for table cache. */ field_1->query_id= thd->query_id; - nj_col_1->table_ref->table->used_keys.intersect(field_1->part_of_key); + table_1->file->ha_set_bit_in_read_set(field_1->fieldnr); + table_1->used_keys.intersect(field_1->part_of_key); } if (field_2) { + TABLE *table_2= nj_col_2->table_ref->table; /* Mark field_2 used for table cache. */ field_2->query_id= thd->query_id; - nj_col_2->table_ref->table->used_keys.intersect(field_2->part_of_key); + table_2->file->ha_set_bit_in_read_set(field_2->fieldnr); + table_2->used_keys.intersect(field_2->part_of_key); } if (using_fields != NULL) @@ -4239,11 +5016,11 @@ int setup_wild(THD *thd, TABLE_LIST *tables, List<Item> &fields, ****************************************************************************/ bool setup_fields(THD *thd, Item **ref_pointer_array, - List<Item> &fields, bool set_query_id, + List<Item> &fields, ulong set_query_id, List<Item> *sum_func_list, bool allow_sum_func) { reg2 Item *item; - bool save_set_query_id= thd->set_query_id; + ulong save_set_query_id= thd->set_query_id; nesting_map save_allow_sum_func= thd->lex->allow_sum_func; List_iterator<Item> it(fields); DBUG_ENTER("setup_fields"); @@ -4466,7 +5243,7 @@ bool get_key_map_from_key_list(key_map *map, TABLE *table, 0) { my_error(ER_KEY_COLUMN_DOES_NOT_EXITS, MYF(0), name->c_ptr(), - table->s->table_name); + table->s->table_name.str); map->set_all(); return 1; } @@ -4629,6 +5406,7 @@ insert_fields(THD *thd, Name_resolution_context *context, const char *db_name, if (field->query_id == thd->query_id) thd->dupp_field= field; field->query_id= thd->query_id; + field->table->file->ha_set_bit_in_read_set(field->fieldnr); if (table) table->used_keys.intersect(field->part_of_key); @@ -4668,7 +5446,10 @@ insert_fields(THD *thd, Name_resolution_context *context, const char *db_name, For NATURAL joins, used_tables is updated in the IF above. */ if (table) + { table->used_fields= table->s->fields; + table->file->ha_set_all_bits_in_read_set(); + } } if (found) DBUG_RETURN(FALSE); @@ -4965,28 +5746,28 @@ static void mysql_rm_tmp_tables(void) for (i=0; i<=mysql_tmpdir_list.max; i++) { tmpdir=mysql_tmpdir_list.list[i]; - /* See if the directory exists */ + /* See if the directory exists */ if (!(dirp = my_dir(tmpdir,MYF(MY_WME | MY_DONT_SORT)))) continue; /* Remove all SQLxxx tables from directory */ - for (idx=0 ; idx < (uint) dirp->number_off_files ; idx++) - { - file=dirp->dir_entry+idx; + for (idx=0 ; idx < (uint) dirp->number_off_files ; idx++) + { + file=dirp->dir_entry+idx; - /* skiping . and .. */ - if (file->name[0] == '.' && (!file->name[1] || - (file->name[1] == '.' && !file->name[2]))) - continue; + /* skiping . and .. 
*/ + if (file->name[0] == '.' && (!file->name[1] || + (file->name[1] == '.' && !file->name[2]))) + continue; - if (!bcmp(file->name,tmp_file_prefix,tmp_file_prefix_length)) - { + if (!bcmp(file->name,tmp_file_prefix,tmp_file_prefix_length)) + { sprintf(filePath,"%s%s",tmpdir,file->name); VOID(my_delete(filePath,MYF(MY_WME))); + } } - } - my_dirend(dirp); + my_dirend(dirp); } DBUG_VOID_RETURN; } @@ -5015,7 +5796,7 @@ void remove_db_from_cache(const char *db) for (uint idx=0 ; idx < open_cache.records ; idx++) { TABLE *table=(TABLE*) hash_element(&open_cache,idx); - if (!strcmp(table->s->db, db)) + if (!strcmp(table->s->db.str, db)) { table->s->version= 0L; /* Free when thread is ready */ if (!table->in_use) @@ -5028,7 +5809,11 @@ void remove_db_from_cache(const char *db) /* -** free all unused tables + free all unused tables + + NOTE + This is called by 'handle_manager' when one wants to periodicly flush + all not used tables. */ void flush_tables() @@ -5061,7 +5846,8 @@ bool remove_table_from_cache(THD *thd, const char *db, const char *table_name, char key[MAX_DBKEY_LENGTH]; uint key_length; TABLE *table; - bool result=0, signalled= 0; + TABLE_SHARE *share; + bool result= 0, signalled= 0; DBUG_ENTER("remove_table_from_cache"); key_length=(uint) (strmov(strmov(key,db)+1,table_name)-key)+1; @@ -5085,6 +5871,7 @@ bool remove_table_from_cache(THD *thd, const char *db, const char *table_name, } else if (in_use != thd) { + DBUG_PRINT("info", ("Table was in use by other thread")); in_use->some_tables_deleted=1; if (table->db_stat) result=1; @@ -5116,10 +5903,30 @@ bool remove_table_from_cache(THD *thd, const char *db, const char *table_name, } } else + { + DBUG_PRINT("info", ("Table was in use by current thread. db_stat: %u", + table->db_stat)); result= result || (flags & RTFC_OWNED_BY_THD_FLAG); + } } while (unused_tables && !unused_tables->s->version) VOID(hash_delete(&open_cache,(byte*) unused_tables)); + + DBUG_PRINT("info", ("Removing table from table_def_cache")); + /* Remove table from table definition cache if it's not in use */ + if ((share= (TABLE_SHARE*) hash_search(&table_def_cache,(byte*) key, + key_length))) + { + DBUG_PRINT("info", ("share version: %lu ref_count: %u", + share->version, share->ref_count)); + share->version= 0; // Mark for delete + if (share->ref_count == 0) + { + pthread_mutex_lock(&share->mutex); + VOID(hash_delete(&table_def_cache, (byte*) share)); + } + } + if (result && (flags & RTFC_WAIT_OTHER_THREAD_FLAG)) { if (!(flags & RTFC_CHECK_KILLED_FLAG) || !thd->killed) @@ -5152,6 +5959,7 @@ bool remove_table_from_cache(THD *thd, const char *db, const char *table_name, DBUG_RETURN(result); } + int setup_ftfuncs(SELECT_LEX *select_lex) { List_iterator<Item_func_match> li(*(select_lex->ftfunc_list)), @@ -5196,7 +6004,7 @@ int init_ftfuncs(THD *thd, SELECT_LEX *select_lex, bool no_order) SYNOPSIS open_new_frm() THD thread handler - path path to .frm + path path to .frm file (without extension) alias alias for table db database table_name name of table @@ -5210,18 +6018,20 @@ int init_ftfuncs(THD *thd, SELECT_LEX *select_lex, bool no_order) */ static bool -open_new_frm(THD *thd, const char *path, const char *alias, - const char *db, const char *table_name, +open_new_frm(THD *thd, TABLE_SHARE *share, const char *alias, uint db_stat, uint prgflag, uint ha_open_flags, TABLE *outparam, TABLE_LIST *table_desc, MEM_ROOT *mem_root) { LEX_STRING pathstr; File_parser *parser; + char path[FN_REFLEN]; DBUG_ENTER("open_new_frm"); - pathstr.str= (char*) path; - pathstr.length= 
strlen(path); + /* Create path with extension */ + pathstr.length= (uint) (strxmov(path, share->normalized_path.str, reg_ext, + NullS)- path); + pathstr.str= path; if ((parser= sql_parse_prepare(&pathstr, mem_root, 1))) { @@ -5229,7 +6039,8 @@ open_new_frm(THD *thd, const char *path, const char *alias, { if (table_desc == 0 || table_desc->required_type == FRMTYPE_TABLE) { - my_error(ER_WRONG_OBJECT, MYF(0), db, table_name, "BASE TABLE"); + my_error(ER_WRONG_OBJECT, MYF(0), share->db.str, share->table_name.str, + "BASE TABLE"); goto err; } if (mysql_make_view(thd, parser, table_desc)) @@ -5238,7 +6049,7 @@ open_new_frm(THD *thd, const char *path, const char *alias, else { /* only VIEWs are supported now */ - my_error(ER_FRM_UNKNOWN_TYPE, MYF(0), path, parser->type()->str); + my_error(ER_FRM_UNKNOWN_TYPE, MYF(0), share->path, parser->type()->str); goto err; } DBUG_RETURN(0); diff --git a/sql/sql_binlog.cc b/sql/sql_binlog.cc new file mode 100644 index 00000000000..cc0e9714d85 --- /dev/null +++ b/sql/sql_binlog.cc @@ -0,0 +1,135 @@ +/* Copyright (C) 2005 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include "mysql_priv.h" +#include "base64.h" + +/* + Execute a BINLOG statement + + TODO: This currently assumes a MySQL 5.x binlog. + When we'll have binlog with a different format, to execute the + BINLOG command properly the server will need to know which format + the BINLOG command's event is in. mysqlbinlog should then send + the Format_description_log_event of the binlog it reads and the + server thread should cache this format into + rli->description_event_for_exec. +*/ + +void mysql_client_binlog_statement(THD* thd) +{ + DBUG_PRINT("info",("binlog base64: '%*s'", + (thd->lex->comment.length < 2048 ? 
+ thd->lex->comment.length : 2048), + thd->lex->comment.str)); + + /* + Temporarily turn off send_ok, since different events handle this + differently + */ + my_bool nsok= thd->net.no_send_ok; + thd->net.no_send_ok= TRUE; + + const my_size_t coded_len= thd->lex->comment.length + 1; + const my_size_t event_len= base64_needed_decoded_length(coded_len); + DBUG_ASSERT(coded_len > 0); + + /* + Allocation + */ + if (!thd->rli_fake) + thd->rli_fake= new RELAY_LOG_INFO; + + const Format_description_log_event *desc= + new Format_description_log_event(4); + + const char *error= 0; + char *buf= (char *) my_malloc(event_len, MYF(MY_WME)); + Log_event *ev; + int res; + + /* + Out of memory check + */ + if (!(thd->rli_fake && desc && buf)) + { + my_error(ER_OUTOFMEMORY, MYF(0), 1); /* needed 1 bytes */ + goto end; + } + + thd->rli_fake->sql_thd= thd; + thd->rli_fake->no_storage= TRUE; + + res= base64_decode(thd->lex->comment.str, coded_len, buf); + + DBUG_PRINT("info",("binlog base64 decoded_len=%d, event_len=%d\n", + res, uint4korr(buf + EVENT_LEN_OFFSET))); + /* + Note that 'res' is the correct event length, 'event_len' was + calculated based on the base64-string that possibly contained + extra spaces, so it can be longer than the real event. + */ + if (res < EVENT_LEN_OFFSET + || (uint) res != uint4korr(buf+EVENT_LEN_OFFSET)) + { + my_error(ER_SYNTAX_ERROR, MYF(0)); + goto end; + } + + ev= Log_event::read_log_event(buf, res, &error, desc); + + DBUG_PRINT("info",("binlog base64 err=%s", error)); + if (!ev) + { + /* + This could actually be an out-of-memory, but it is more + likely causes by a bad statement + */ + my_error(ER_SYNTAX_ERROR, MYF(0)); + goto end; + } + + DBUG_PRINT("info",("ev->get_type_code()=%d", ev->get_type_code())); + DBUG_PRINT("info",("buf+EVENT_TYPE_OFFSET=%d", buf+EVENT_TYPE_OFFSET)); + + ev->thd= thd; + if (ev->exec_event(thd->rli_fake)) + { + my_error(ER_UNKNOWN_ERROR, MYF(0), "Error executing BINLOG statement"); + goto end; + } + + /* + Restore setting of no_send_ok + */ + thd->net.no_send_ok= nsok; + + DBUG_PRINT("info",("binlog base64 execution finished successfully")); + send_ok(thd); + +end: + /* + Restore setting of no_send_ok + */ + thd->net.no_send_ok= nsok; + + if (ev) + delete ev; + if (desc) + delete desc; + if (buf) + my_free(buf, MYF(0)); +} diff --git a/sql/sql_bitmap.h b/sql/sql_bitmap.h index 0f5b6dcd35e..35c501ede56 100644 --- a/sql/sql_bitmap.h +++ b/sql/sql_bitmap.h @@ -25,7 +25,7 @@ template <uint default_width> class Bitmap { MY_BITMAP map; - uchar buffer[(default_width+7)/8]; + uint32 buffer[(default_width+31)/32]; public: Bitmap() { init(); } Bitmap(const Bitmap& from) { *this=from; } @@ -48,14 +48,14 @@ public: void intersect(ulonglong map2buff) { MY_BITMAP map2; - bitmap_init(&map2, (uchar *)&map2buff, sizeof(ulonglong)*8, 0); + bitmap_init(&map2, (uint32 *)&map2buff, sizeof(ulonglong)*8, 0); bitmap_intersect(&map, &map2); } /* Use highest bit for all bits above sizeof(ulonglong)*8. 
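mysql_client_binlog_statement() above treats the base64-derived size only as an upper bound (the encoded string may contain padding or whitespace) and trusts the 4-byte length stored in the event header itself. A sketch of that validation, assuming the v4 header layout where the little-endian total length sits at byte offset 9 (EVENT_LEN_OFFSET), which is what uint4korr reads:

```cpp
#include <cstdint>

static const long EVENT_LEN_OFFSET= 9;  // ts(4) + type(1) + server_id(4)

static uint32_t read_le32(const unsigned char *p)
{
  return (uint32_t) p[0]         | ((uint32_t) p[1] << 8) |
         ((uint32_t) p[2] << 16) | ((uint32_t) p[3] << 24);
}

bool event_length_ok(const unsigned char *buf, long decoded_len)
{
  if (decoded_len < EVENT_LEN_OFFSET + 4)
    return false;               // too short to even hold the length field
  return (uint32_t) decoded_len == read_le32(buf + EVENT_LEN_OFFSET);
}
```

(The hunk itself only requires res >= EVENT_LEN_OFFSET before comparing; the stricter +4 here is a defensive choice of this sketch.)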
*/ void intersect_extended(ulonglong map2buff) { intersect(map2buff); - if (map.bitmap_size > sizeof(ulonglong)) + if (map.n_bits > sizeof(ulonglong) * 8) bitmap_set_above(&map, sizeof(ulonglong), test(map2buff & (LL(1) << (sizeof(ulonglong) * 8 - 1)))); } @@ -70,7 +70,7 @@ public: char *print(char *buf) const { char *s=buf; - const uchar *e=buffer, *b=e+sizeof(buffer)-1; + const uchar *e=(uchar *)buffer, *b=e+sizeof(buffer)-1; while (!*b && b>e) b--; if ((*s=_dig_vec_upper[*b >> 4]) != '0') diff --git a/sql/sql_cache.cc b/sql/sql_cache.cc index cf3ba9c8c40..aec370e104a 100644 --- a/sql/sql_cache.cc +++ b/sql/sql_cache.cc @@ -303,7 +303,7 @@ TODO list: #ifndef MASTER #include "../srclib/myisammrg/myrg_def.h" #else -#include "../myisammrg/myrg_def.h" +#include "../storage/myisammrg/myrg_def.h" #endif #ifdef EMBEDDED_LIBRARY @@ -850,7 +850,7 @@ sql mode: 0x%lx, sort len: %lu, conncat len: %lu", if (thd->db_length) { memcpy(thd->query+thd->query_length+1, thd->db, thd->db_length); - DBUG_PRINT("qcache", ("database : %s length %u", + DBUG_PRINT("qcache", ("database: %s length: %u", thd->db, thd->db_length)); } else @@ -986,9 +986,10 @@ Query_cache::send_result_to_client(THD *thd, char *sql, uint query_length) while (sql[i]=='(') i++; + /* Test if the query is a SELECT - (pre-space is removed in dispatch_command) + (pre-space is removed in dispatch_command). First '/' looks like comment before command it is not frequently appeared in real lihe, consequently we can @@ -997,7 +998,7 @@ Query_cache::send_result_to_client(THD *thd, char *sql, uint query_length) if ((my_toupper(system_charset_info, sql[i]) != 'S' || my_toupper(system_charset_info, sql[i + 1]) != 'E' || my_toupper(system_charset_info, sql[i + 2]) != 'L') && - sql[i] != '/') + sql[0] != '/') { DBUG_PRINT("qcache", ("The statement is not a SELECT; Not cached")); goto err; @@ -1016,7 +1017,7 @@ Query_cache::send_result_to_client(THD *thd, char *sql, uint query_length) if (thd->db_length) { memcpy(sql+query_length+1, thd->db, thd->db_length); - DBUG_PRINT("qcache", ("database: '%s' length %u", + DBUG_PRINT("qcache", ("database: '%s' length: %u", thd->db, thd->db_length)); } else @@ -1113,9 +1114,9 @@ sql mode: 0x%lx, sort len: %lu, conncat len: %lu", */ for (tmptable= thd->temporary_tables; tmptable ; tmptable= tmptable->next) { - if (tmptable->s->key_length - TMP_TABLE_KEY_EXTRA == + if (tmptable->s->table_cache_key.length - TMP_TABLE_KEY_EXTRA == table->key_length() && - !memcmp(tmptable->s->table_cache_key, table->data(), + !memcmp(tmptable->s->table_cache_key.str, table->data(), table->key_length())) { DBUG_PRINT("qcache", @@ -1278,7 +1279,7 @@ void Query_cache::invalidate(CHANGED_TABLE_LIST *tables_used) for (; tables_used; tables_used= tables_used->next) { invalidate_table((byte*) tables_used->key, tables_used->key_length); - DBUG_PRINT("qcache", (" db %s, table %s", tables_used->key, + DBUG_PRINT("qcache", ("db: %s table: %s", tables_used->key, tables_used->key+ strlen(tables_used->key)+1)); } @@ -2145,7 +2146,8 @@ void Query_cache::invalidate_table(TABLE_LIST *table_list) void Query_cache::invalidate_table(TABLE *table) { - invalidate_table((byte*) table->s->table_cache_key, table->s->key_length); + invalidate_table((byte*) table->s->table_cache_key.str, + table->s->table_cache_key.length); } void Query_cache::invalidate_table(byte * key, uint32 key_length) @@ -2206,7 +2208,7 @@ Query_cache::register_tables_from_list(TABLE_LIST *tables_used, { char key[MAX_DBKEY_LENGTH]; uint key_length; - DBUG_PRINT("qcache", ("view %s, db %s", 
+ DBUG_PRINT("qcache", ("view: %s db: %s", tables_used->view_name.str, tables_used->view_db.str)); key_length= (uint) (strmov(strmov(key, tables_used->view_db.str) + 1, @@ -2226,21 +2228,22 @@ Query_cache::register_tables_from_list(TABLE_LIST *tables_used, else { DBUG_PRINT("qcache", - ("table %s, db %s, openinfo at 0x%lx, keylen %u, key at 0x%lx", - tables_used->table->s->table_name, - tables_used->table->s->table_cache_key, + ("table: %s db: %s openinfo: 0x%lx keylen: %u key: 0x%lx", + tables_used->table->s->table_name.str, + tables_used->table->s->table_cache_key.str, (ulong) tables_used->table, - tables_used->table->s->key_length, - (ulong) tables_used->table->s->table_cache_key)); - if (!insert_table(tables_used->table->s->key_length, - tables_used->table->s->table_cache_key, block_table, + tables_used->table->s->table_cache_key.length, + (ulong) tables_used->table->s->table_cache_key.str)); + if (!insert_table(tables_used->table->s->table_cache_key.length, + tables_used->table->s->table_cache_key.str, + block_table, tables_used->db_length, tables_used->table->file->table_cache_type(), tables_used->callback_func, tables_used->engine_data)) DBUG_RETURN(0); - if (tables_used->table->s->db_type == DB_TYPE_MRG_MYISAM) + if (tables_used->table->s->db_type == &myisammrg_hton) { ha_myisammrg *handler = (ha_myisammrg *) tables_used->table->file; MYRG_INFO *file = handler->myrg_info(); @@ -2833,16 +2836,16 @@ static TABLE_COUNTER_TYPE process_and_count_tables(TABLE_LIST *tables_used, table_count++; if (tables_used->view) { - DBUG_PRINT("qcache", ("view %s, db %s", + DBUG_PRINT("qcache", ("view: %s db: %s", tables_used->view_name.str, tables_used->view_db.str)); *tables_type|= HA_CACHE_TBL_NONTRANSACT; } else { - DBUG_PRINT("qcache", ("table %s, db %s, type %u", - tables_used->table->s->table_name, - tables_used->table->s->table_cache_key, + DBUG_PRINT("qcache", ("table: %s db: %s type: %u", + tables_used->table->s->table_name.str, + tables_used->table->s->db.str, tables_used->table->s->db_type)); if (tables_used->derived) { @@ -2860,15 +2863,15 @@ static TABLE_COUNTER_TYPE process_and_count_tables(TABLE_LIST *tables_used, (*tables_type & HA_CACHE_TBL_NOCACHE) || (tables_used->db_length == 5 && my_strnncoll(table_alias_charset, - (uchar*)tables_used->table->s->table_cache_key, 6, + (uchar*)tables_used->table->s->table_cache_key.str, 6, (uchar*)"mysql",6) == 0)) { DBUG_PRINT("qcache", - ("select not cacheable: temporary, system or \ - other non-cacheable table(s)")); + ("select not cacheable: temporary, system or " + "other non-cacheable table(s)")); DBUG_RETURN(0); } - if (tables_used->table->s->db_type == DB_TYPE_MRG_MYISAM) + if (tables_used->table->s->db_type == &myisammrg_hton) { ha_myisammrg *handler = (ha_myisammrg *)tables_used->table->file; MYRG_INFO *file = handler->myrg_info(); @@ -2947,11 +2950,13 @@ my_bool Query_cache::ask_handler_allowance(THD *thd, for (; tables_used; tables_used= tables_used->next_global) { TABLE *table; + handler *handler; if (!(table= tables_used->table)) continue; - handler *handler= table->file; - if (!handler->register_query_cache_table(thd, table->s->table_cache_key, - table->s->key_length, + handler= table->file; + if (!handler->register_query_cache_table(thd, + table->s->table_cache_key.str, + table->s->table_cache_key.length, &tables_used->callback_func, &tables_used->engine_data)) { diff --git a/sql/sql_cache.h b/sql/sql_cache.h index 69a0d6cd05d..b7531696150 100644 --- a/sql/sql_cache.h +++ b/sql/sql_cache.h @@ -215,6 +215,8 @@ struct 
Query_cache_memory_bin struct Query_cache_memory_bin_step { +public: + Query_cache_memory_bin_step() {} ulong size; ulong increment; uint idx; diff --git a/sql/sql_class.cc b/sql/sql_class.cc index ed2089546da..853e8be6629 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -27,6 +27,8 @@ #endif #include "mysql_priv.h" +#include <my_bitmap.h> +#include "log_event.h" #include <m_ctype.h> #include <sys/stat.h> #include <thr_alarm.h> @@ -174,7 +176,7 @@ Open_tables_state::Open_tables_state(ulong version_arg) THD::THD() :Statement(CONVENTIONAL_EXECUTION, 0, ALLOC_ROOT_MIN_BLOCK_SIZE, 0), - Open_tables_state(refresh_version), + Open_tables_state(refresh_version), rli_fake(0), lock_id(&main_lock_id), user_time(0), in_sub_stmt(0), global_read_lock(0), is_fatal_error(0), rand_used(0), time_zone_used(0), @@ -227,6 +229,9 @@ THD::THD() ull=0; system_thread= cleanup_done= abort_on_warning= no_warnings_for_error= 0; peer_port= 0; // For SHOW PROCESSLIST +#ifdef HAVE_ROW_BASED_REPLICATION + transaction.m_pending_rows_event= 0; +#endif #ifdef __WIN__ real_id = 0; #endif @@ -288,7 +293,7 @@ void THD::init(void) variables.date_format); variables.datetime_format= date_time_format_copy((THD*) 0, variables.datetime_format); -#ifdef HAVE_NDBCLUSTER_DB +#ifdef WITH_NDBCLUSTER_STORAGE_ENGINE variables.ndb_use_transactions= 1; #endif pthread_mutex_unlock(&LOCK_global_system_variables); @@ -440,6 +445,11 @@ THD::~THD() #ifndef DBUG_OFF dbug_sentry= THD_SENTRY_GONE; #endif +#ifndef EMBEDDED_LIBRARY + if (rli_fake) + delete rli_fake; +#endif + DBUG_VOID_RETURN; } @@ -667,7 +677,8 @@ void THD::add_changed_table(TABLE *table) DBUG_ASSERT((options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) && table->file->has_transactions()); - add_changed_table(table->s->table_cache_key, table->s->key_length); + add_changed_table(table->s->table_cache_key.str, + table->s->table_cache_key.length); DBUG_VOID_RETURN; } @@ -744,6 +755,13 @@ int THD::send_explain_fields(select_result *result) field_list.push_back(new Item_empty_string("select_type", 19, cs)); field_list.push_back(item= new Item_empty_string("table", NAME_LEN, cs)); item->maybe_null= 1; +#ifdef WITH_PARTITION_STORAGE_ENGINE + if (lex->describe & DESCRIBE_PARTITIONS) + { + field_list.push_back(item= new Item_empty_string("partitions", 10, cs)); + item->maybe_null= 1; + } +#endif field_list.push_back(item= new Item_empty_string("type", 10, cs)); item->maybe_null= 1; field_list.push_back(item=new Item_empty_string("possible_keys", @@ -911,7 +929,7 @@ bool select_send::send_data(List<Item> &items) return 0; } -#ifdef HAVE_INNOBASE_DB +#ifdef WITH_INNOBASE_STORAGE_ENGINE /* We may be passing the control from mysqld to the client: release the InnoDB adaptive hash S-latch to avoid thread deadlocks if it was reserved @@ -947,7 +965,7 @@ bool select_send::send_data(List<Item> &items) bool select_send::send_eof() { -#ifdef HAVE_INNOBASE_DB +#ifdef WITH_INNOBASE_STORAGE_ENGINE /* We may be passing the control from mysqld to the client: release the InnoDB adaptive hash S-latch to avoid thread deadlocks if it was reserved by thd */ @@ -1062,7 +1080,8 @@ static File create_file(THD *thd, char *path, sql_exchange *exchange, if (!dirname_length(exchange->file_name)) { - strxnmov(path, FN_REFLEN, mysql_real_data_home, thd->db ? thd->db : "", NullS); + strxnmov(path, FN_REFLEN-1, mysql_real_data_home, thd->db ? 
thd->db : "", + NullS); (void) fn_format(path, exchange->file_name, path, "", option); } else @@ -1957,7 +1976,27 @@ void THD::reset_sub_statement_state(Sub_statement_state *backup, backup->client_capabilities= client_capabilities; backup->savepoints= transaction.savepoints; - if (!lex->requires_prelocking() || is_update_query(lex->sql_command)) +#ifdef HAVE_ROW_BASED_REPLICATION + /* + For row-based replication and before executing a function/trigger, + the pending rows event has to be flushed. The function/trigger + might execute statement that require the pending event to be + flushed. A simple example: + + CREATE FUNCTION foo() RETURNS INT + BEGIN + SAVEPOINT x; + RETURN 0; + END + + INSERT INTO t1 VALUES (1), (foo()), (2); + */ + if (binlog_row_based) + binlog_flush_pending_rows_event(false); +#endif /* HAVE_ROW_BASED_REPLICATION */ + + if ((!lex->requires_prelocking() || is_update_query(lex->sql_command)) && + !binlog_row_based) options&= ~OPTION_BIN_LOG; /* Disable result sets */ client_capabilities &= ~CLIENT_MULTI_RESULTS; @@ -2099,3 +2138,439 @@ void xid_cache_delete(XID_STATE *xid_state) pthread_mutex_unlock(&LOCK_xid_cache); } +/* + Implementation of interface to write rows to the binary log through the + thread. The thread is responsible for writing the rows it has + inserted/updated/deleted. +*/ + +#ifndef MYSQL_CLIENT +#ifdef HAVE_ROW_BASED_REPLICATION + +/* + Template member function for ensuring that there is an rows log + event of the apropriate type before proceeding. + + PRE CONDITION: + - Events of type 'RowEventT' have the type code 'type_code'. + + POST CONDITION: + If a non-NULL pointer is returned, the pending event for thread 'thd' will + be an event of type 'RowEventT' (which have the type code 'type_code') + will either empty or have enough space to hold 'needed' bytes. In + addition, the columns bitmap will be correct for the row, meaning that + the pending event will be flushed if the columns in the event differ from + the columns suppled to the function. + + RETURNS + If no error, a non-NULL pending event (either one which already existed or + the newly created one). + If error, NULL. + */ + +template <class RowsEventT> Rows_log_event* +THD::binlog_prepare_pending_rows_event(TABLE* table, uint32 serv_id, + MY_BITMAP const* cols, + my_size_t colcnt, + my_size_t needed, + bool is_transactional) +{ + /* Pre-conditions */ + DBUG_ASSERT(table->s->table_map_id != ULONG_MAX); + + /* Fetch the type code for the RowsEventT template parameter */ + int const type_code= RowsEventT::TYPE_CODE; + + /* + There is no good place to set up the transactional data, so we + have to do it here. + */ + if (binlog_setup_trx_data()) + return NULL; + + Rows_log_event* pending= binlog_get_pending_rows_event(); + + if (unlikely(pending && !pending->is_valid())) + return NULL; + + /* + Check if the current event is non-NULL and a write-rows + event. Also check if the table provided is mapped: if it is not, + then we have switched to writing to a new table. + If there is no pending event, we need to create one. If there is a pending + event, but it's not about the same table id, or not of the same type + (between Write, Update and Delete), or not the same affected columns, or + going to be too big, flush this event to disk and create a new pending + event. 
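The comment above lists when a pending row-event batch can keep growing and when it must be flushed and replaced. A condensed sketch of that reuse test with a simplified stand-in type (the real check also compares the originating server id and the column bitmap via bitmap_cmp):

```cpp
#include <cstddef>

struct Pending                    // stand-in for the pending Rows_log_event
{
  unsigned long table_id;
  int           type_code;        // Write/Update/Delete rows event kind
  size_t        width;            // number of columns in the event
  size_t        data_size;
};

bool can_append(const Pending *p, unsigned long table_id, int type_code,
                size_t width, size_t needed, size_t max_size)
{
  return p != 0 &&
         p->table_id  == table_id &&          // same table mapping
         p->type_code == type_code &&         // same event kind
         p->width     == width &&             // same column count
         p->data_size + needed <= max_size;   // still under the size cap
}
```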
+ */ + if (!pending || + pending->server_id != serv_id || + pending->get_table_id() != table->s->table_map_id || + pending->get_type_code() != type_code || + pending->get_data_size() + needed > opt_binlog_rows_event_max_size || + pending->get_width() != colcnt || + !bitmap_cmp(pending->get_cols(), cols)) + { + /* Create a new RowsEventT... */ + Rows_log_event* const + ev= new RowsEventT(this, table, table->s->table_map_id, cols, + is_transactional); + if (unlikely(!ev)) + return NULL; + ev->server_id= serv_id; // I don't like this, it's too easy to forget. + /* + flush the pending event and replace it with the newly created + event... + */ + if (unlikely(mysql_bin_log.flush_and_set_pending_rows_event(this, ev))) + { + delete ev; + return NULL; + } + + return ev; /* This is the new pending event */ + } + return pending; /* This is the current pending event */ +} + +/* + Instansiate the versions we need, we have -fno-implicit-template as + compiling option. +*/ +template Rows_log_event* +THD::binlog_prepare_pending_rows_event<Write_rows_log_event> +(TABLE*, uint32, MY_BITMAP const*, my_size_t colcnt, size_t, bool); + +template Rows_log_event* +THD::binlog_prepare_pending_rows_event<Delete_rows_log_event> +(TABLE*, uint32, MY_BITMAP const*, my_size_t colcnt, size_t, bool); + +template Rows_log_event* +THD::binlog_prepare_pending_rows_event<Update_rows_log_event> +(TABLE*, uint32, MY_BITMAP const*, my_size_t colcnt, size_t, bool); + +static char const* +field_type_name(enum_field_types type) +{ + switch (type) + { + case MYSQL_TYPE_DECIMAL: + return "MYSQL_TYPE_DECIMAL"; + case MYSQL_TYPE_TINY: + return "MYSQL_TYPE_TINY"; + case MYSQL_TYPE_SHORT: + return "MYSQL_TYPE_SHORT"; + case MYSQL_TYPE_LONG: + return "MYSQL_TYPE_LONG"; + case MYSQL_TYPE_FLOAT: + return "MYSQL_TYPE_FLOAT"; + case MYSQL_TYPE_DOUBLE: + return "MYSQL_TYPE_DOUBLE"; + case MYSQL_TYPE_NULL: + return "MYSQL_TYPE_NULL"; + case MYSQL_TYPE_TIMESTAMP: + return "MYSQL_TYPE_TIMESTAMP"; + case MYSQL_TYPE_LONGLONG: + return "MYSQL_TYPE_LONGLONG"; + case MYSQL_TYPE_INT24: + return "MYSQL_TYPE_INT24"; + case MYSQL_TYPE_DATE: + return "MYSQL_TYPE_DATE"; + case MYSQL_TYPE_TIME: + return "MYSQL_TYPE_TIME"; + case MYSQL_TYPE_DATETIME: + return "MYSQL_TYPE_DATETIME"; + case MYSQL_TYPE_YEAR: + return "MYSQL_TYPE_YEAR"; + case MYSQL_TYPE_NEWDATE: + return "MYSQL_TYPE_NEWDATE"; + case MYSQL_TYPE_VARCHAR: + return "MYSQL_TYPE_VARCHAR"; + case MYSQL_TYPE_BIT: + return "MYSQL_TYPE_BIT"; + case MYSQL_TYPE_NEWDECIMAL: + return "MYSQL_TYPE_NEWDECIMAL"; + case MYSQL_TYPE_ENUM: + return "MYSQL_TYPE_ENUM"; + case MYSQL_TYPE_SET: + return "MYSQL_TYPE_SET"; + case MYSQL_TYPE_TINY_BLOB: + return "MYSQL_TYPE_TINY_BLOB"; + case MYSQL_TYPE_MEDIUM_BLOB: + return "MYSQL_TYPE_MEDIUM_BLOB"; + case MYSQL_TYPE_LONG_BLOB: + return "MYSQL_TYPE_LONG_BLOB"; + case MYSQL_TYPE_BLOB: + return "MYSQL_TYPE_BLOB"; + case MYSQL_TYPE_VAR_STRING: + return "MYSQL_TYPE_VAR_STRING"; + case MYSQL_TYPE_STRING: + return "MYSQL_TYPE_STRING"; + case MYSQL_TYPE_GEOMETRY: + return "MYSQL_TYPE_GEOMETRY"; + } + return "Unknown"; +} + +my_size_t THD::max_row_length_blob(TABLE *table, const byte *data) const +{ + my_size_t length= 0; + TABLE_SHARE *table_s= table->s; + uint* const beg= table_s->blob_field; + uint* const end= beg + table_s->blob_fields; + + for (uint *ptr= beg ; ptr != end ; ++ptr) + { + Field_blob* const blob= (Field_blob*) table->field[*ptr]; + length+= blob->get_length(data + blob->offset()) + 2; + } + + return length; +} + +my_size_t THD::pack_row(TABLE *table, MY_BITMAP 
const* cols, byte *row_data, + const byte *record) const +{ + Field **p_field= table->field, *field= *p_field; + int n_null_bytes= table->s->null_bytes; + my_ptrdiff_t const offset= record - (byte*) table->record[0]; + + memcpy(row_data, record, n_null_bytes); + byte *ptr= row_data+n_null_bytes; + + for (int i= 0 ; field ; i++, p_field++, field= *p_field) + { + if (bitmap_is_set(cols,i)) + ptr= field->pack(ptr, field->ptr + offset); + } + + /* + my_ptrdiff_t is signed, size_t is unsigned. Assert that the + conversion will work correctly. + */ + DBUG_ASSERT(ptr - row_data >= 0); + return (static_cast<size_t>(ptr - row_data)); +} + +int THD::binlog_write_row(TABLE* table, bool is_trans, + MY_BITMAP const* cols, my_size_t colcnt, + byte const *record) +{ + DBUG_ASSERT(binlog_row_based && mysql_bin_log.is_open()); + + /* + Pack records into format for transfer. We are allocating more + memory than needed, but that doesn't matter. + */ + bool error= 0; + byte *row_data= table->write_row_record; + my_size_t const max_len= max_row_length(table, record); + + /* + * Allocate room for a row (if needed) + */ + if (!row_data) + { + if (!table->s->blob_fields) + { + /* multiply max_len by 2 so it can be used for update_row as well */ + table->write_row_record= alloc_root(&table->mem_root, 2*max_len); + if (!table->write_row_record) + return HA_ERR_OUT_OF_MEM; + row_data= table->write_row_record; + } + else if (unlikely(!(row_data= my_malloc(max_len, MYF(MY_WME))))) + return HA_ERR_OUT_OF_MEM; + } + my_size_t const len= pack_row(table, cols, row_data, record); + + Rows_log_event* const + ev= binlog_prepare_pending_rows_event<Write_rows_log_event> + (table, server_id, cols, colcnt, len, is_trans); + + /* add_row_data copies row_data to internal buffer */ + error= likely(ev != 0) ? ev->add_row_data(row_data,len) : HA_ERR_OUT_OF_MEM ; + + if (table->write_row_record == 0) + my_free(row_data, MYF(MY_WME)); + + return error; +} + +int THD::binlog_update_row(TABLE* table, bool is_trans, + MY_BITMAP const* cols, my_size_t colcnt, + const byte *before_record, + const byte *after_record) +{ + DBUG_ASSERT(binlog_row_based && mysql_bin_log.is_open()); + + bool error= 0; + my_size_t const before_maxlen = max_row_length(table, before_record); + my_size_t const after_maxlen = max_row_length(table, after_record); + + byte *row_data= table->write_row_record; + byte *before_row, *after_row; + if (row_data != 0) + { + before_row= row_data; + after_row= before_row + before_maxlen; + } + else + { + if (unlikely(!(row_data= my_multi_malloc(MYF(MY_WME), + &before_row, before_maxlen, + &after_row, after_maxlen, + NULL)))) + return HA_ERR_OUT_OF_MEM; + } + + my_size_t const before_size= pack_row(table, cols, before_row, + before_record); + my_size_t const after_size= pack_row(table, cols, after_row, + after_record); + + Rows_log_event* const + ev= binlog_prepare_pending_rows_event<Update_rows_log_event> + (table, server_id, cols, colcnt, before_size + after_size, is_trans); + + error= (unlikely(!ev)) || ev->add_row_data(before_row, before_size) || + ev->add_row_data(after_row, after_size); + + if (!table->write_row_record) + { + /* add_row_data copies row_data to internal buffer */ + my_free(row_data, MYF(MY_WME)); + } + + return error; +} + +int THD::binlog_delete_row(TABLE* table, bool is_trans, + MY_BITMAP const* cols, my_size_t colcnt, + byte const *record) +{ + DBUG_ASSERT(binlog_row_based && mysql_bin_log.is_open()); + + /* + Pack records into format for transfer. 
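THD::pack_row() above copies the null-flag bytes verbatim and then appends only the columns present in the bitmap, each through the field's own pack(). A minimal sketch of that walk under the simplifying assumption of fixed-length columns (Col and the byte-addressed bitmap are illustrative):

```cpp
#include <cstddef>
#include <cstring>

struct Col { size_t offset, length; };   // fixed-length columns only

size_t pack_row_sketch(unsigned char *out, const unsigned char *record,
                       size_t null_bytes, const Col *cols, size_t ncols,
                       const unsigned char *bitmap)
{
  memcpy(out, record, null_bytes);       // null-flag bytes travel verbatim
  unsigned char *p= out + null_bytes;
  for (size_t i= 0; i < ncols; i++)
  {
    if (bitmap[i / 8] & (1u << (i % 8))) // only columns named in the bitmap
    {
      memcpy(p, record + cols[i].offset, cols[i].length);
      p+= cols[i].length;
    }
  }
  return (size_t) (p - out);             // bytes actually written
}
```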
We are allocating more
+    memory than needed, but that doesn't matter.
+  */
+  bool error= 0;
+  my_size_t const max_len= max_row_length(table, record);
+  byte *row_data= table->write_row_record;
+  if (!row_data && unlikely(!(row_data= my_malloc(max_len, MYF(MY_WME)))))
+    return HA_ERR_OUT_OF_MEM;
+  my_size_t const len= pack_row(table, cols, row_data, record);
+
+  Rows_log_event* const
+    ev= binlog_prepare_pending_rows_event<Delete_rows_log_event>
+    (table, server_id, cols, colcnt, len, is_trans);
+
+  error= (unlikely(!ev)) || ev->add_row_data(row_data, len);
+
+  /* add_row_data copies row_data */
+  if (table->write_row_record == 0)
+    my_free(row_data, MYF(MY_WME));
+
+  return error;
+}
+
+
+int THD::binlog_flush_pending_rows_event(bool stmt_end)
+{
+  DBUG_ENTER("THD::binlog_flush_pending_rows_event");
+  if (!binlog_row_based || !mysql_bin_log.is_open())
+    DBUG_RETURN(0);
+
+  /*
+    Mark the event as the last event of a statement if the stmt_end
+    flag is set.
+  */
+  int error= 0;
+  if (Rows_log_event *pending= binlog_get_pending_rows_event())
+  {
+    if (stmt_end)
+    {
+      pending->set_flags(Rows_log_event::STMT_END_F);
+      pending->flags|= LOG_EVENT_UPDATE_TABLE_MAP_VERSION_F;
+    }
+
+    /*
+      We only bother to set the pending event if it is non-NULL. This
+      is essential for correctness, since there is not necessarily a
+      trx_data created for the thread if the pending event is NULL.
+    */
+    error= mysql_bin_log.flush_and_set_pending_rows_event(this, 0);
+  }
+
+  DBUG_RETURN(error);
+}
+
+
+void THD::binlog_delete_pending_rows_event()
+{
+  if (Rows_log_event *pending= binlog_get_pending_rows_event())
+  {
+    delete pending;
+    binlog_set_pending_rows_event(0);
+  }
+}
+
+#endif /* HAVE_ROW_BASED_REPLICATION */
+
+/*
+  Member function that will log the query, either row-based or
+  statement-based, depending on the value of the 'binlog_row_based'
+  variable and the value of the 'qtype' flag.
+
+  This function should be called after all calls to ha_*_row()
+  functions have been issued, but before tables are unlocked and
+  closed.
+
+  RETURN VALUE
+    Error code, or 0 if no error.
+*/
+int THD::binlog_query(THD::enum_binlog_query_type qtype,
+                      char const *query, ulong query_len,
+                      bool is_trans, bool suppress_use)
+{
+  DBUG_ENTER("THD::binlog_query");
+  DBUG_ASSERT(query && mysql_bin_log.is_open());
+  int error= binlog_flush_pending_rows_event(true);
+  switch (qtype)
+  {
+  case THD::MYSQL_QUERY_TYPE:
+    /*
+      Using this query type is a convenience hack, since we have been
+      moving back and forth between using RBR for replication of
+      system tables and not using it.
+
+      Make sure to change in check_table_binlog_row_based() according
+      to how you treat this.
+    */
+  case THD::ROW_QUERY_TYPE:
+    if (binlog_row_based)
+      DBUG_RETURN(binlog_flush_pending_rows_event(true));
+    /* Otherwise, we fall through */
+  case THD::STMT_QUERY_TYPE:
+    /*
+      Most callers of binlog_query() ignore the error code, assuming
+      that the statement will always be written to the binlog. In
+      case of error above, we therefore just continue and write the
+      statement to the binary log.
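+
+      A typical call site (a minimal sketch only; it matches the
+      callers added elsewhere in this change, e.g. in sql_delete.cc):
+
+        if (mysql_bin_log.is_open())
+          (void) thd->binlog_query(THD::ROW_QUERY_TYPE,
+                                   thd->query, thd->query_length,
+                                   transactional_table, FALSE);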
+ */ + { + Query_log_event qinfo(this, query, query_len, is_trans, suppress_use); + qinfo.flags|= LOG_EVENT_UPDATE_TABLE_MAP_VERSION_F; + DBUG_RETURN(mysql_bin_log.write(&qinfo)); + } + break; + + case THD::QUERY_TYPE_COUNT: + default: + DBUG_ASSERT(0 <= qtype && qtype < QUERY_TYPE_COUNT); + } + DBUG_RETURN(0); +} + +#endif /* !defined(MYSQL_CLIENT) */ diff --git a/sql/sql_class.h b/sql/sql_class.h index c025b4f0774..1ef3322bc8f 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -21,19 +21,20 @@ #pragma interface /* gcc class implementation */ #endif -// TODO: create log.h and move all the log header stuff there +#include "log.h" +#include "rpl_rli.h" +#include "rpl_tblmap.h" class Query_log_event; class Load_log_event; class Slave_log_event; -class Format_description_log_event; class sp_rcontext; class sp_cache; +class Rows_log_event; enum enum_enable_or_disable { LEAVE_AS_IS, ENABLE, DISABLE }; enum enum_ha_read_modes { RFIRST, RNEXT, RPREV, RLAST, RKEY, RNEXT_SAME }; enum enum_duplicates { DUP_ERROR, DUP_REPLACE, DUP_UPDATE }; -enum enum_log_type { LOG_CLOSED, LOG_TO_BE_OPENED, LOG_NORMAL, LOG_NEW, LOG_BIN}; enum enum_delay_key_write { DELAY_KEY_WRITE_NONE, DELAY_KEY_WRITE_ON, DELAY_KEY_WRITE_ALL }; @@ -50,117 +51,6 @@ extern const char **errmesg; #define TC_HEURISTIC_RECOVER_ROLLBACK 2 extern uint tc_heuristic_recover; -/* - Transaction Coordinator log - a base abstract class - for two different implementations -*/ -class TC_LOG -{ - public: - int using_heuristic_recover(); - TC_LOG() {} - virtual ~TC_LOG() {} - - virtual int open(const char *opt_name)=0; - virtual void close()=0; - virtual int log(THD *thd, my_xid xid)=0; - virtual void unlog(ulong cookie, my_xid xid)=0; -}; - -class TC_LOG_DUMMY: public TC_LOG // use it to disable the logging -{ - public: - int open(const char *opt_name) { return 0; } - void close() { } - int log(THD *thd, my_xid xid) { return 1; } - void unlog(ulong cookie, my_xid xid) { } -}; - -#ifdef HAVE_MMAP -class TC_LOG_MMAP: public TC_LOG -{ - public: // only to keep Sun Forte on sol9x86 happy - typedef enum { - POOL, // page is in pool - ERROR, // last sync failed - DIRTY // new xids added since last sync - } PAGE_STATE; - - private: - typedef struct st_page { - struct st_page *next; // page a linked in a fifo queue - my_xid *start, *end; // usable area of a page - my_xid *ptr; // next xid will be written here - int size, free; // max and current number of free xid slots on the page - int waiters; // number of waiters on condition - PAGE_STATE state; // see above - pthread_mutex_t lock; // to access page data or control structure - pthread_cond_t cond; // to wait for a sync - } PAGE; - - char logname[FN_REFLEN]; - File fd; - my_off_t file_length; - uint npages, inited; - uchar *data; - struct st_page *pages, *syncing, *active, *pool, *pool_last; - /* - note that, e.g. LOCK_active is only used to protect - 'active' pointer, to protect the content of the active page - one has to use active->lock. 
- Same for LOCK_pool and LOCK_sync - */ - pthread_mutex_t LOCK_active, LOCK_pool, LOCK_sync; - pthread_cond_t COND_pool, COND_active; - - public: - TC_LOG_MMAP(): inited(0) {} - int open(const char *opt_name); - void close(); - int log(THD *thd, my_xid xid); - void unlog(ulong cookie, my_xid xid); - int recover(); - - private: - void get_active_from_pool(); - int sync(); - int overflow(); -}; -#else -#define TC_LOG_MMAP TC_LOG_DUMMY -#endif - -extern TC_LOG *tc_log; -extern TC_LOG_MMAP tc_log_mmap; -extern TC_LOG_DUMMY tc_log_dummy; - -/* log info errors */ -#define LOG_INFO_EOF -1 -#define LOG_INFO_IO -2 -#define LOG_INFO_INVALID -3 -#define LOG_INFO_SEEK -4 -#define LOG_INFO_MEM -6 -#define LOG_INFO_FATAL -7 -#define LOG_INFO_IN_USE -8 - -/* bitmap to SQL_LOG::close() */ -#define LOG_CLOSE_INDEX 1 -#define LOG_CLOSE_TO_BE_OPENED 2 -#define LOG_CLOSE_STOP_EVENT 4 - -struct st_relay_log_info; - -typedef struct st_log_info -{ - char log_file_name[FN_REFLEN]; - my_off_t index_file_offset, index_file_start_offset; - my_off_t pos; - bool fatal; // if the purge happens to give us a negative offset - pthread_mutex_t lock; - st_log_info():fatal(0) { pthread_mutex_init(&lock, MY_MUTEX_INIT_FAST);} - ~st_log_info() { pthread_mutex_destroy(&lock);} -} LOG_INFO; - typedef struct st_user_var_events { user_var_entry *user_var_event; @@ -173,188 +63,6 @@ typedef struct st_user_var_events #define RP_LOCK_LOG_IS_ALREADY_LOCKED 1 #define RP_FORCE_ROTATE 2 -class Log_event; - -/* - TODO split MYSQL_LOG into base MYSQL_LOG and - MYSQL_QUERY_LOG, MYSQL_SLOW_LOG, MYSQL_BIN_LOG - most of the code from MYSQL_LOG should be in the MYSQL_BIN_LOG - only (TC_LOG included) - - TODO use mmap instead of IO_CACHE for binlog - (mmap+fsync is two times faster than write+fsync) -*/ - -class MYSQL_LOG: public TC_LOG -{ - private: - /* LOCK_log and LOCK_index are inited by init_pthread_objects() */ - pthread_mutex_t LOCK_log, LOCK_index; - pthread_mutex_t LOCK_prep_xids; - pthread_cond_t COND_prep_xids; - pthread_cond_t update_cond; - ulonglong bytes_written; - time_t last_time,query_start; - IO_CACHE log_file; - IO_CACHE index_file; - char *name; - char time_buff[20],db[NAME_LEN+1]; - char log_file_name[FN_REFLEN],index_file_name[FN_REFLEN]; - /* - The max size before rotation (usable only if log_type == LOG_BIN: binary - logs and relay logs). - For a binlog, max_size should be max_binlog_size. - For a relay log, it should be max_relay_log_size if this is non-zero, - max_binlog_size otherwise. - max_size is set in init(), and dynamically changed (when one does SET - GLOBAL MAX_BINLOG_SIZE|MAX_RELAY_LOG_SIZE) by fix_max_binlog_size and - fix_max_relay_log_size). - */ - ulong max_size; - ulong prepared_xids; /* for tc log - number of xids to remember */ - volatile enum_log_type log_type; - enum cache_type io_cache_type; - // current file sequence number for load data infile binary logging - uint file_id; - uint open_count; // For replication - int readers_count; - bool write_error, inited; - bool need_start_event; - /* - no_auto_events means we don't want any of these automatic events : - Start/Rotate/Stop. That is, in 4.x when we rotate a relay log, we don't - want a Rotate_log event to be written to the relay log. When we start a - relay log etc. So in 4.x this is 1 for relay logs, 0 for binlogs. - In 5.0 it's 0 for relay logs too! - */ - bool no_auto_events; - friend class Log_event; - -public: - /* - These describe the log's format. This is used only for relay logs. 
- _for_exec is used by the SQL thread, _for_queue by the I/O thread. It's - necessary to have 2 distinct objects, because the I/O thread may be reading - events in a different format from what the SQL thread is reading (consider - the case of a master which has been upgraded from 5.0 to 5.1 without doing - RESET MASTER, or from 4.x to 5.0). - */ - Format_description_log_event *description_event_for_exec, - *description_event_for_queue; - - MYSQL_LOG(); - /* - note that there's no destructor ~MYSQL_LOG() ! - The reason is that we don't want it to be automatically called - on exit() - but only during the correct shutdown process - */ - - int open(const char *opt_name); - void close(); - int log(THD *thd, my_xid xid); - void unlog(ulong cookie, my_xid xid); - int recover(IO_CACHE *log, Format_description_log_event *fdle); - void reset_bytes_written() - { - bytes_written = 0; - } - void harvest_bytes_written(ulonglong* counter) - { -#ifndef DBUG_OFF - char buf1[22],buf2[22]; -#endif - DBUG_ENTER("harvest_bytes_written"); - (*counter)+=bytes_written; - DBUG_PRINT("info",("counter: %s bytes_written: %s", llstr(*counter,buf1), - llstr(bytes_written,buf2))); - bytes_written=0; - DBUG_VOID_RETURN; - } - void set_max_size(ulong max_size_arg); - void signal_update(); - void wait_for_update(THD* thd, bool master_or_slave); - void set_need_start_event() { need_start_event = 1; } - void init(enum_log_type log_type_arg, - enum cache_type io_cache_type_arg, - bool no_auto_events_arg, ulong max_size); - void init_pthread_objects(); - void cleanup(); - bool open(const char *log_name, - enum_log_type log_type, - const char *new_name, - enum cache_type io_cache_type_arg, - bool no_auto_events_arg, ulong max_size, - bool null_created); - const char *generate_name(const char *log_name, const char *suffix, - bool strip_ext, char *buff); - /* simplified open_xxx wrappers for the gigantic open above */ - bool open_query_log(const char *log_name) - { - char buf[FN_REFLEN]; - return open(generate_name(log_name, ".log", 0, buf), - LOG_NORMAL, 0, WRITE_CACHE, 0, 0, 0); - } - bool open_slow_log(const char *log_name) - { - char buf[FN_REFLEN]; - return open(generate_name(log_name, "-slow.log", 0, buf), - LOG_NORMAL, 0, WRITE_CACHE, 0, 0, 0); - } - bool open_index_file(const char *index_file_name_arg, - const char *log_name); - void new_file(bool need_lock); - bool write(THD *thd, enum enum_server_command command, - const char *format,...); - bool write(THD *thd, const char *query, uint query_length, - time_t query_start=0); - bool write(Log_event* event_info); // binary log write - bool write(THD *thd, IO_CACHE *cache, Log_event *commit_event); - - void start_union_events(THD *thd); - void stop_union_events(THD *thd); - bool is_query_in_union(THD *thd, query_id_t query_id_param); - - /* - v stands for vector - invoked as appendv(buf1,len1,buf2,len2,...,bufn,lenn,0) - */ - bool appendv(const char* buf,uint len,...); - bool append(Log_event* ev); - - int generate_new_name(char *new_name,const char *old_name); - void make_log_name(char* buf, const char* log_ident); - bool is_active(const char* log_file_name); - int update_log_index(LOG_INFO* linfo, bool need_update_threads); - void rotate_and_purge(uint flags); - bool flush_and_sync(); - int purge_logs(const char *to_log, bool included, - bool need_mutex, bool need_update_threads, - ulonglong *decrease_log_space); - int purge_logs_before_date(time_t purge_time); - int purge_first_log(struct st_relay_log_info* rli, bool included); - bool reset_logs(THD* thd); - void 
close(uint exiting); - - // iterating through the log index file - int find_log_pos(LOG_INFO* linfo, const char* log_name, - bool need_mutex); - int find_next_log(LOG_INFO* linfo, bool need_mutex); - int get_current_log(LOG_INFO* linfo); - uint next_file_id(); - inline bool is_open() { return log_type != LOG_CLOSED; } - inline char* get_index_fname() { return index_file_name;} - inline char* get_log_fname() { return log_file_name; } - inline char* get_name() { return name; } - inline pthread_mutex_t* get_log_lock() { return &LOCK_log; } - inline IO_CACHE* get_log_file() { return &log_file; } - - inline void lock_index() { pthread_mutex_lock(&LOCK_index);} - inline void unlock_index() { pthread_mutex_unlock(&LOCK_index);} - inline IO_CACHE *get_index_file() { return &index_file;} - inline uint32 get_open_count() { return open_count; } -}; - - typedef struct st_copy_info { ha_rows records; ha_rows deleted; @@ -408,11 +116,13 @@ public: List<key_part_spec> columns; const char *name; bool generated; + LEX_STRING *parser_name; Key(enum Keytype type_par, const char *name_arg, enum ha_key_alg alg_par, - bool generated_arg, List<key_part_spec> &cols) + bool generated_arg, List<key_part_spec> &cols, + LEX_STRING *parser_arg= 0) :type(type_par), algorithm(alg_par), columns(cols), name(name_arg), - generated(generated_arg) + generated(generated_arg), parser_name(parser_arg) {} ~Key() {} /* Equality comparison of keys (ignoring name) */ @@ -459,27 +169,6 @@ public: #include "sql_lex.h" /* Must be here */ -/* Needed to be able to have an I_List of char* strings in mysqld.cc. */ - -class i_string: public ilink -{ -public: - char* ptr; - i_string():ptr(0) { } - i_string(char* s) : ptr(s) {} -}; - -/* needed for linked list of two strings for replicate-rewrite-db */ -class i_string_pair: public ilink -{ -public: - char* key; - char* val; - i_string_pair():key(0),val(0) { } - i_string_pair(char* key_arg, char* val_arg) : key(key_arg),val(val_arg) {} -}; - - class delayed_insert; class select_result; @@ -523,7 +212,7 @@ struct system_variables ulong read_rnd_buff_size; ulong div_precincrement; ulong sortbuff_size; - ulong table_type; + handlerton *table_type; ulong tmp_table_size; ulong tx_isolation; ulong completion_type; @@ -556,16 +245,16 @@ struct system_variables ulong sync_replication_slave_id; ulong sync_replication_timeout; #endif /* HAVE_REPLICATION */ -#ifdef HAVE_INNOBASE_DB my_bool innodb_table_locks; my_bool innodb_support_xa; -#endif /* HAVE_INNOBASE_DB */ -#ifdef HAVE_NDBCLUSTER_DB ulong ndb_autoincrement_prefetch_sz; my_bool ndb_force_send; my_bool ndb_use_exact_count; my_bool ndb_use_transactions; -#endif /* HAVE_NDBCLUSTER_DB */ + my_bool ndb_index_stat_enable; + ulong ndb_index_stat_cache_entries; + ulong ndb_index_stat_update_freq; + my_bool old_alter_table; my_bool old_passwords; /* Only charset part of these variables is sensible */ @@ -624,6 +313,7 @@ typedef struct system_status_var ulong net_big_packet_count; ulong opened_tables; + ulong opened_shares; ulong select_full_join_count; ulong select_full_range_join_count; ulong select_range_count; @@ -777,8 +467,15 @@ public: /* - if set_query_id=1, we set field->query_id for all fields. In that case field list can not contain duplicates. 
+    0: Means query_id is not set and no indication of the fields used
+       is given to the handler
+    1: Means query_id is set for the fields in the list and the bit in
+       the read set is set, to inform the handler that the field is to
+       be read
+    2: Means query_id is set for the fields in the list and the bit in
+       the write set is set, to inform the handler that it needs to
+       update this field in write_row and update_row
   */
-  bool set_query_id;
+  ulong set_query_id;
   LEX_STRING name; /* name for named prepared statements */
   LEX *lex;                                     // parse tree descriptor
@@ -1091,6 +788,9 @@ class THD :public Statement,
                            public Open_tables_state
 {
 public:
+  /* Used to execute base64 coded binlog events in MySQL server */
+  RELAY_LOG_INFO* rli_fake;
+
   /*
     Constant for THD::where initialization in the beginning of every query.
@@ -1195,12 +895,96 @@ public:
   /* container for handler's private per-connection data */
   void *ha_data[MAX_HA];
+
+#ifdef HAVE_ROW_BASED_REPLICATION
+#ifndef MYSQL_CLIENT
+
+  /*
+    Public interface to write rows to the binlog
+  */
+  int binlog_write_row(TABLE* table, bool is_transactional,
+                       MY_BITMAP const* cols, my_size_t colcnt,
+                       const byte *buf);
+  int binlog_delete_row(TABLE* table, bool is_transactional,
+                        MY_BITMAP const* cols, my_size_t colcnt,
+                        const byte *buf);
+  int binlog_update_row(TABLE* table, bool is_transactional,
+                        MY_BITMAP const* cols, my_size_t colcnt,
+                        const byte *old_data, const byte *new_data);
+
+  void set_server_id(uint32 sid) { server_id = sid; }
+
+  /*
+    Member functions to handle the pending event for row-level logging.
+  */
+  template <class RowsEventT> Rows_log_event*
+    binlog_prepare_pending_rows_event(TABLE* table, uint32 serv_id,
+                                      MY_BITMAP const* cols,
+                                      my_size_t colcnt,
+                                      my_size_t needed,
+                                      bool is_transactional);
+  Rows_log_event* binlog_get_pending_rows_event() const;
+  void binlog_set_pending_rows_event(Rows_log_event* ev);
+  int binlog_setup_trx_data();
+
+  my_size_t max_row_length_blob(TABLE* table, const byte *data) const;
+  my_size_t max_row_length(TABLE* table, const byte *data) const
+  {
+    TABLE_SHARE *table_s= table->s;
+    my_size_t length= table_s->reclength + 2 * table_s->fields;
+    if (table_s->blob_fields == 0)
+      return length;
+
+    return (length+max_row_length_blob(table,data));
+  }
+
+  my_size_t pack_row(TABLE* table, MY_BITMAP const* cols, byte *row_data,
+                     const byte *data) const;
+
+  int binlog_flush_pending_rows_event(bool stmt_end);
+  void binlog_delete_pending_rows_event();
+
+#endif
+#endif /* HAVE_ROW_BASED_REPLICATION */
+#ifndef MYSQL_CLIENT
+  enum enum_binlog_query_type {
+    /*
+      The query can be logged row-based or statement-based
+    */
+    ROW_QUERY_TYPE,
+
+    /*
+      The query has to be logged statement-based
+    */
+    STMT_QUERY_TYPE,
+
+    /*
+      The query represents a change to a table in the "mysql"
+      database and is currently mapped to ROW_QUERY_TYPE.
+    */
+    MYSQL_QUERY_TYPE,
+    QUERY_TYPE_COUNT
+  };
+
+  int binlog_query(enum_binlog_query_type qtype,
+                   char const *query, ulong query_len,
+                   bool is_trans, bool suppress_use);
+#endif
+
+public:
+
   struct st_transactions {
     SAVEPOINT *savepoints;
     THD_TRANS all;                      // Trans since BEGIN WORK
     THD_TRANS stmt;                     // Trans for current statement
     bool on;                            // see ha_enable_transaction()
+    XID xid;                            // transaction identifier
+    enum xa_states xa_state;            // used by external XA only
     XID_STATE xid_state;
+#ifdef HAVE_ROW_BASED_REPLICATION
+    Rows_log_event *m_pending_rows_event;
+#endif
+
    /*
      Tables changed in transaction (that must be invalidated in query cache).
List contain only transactional tables, that not invalidated in query @@ -1757,6 +1541,7 @@ class select_create: public select_insert { HA_CREATE_INFO *create_info; MYSQL_LOCK *lock; Field **field; + bool create_table_written; public: select_create (TABLE_LIST *table, HA_CREATE_INFO *create_info_par, @@ -1765,9 +1550,11 @@ public: List<Item> &select_fields,enum_duplicates duplic, bool ignore) :select_insert (NULL, NULL, &select_fields, 0, 0, duplic, ignore), create_table(table), extra_fields(&fields_par),keys(&keys_par), create_info(create_info_par), - lock(0) + lock(0), create_table_written(FALSE) {} int prepare(List<Item> &list, SELECT_LEX_UNIT *u); + + void binlog_show_create_table(); void store_values(List<Item> &values); void send_error(uint errcode,const char *err); bool send_eof(); diff --git a/sql/sql_db.cc b/sql/sql_db.cc index 2500b213f4c..d91f091174f 100644 --- a/sql/sql_db.cc +++ b/sql/sql_db.cc @@ -272,7 +272,7 @@ static bool write_db_opt(THD *thd, const char *path, HA_CREATE_INFO *create) if ((file=my_create(path, CREATE_MODE,O_RDWR | O_TRUNC,MYF(MY_WME))) >= 0) { ulong length; - length= (ulong) (strxnmov(buf, sizeof(buf), "default-character-set=", + length= (ulong) (strxnmov(buf, sizeof(buf)-1, "default-character-set=", create->default_table_charset->csname, "\ndefault-collation=", create->default_table_charset->name, @@ -425,8 +425,7 @@ bool mysql_create_db(THD *thd, char *db, HA_CREATE_INFO *create_info, } /* Check directory */ - strxmov(path, mysql_data_home, "/", db, NullS); - path_len= unpack_dirname(path,path); // Convert if not unix + path_len= build_table_filename(path, sizeof(path), db, "", ""); path[path_len-1]= 0; // Remove last '/' from path if (my_stat(path,&stat_info,MYF(0))) @@ -549,9 +548,12 @@ bool mysql_alter_db(THD *thd, const char *db, HA_CREATE_INFO *create_info) if ((error=wait_if_global_read_lock(thd,0,1))) goto exit2; - /* Check directory */ - strxmov(path, mysql_data_home, "/", db, "/", MY_DB_OPT_FILE, NullS); - fn_format(path, path, "", "", MYF(MY_UNPACK_FILENAME)); + /* + Recreate db options file: /dbpath/.db.opt + We pass MY_DB_OPT_FILE as "extension" to avoid + "table name to file name" encoding. 
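+
+    For example (illustrative values only; MY_DB_OPT_FILE is the
+    literal file name "db.opt"), for a database "mydb" the call
+
+      build_table_filename(path, sizeof(path), "mydb", "", MY_DB_OPT_FILE);
+      // path now holds "<datadir>/mydb/db.opt"
+
+    whereas running the name through table-name encoding could mangle
+    the '.' in the file name.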
+ */ + build_table_filename(path, sizeof(path), db, "", MY_DB_OPT_FILE); if ((error=write_db_opt(thd, path, create_info))) goto exit; @@ -629,8 +631,7 @@ bool mysql_rm_db(THD *thd,char *db,bool if_exists, bool silent) goto exit2; } - (void) sprintf(path,"%s/%s",mysql_data_home,db); - length= unpack_dirname(path,path); // Convert if not unix + length= build_table_filename(path, sizeof(path), db, "", ""); strmov(path+length, MY_DB_OPT_FILE); // Append db option file name del_dbopt(path); // Remove dboption hash entry path[length]= '\0'; // Remove file name @@ -852,7 +853,8 @@ static long mysql_rm_known_files(THD *thd, MY_DIR *dirp, const char *db, found_other_files++; continue; } - extension= fn_ext(file->name); + if (!(extension= strrchr(file->name, '.'))) + extension= strend(file->name); if (find_type(extension, &deletable_extentions,1+2) <= 0) { if (find_type(extension, ha_known_exts(),1+2) <= 0) @@ -870,7 +872,9 @@ static long mysql_rm_known_files(THD *thd, MY_DIR *dirp, const char *db, if (!table_list) goto err; table_list->db= (char*) (table_list+1); - strmov(table_list->table_name= strmov(table_list->db,db)+1, file->name); + table_list->table_name= strmov(table_list->db, db) + 1; + VOID(filename_to_tablename(file->name, table_list->table_name, + strlen(file->name) + 1)); table_list->alias= table_list->table_name; // If lower_case_table_names=2 /* Link into list */ (*tot_list_next)= table_list; @@ -1151,8 +1155,7 @@ bool mysql_change_db(THD *thd, const char *name, bool no_access_check) } } #endif - (void) sprintf(path,"%s/%s",mysql_data_home,dbname); - length=unpack_dirname(path,path); // Convert if not unix + length= build_table_filename(path, sizeof(path), dbname, "", ""); if (length && path[length-1] == FN_LIBCHAR) path[length-1]=0; // remove ending '\' if (my_access(path,F_OK)) diff --git a/sql/sql_delete.cc b/sql/sql_delete.cc index d8a8f28b92b..764aa435cae 100644 --- a/sql/sql_delete.cc +++ b/sql/sql_delete.cc @@ -30,7 +30,8 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds, SQL_LIST *order, ha_rows limit, ulonglong options, bool reset_auto_increment) { - int error; + bool will_batch; + int error, loc_error; TABLE *table; SQL_SELECT *select=0; READ_RECORD info; @@ -39,6 +40,7 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds, ha_rows deleted; uint usable_index= MAX_KEY; SELECT_LEX *select_lex= &thd->lex->select_lex; + bool ha_delete_row_bypassed= 0; DBUG_ENTER("mysql_delete"); if (open_and_lock_tables(thd, table_list)) @@ -76,15 +78,18 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds, !(specialflag & (SPECIAL_NO_NEW_FUNC | SPECIAL_SAFE_MODE)) && !(table->triggers && table->triggers->has_delete_triggers())) { - deleted= table->file->records; + ha_rows const maybe_deleted= table->file->records; if (!(error=table->file->delete_all_rows())) { error= -1; // ok + deleted= maybe_deleted; + ha_delete_row_bypassed= 1; goto cleanup; } if (error != HA_ERR_WRONG_COMMAND) { table->file->print_error(error,MYF(0)); + ha_delete_row_bypassed= 1; error=0; goto cleanup; } @@ -194,6 +199,7 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds, deleted=0L; init_ftfuncs(thd, select_lex, 1); thd->proc_info="updating"; + will_batch= !table->file->start_bulk_delete(); while (!(error=info.read_record(&info)) && !thd->killed && !thd->net.report_error) { @@ -209,7 +215,7 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds, break; } - if (!(error=table->file->delete_row(table->record[0]))) + if (!(error= 
table->file->ha_delete_row(table->record[0]))) { deleted++; if (table->triggers && @@ -245,7 +251,13 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds, } if (thd->killed && !error) error= 1; // Aborted - thd->proc_info="end"; + if (will_batch && (loc_error= table->file->end_bulk_delete())) + { + if (error != 1) + table->file->print_error(loc_error,MYF(0)); + error=1; + } + thd->proc_info= "end"; end_read_record(&info); free_io_cache(table); // Will not do any harm if (options & OPTION_QUICK) @@ -285,10 +297,24 @@ cleanup: { if (error < 0) thd->clear_error(); - Query_log_event qinfo(thd, thd->query, thd->query_length, - transactional_table, FALSE); - if (mysql_bin_log.write(&qinfo) && transactional_table) + + /* + If 'handler::delete_all_rows()' was called, we replicate + statement-based; otherwise, 'ha_delete_row()' was used to + delete specific rows which we might log row-based. + */ + THD::enum_binlog_query_type const + query_type(ha_delete_row_bypassed ? + THD::STMT_QUERY_TYPE : + THD::ROW_QUERY_TYPE); + int log_result= thd->binlog_query(query_type, + thd->query, thd->query_length, + transactional_table, FALSE); + + if (log_result && transactional_table) + { error=1; + } } if (!transactional_table) thd->options|=OPTION_STATUS_NO_TRANS_UPDATE; @@ -584,7 +610,7 @@ bool multi_delete::send_data(List<Item> &values) TRG_ACTION_BEFORE, FALSE)) DBUG_RETURN(1); table->status|= STATUS_DELETED; - if (!(error=table->file->delete_row(table->record[0]))) + if (!(error=table->file->ha_delete_row(table->record[0]))) { deleted++; if (table->triggers && @@ -658,7 +684,8 @@ void multi_delete::send_error(uint errcode,const char *err) int multi_delete::do_deletes() { - int local_error= 0, counter= 0; + int local_error= 0, counter= 0, error; + bool will_batch; DBUG_ENTER("do_deletes"); DBUG_ASSERT(do_delete); @@ -686,6 +713,7 @@ int multi_delete::do_deletes() been deleted by foreign key handling */ info.ignore_not_found_rows= 1; + will_batch= !table->file->start_bulk_delete(); while (!(local_error=info.read_record(&info)) && !thd->killed) { if (table->triggers && @@ -695,7 +723,7 @@ int multi_delete::do_deletes() local_error= 1; break; } - if ((local_error=table->file->delete_row(table->record[0]))) + if ((local_error=table->file->ha_delete_row(table->record[0]))) { table->file->print_error(local_error,MYF(0)); break; @@ -709,6 +737,14 @@ int multi_delete::do_deletes() break; } } + if (will_batch && (error= table->file->end_bulk_delete())) + { + if (!local_error) + { + local_error= error; + table->file->print_error(local_error,MYF(0)); + } + } end_read_record(&info); if (thd->killed && !local_error) local_error= 1; @@ -754,10 +790,13 @@ bool multi_delete::send_eof() { if (local_error == 0) thd->clear_error(); - Query_log_event qinfo(thd, thd->query, thd->query_length, - transactional_tables, FALSE); - if (mysql_bin_log.write(&qinfo) && !normal_tables) + if (thd->binlog_query(THD::ROW_QUERY_TYPE, + thd->query, thd->query_length, + transactional_tables, FALSE) && + !normal_tables) + { local_error=1; // Log write failed: roll back the SQL statement + } } if (!transactional_tables) thd->options|=OPTION_STATUS_NO_TRANS_UPDATE; @@ -796,29 +835,32 @@ bool mysql_truncate(THD *thd, TABLE_LIST *table_list, bool dont_send_ok) { HA_CREATE_INFO create_info; char path[FN_REFLEN]; - TABLE **table_ptr; + TABLE *table; bool error; + uint path_length; DBUG_ENTER("mysql_truncate"); bzero((char*) &create_info,sizeof(create_info)); /* If it is a temporary table, close and regenerate it */ - if (!dont_send_ok && 
(table_ptr=find_temporary_table(thd,table_list->db, - table_list->table_name))) + if (!dont_send_ok && (table= find_temporary_table(thd, table_list))) { - TABLE *table= *table_ptr; - table->file->info(HA_STATUS_AUTO | HA_STATUS_NO_LOCK); - db_type table_type= table->s->db_type; + handlerton *table_type= table->s->db_type; + TABLE_SHARE *share= table->s; if (!ha_check_storage_engine_flag(table_type, HTON_CAN_RECREATE)) goto trunc_by_del; - strmov(path, table->s->path); - *table_ptr= table->next; // Unlink table from list - close_temporary(table,0); - *fn_ext(path)=0; // Remove the .frm extension - ha_create_table(path, &create_info,1); + + table->file->info(HA_STATUS_AUTO | HA_STATUS_NO_LOCK); + + close_temporary_table(thd, table, 0, 0); // Don't free share + ha_create_table(thd, share->normalized_path.str, + share->db.str, share->table_name.str, &create_info, 1); // We don't need to call invalidate() because this table is not in cache - if ((error= (int) !(open_temporary_table(thd, path, table_list->db, - table_list->table_name, 1)))) + if ((error= (int) !(open_temporary_table(thd, share->path.str, + share->db.str, + share->table_name.str, 1)))) (void) rm_temporary_table(table_type, path); + free_table_share(share); + my_free((char*) table,MYF(0)); /* If we return here we will not have logged the truncation to the bin log and we will not send_ok() to the client. @@ -826,13 +868,12 @@ bool mysql_truncate(THD *thd, TABLE_LIST *table_list, bool dont_send_ok) goto end; } - (void) sprintf(path,"%s/%s/%s%s",mysql_data_home,table_list->db, - table_list->table_name,reg_ext); - fn_format(path, path, "", "", MY_UNPACK_FILENAME); + path_length= build_table_filename(path, sizeof(path), table_list->db, + table_list->table_name, reg_ext); if (!dont_send_ok) { - db_type table_type; + enum legacy_db_type table_type; mysql_frm_type(thd, path, &table_type); if (table_type == DB_TYPE_UNKNOWN) { @@ -840,15 +881,20 @@ bool mysql_truncate(THD *thd, TABLE_LIST *table_list, bool dont_send_ok) table_list->db, table_list->table_name); DBUG_RETURN(TRUE); } - if (!ha_check_storage_engine_flag(table_type, HTON_CAN_RECREATE) + if (!ha_check_storage_engine_flag(ha_resolve_by_legacy_type(thd, table_type), + HTON_CAN_RECREATE) || thd->lex->sphead) goto trunc_by_del; if (lock_and_wait_for_table_name(thd, table_list)) DBUG_RETURN(TRUE); } - *fn_ext(path)=0; // Remove the .frm extension - error= ha_create_table(path,&create_info,1); + // Remove the .frm extension + // AIX 5.2 64-bit compiler bug (BUG#16155): this crashes, replacement works. + // *(path + path_length - reg_ext_length)= '\0'; + path[path_length - reg_ext_length] = 0; + error= ha_create_table(thd, path, table_list->db, table_list->table_name, + &create_info, 1); query_cache_invalidate3(thd, table_list, 0); end: @@ -858,10 +904,13 @@ end: { if (mysql_bin_log.is_open()) { + /* + TRUNCATE must always be statement-based binlogged (not row-based) so + we don't test binlog_row_based. 
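+
+    For example, with row-based logging in effect on the master
+    (t1 is an illustrative table name):
+
+      DELETE FROM t1;       -- may be logged as individual row events
+      TRUNCATE TABLE t1;    -- always logged as the statement itself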
+ */ thd->clear_error(); - Query_log_event qinfo(thd, thd->query, thd->query_length, - 0, FALSE); - mysql_bin_log.write(&qinfo); + thd->binlog_query(THD::STMT_QUERY_TYPE, + thd->query, thd->query_length, FALSE, FALSE); } send_ok(thd); // This should return record count } @@ -877,7 +926,7 @@ end: } DBUG_RETURN(error); - trunc_by_del: +trunc_by_del: /* Probably InnoDB table */ ulong save_options= thd->options; table_list->lock_type= TL_WRITE; diff --git a/sql/sql_derived.cc b/sql/sql_derived.cc index e1817985cbd..5a9871c07c5 100644 --- a/sql/sql_derived.cc +++ b/sql/sql_derived.cc @@ -179,8 +179,8 @@ exit: } orig_table_list->derived_result= derived_result; orig_table_list->table= table; - orig_table_list->table_name= (char*) table->s->table_name; - orig_table_list->table_name_length= strlen((char*)table->s->table_name); + orig_table_list->table_name= table->s->table_name.str; + orig_table_list->table_name_length= table->s->table_name.length; table->derived_select_number= first_select->select_number; table->s->tmp_table= TMP_TABLE; #ifndef NO_EMBEDDED_ACCESS_CHECKS diff --git a/sql/sql_handler.cc b/sql/sql_handler.cc index fae48c7d164..f1bbf6f0f5e 100644 --- a/sql/sql_handler.cc +++ b/sql/sql_handler.cc @@ -403,7 +403,8 @@ bool mysql_ha_read(THD *thd, TABLE_LIST *tables, #if MYSQL_VERSION_ID < 40100 char buff[MAX_DBKEY_LENGTH]; if (*tables->db) - strxnmov(buff, sizeof(buff), tables->db, ".", tables->table_name, NullS); + strxnmov(buff, sizeof(buff)-1, tables->db, ".", tables->table_name, + NullS); else strncpy(buff, tables->alias, sizeof(buff)); my_error(ER_UNKNOWN_TABLE, MYF(0), buff, "HANDLER"); @@ -464,7 +465,7 @@ bool mysql_ha_read(THD *thd, TABLE_LIST *tables, if (keyname) { table->file->ha_index_or_rnd_end(); - table->file->ha_index_init(keyno); + table->file->ha_index_init(keyno, 1); error= table->file->index_first(table->record[0]); } else @@ -486,7 +487,7 @@ bool mysql_ha_read(THD *thd, TABLE_LIST *tables, case RLAST: DBUG_ASSERT(keyname != 0); table->file->ha_index_or_rnd_end(); - table->file->ha_index_init(keyno); + table->file->ha_index_init(keyno, 1); error= table->file->index_last(table->record[0]); mode=RPREV; break; @@ -525,7 +526,7 @@ bool mysql_ha_read(THD *thd, TABLE_LIST *tables, if (!(key= (byte*) thd->calloc(ALIGN_SIZE(key_len)))) goto err; table->file->ha_index_or_rnd_end(); - table->file->ha_index_init(keyno); + table->file->ha_index_init(keyno, 1); key_copy(key, table->record[0], table->key_info + keyno, key_len); error= table->file->index_read(table->record[0], key,key_len,ha_rkey_mode); @@ -635,14 +636,15 @@ int mysql_ha_flush(THD *thd, TABLE_LIST *tables, uint mode_flags, while (*table_ptr) { if ((!*tmp_tables->db || - !my_strcasecmp(&my_charset_latin1, (*table_ptr)->s->db, + !my_strcasecmp(&my_charset_latin1, (*table_ptr)->s->db.str, tmp_tables->db)) && - ! my_strcasecmp(&my_charset_latin1, (*table_ptr)->s->table_name, + ! my_strcasecmp(&my_charset_latin1, + (*table_ptr)->s->table_name.str, tmp_tables->table_name)) { DBUG_PRINT("info",("*table_ptr '%s'.'%s' as '%s'", - (*table_ptr)->s->db, - (*table_ptr)->s->table_name, + (*table_ptr)->s->db.str, + (*table_ptr)->s->table_name.str, (*table_ptr)->alias)); /* The first time it is required, lock for close_thread_table(). */ if (! did_lock && ! 
is_locked) @@ -712,7 +714,7 @@ static int mysql_ha_flush_table(THD *thd, TABLE **table_ptr, uint mode_flags) TABLE *table= *table_ptr; DBUG_ENTER("mysql_ha_flush_table"); DBUG_PRINT("enter",("'%s'.'%s' as '%s' flags: 0x%02x", - table->s->db, table->s->table_name, + table->s->db.str, table->s->table_name.str, table->alias, mode_flags)); if ((hash_tables= (TABLE_LIST*) hash_search(&thd->handler_tables_hash, diff --git a/sql/sql_help.cc b/sql/sql_help.cc index b47412981ea..0715aeeaa5b 100644 --- a/sql/sql_help.cc +++ b/sql/sql_help.cc @@ -286,8 +286,8 @@ int get_topics_for_keyword(THD *thd, TABLE *topics, TABLE *relations, rtopic_id= find_fields[help_relation_help_topic_id].field; rkey_id= find_fields[help_relation_help_keyword_id].field; - topics->file->ha_index_init(iindex_topic); - relations->file->ha_index_init(iindex_relations); + topics->file->ha_index_init(iindex_topic,1); + relations->file->ha_index_init(iindex_relations,1); rkey_id->store((longlong) key_id, TRUE); rkey_id->get_key_image(buff, rkey_id->pack_length(), Field::itRAW); diff --git a/sql/sql_insert.cc b/sql/sql_insert.cc index 736be2310cb..29df467b916 100644 --- a/sql/sql_insert.cc +++ b/sql/sql_insert.cc @@ -21,6 +21,7 @@ #include "sp_head.h" #include "sql_trigger.h" #include "sql_select.h" +#include "sql_show.h" static int check_null_fields(THD *thd,TABLE *entry); #ifndef EMBEDDED_LIBRARY @@ -96,13 +97,18 @@ static int check_insert_fields(THD *thd, TABLE_LIST *table_list, Field_iterator_table fields; fields.set_table(table); if (check_grant_all_columns(thd, INSERT_ACL, &table->grant, - table->s->db, table->s->table_name, + table->s->db.str, table->s->table_name.str, &fields)) return -1; } #endif clear_timestamp_auto_bits(table->timestamp_field_type, TIMESTAMP_AUTO_SET_ON_INSERT); + /* + No fields are provided so all fields must be provided in the values. + Thus we set all bits in the write set. + */ + table->file->ha_set_all_bits_in_write_set(); } else { // Part field list @@ -129,7 +135,11 @@ static int check_insert_fields(THD *thd, TABLE_LIST *table_list, */ table_list->next_local= 0; context->resolve_in_table_list_only(table_list); - res= setup_fields(thd, 0, fields, 1, 0, 0); + /* + Indicate fields in list is to be updated by setting set_query_id + parameter to 2. This sets the bit in the write_set for each field. + */ + res= setup_fields(thd, 0, fields, 2, 0, 0); /* Restore the current context. */ ctx_state.restore_state(context, table_list); @@ -222,9 +232,10 @@ static int check_update_fields(THD *thd, TABLE_LIST *insert_table_list, /* Check the fields we are going to modify. This will set the query_id - of all used fields to the threads query_id. + of all used fields to the threads query_id. It will also set all + fields into the write set of this table. 
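+
+    For reference, the set_query_id convention used by setup_fields()
+    (as documented in sql_class.h; the calls below are sketches only):
+
+      setup_fields(thd, 0, fields, 0, 0, 0);  // no read/write set bits
+      setup_fields(thd, 0, fields, 1, 0, 0);  // mark fields in the read set
+      setup_fields(thd, 0, fields, 2, 0, 0);  // mark fields in the write set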
*/ - if (setup_fields(thd, 0, update_fields, 1, 0, 0)) + if (setup_fields(thd, 0, update_fields, 2, 0, 0)) return -1; if (table->timestamp_field) @@ -234,7 +245,10 @@ static int check_update_fields(THD *thd, TABLE_LIST *insert_table_list, clear_timestamp_auto_bits(table->timestamp_field_type, TIMESTAMP_AUTO_SET_ON_UPDATE); else + { table->timestamp_field->query_id= timestamp_query_id; + table->file->ha_set_bit_in_write_set(table->timestamp_field->fieldnr); + } } return 0; @@ -563,10 +577,13 @@ bool mysql_insert(THD *thd,TABLE_LIST *table_list, { if (error <= 0) thd->clear_error(); - Query_log_event qinfo(thd, thd->query, thd->query_length, - transactional_table, FALSE); - if (mysql_bin_log.write(&qinfo) && transactional_table) - error=1; + if (thd->binlog_query(THD::ROW_QUERY_TYPE, + thd->query, thd->query_length, + transactional_table, FALSE) && + transactional_table) + { + error=1; + } } if (!transactional_table) thd->options|=OPTION_STATUS_NO_TRANS_UPDATE; @@ -659,8 +676,8 @@ static bool check_view_insertability(THD * thd, TABLE_LIST *view) *trans_end= trans_start + num; Field_translator *trans; Field **field_ptr= table->field; - uint used_fields_buff_size= (table->s->fields + 7) / 8; - uchar *used_fields_buff= (uchar*)thd->alloc(used_fields_buff_size); + uint used_fields_buff_size= bitmap_buffer_size(table->s->fields); + uint32 *used_fields_buff= (uint32*)thd->alloc(used_fields_buff_size); MY_BITMAP used_fields; DBUG_ENTER("check_key_in_view"); @@ -669,7 +686,7 @@ static bool check_view_insertability(THD * thd, TABLE_LIST *view) DBUG_ASSERT(view->table != 0 && view->field_translation != 0); - bitmap_init(&used_fields, used_fields_buff, used_fields_buff_size * 8, 0); + bitmap_init(&used_fields, used_fields_buff, table->s->fields, 0); bitmap_clear_all(&used_fields); view->contain_auto_increment= 0; @@ -882,7 +899,7 @@ bool mysql_prepare_insert(THD *thd, TABLE_LIST *table_list, select_lex->first_execution= 0; } if (duplic == DUP_UPDATE || duplic == DUP_REPLACE) - table->file->extra(HA_EXTRA_RETRIEVE_PRIMARY_KEY); + table->file->ha_retrieve_all_pk(); DBUG_RETURN(FALSE); } @@ -932,10 +949,11 @@ int write_record(THD *thd, TABLE *table,COPY_INFO *info) DBUG_ENTER("write_record"); info->records++; + if (info->handle_duplicates == DUP_REPLACE || info->handle_duplicates == DUP_UPDATE) { - while ((error=table->file->write_row(table->record[0]))) + while ((error=table->file->ha_write_row(table->record[0]))) { uint key_nr; if (error != HA_WRITE_SKIP) @@ -1019,7 +1037,7 @@ int write_record(THD *thd, TABLE *table,COPY_INFO *info) thd->clear_next_insert_id= 0; thd->next_insert_id= 0; } - if ((error=table->file->update_row(table->record[1],table->record[0]))) + if ((error=table->file->ha_update_row(table->record[1],table->record[0]))) { if ((error == HA_ERR_FOUND_DUPP_KEY) && info->ignore) goto ok_or_after_trg_err; @@ -1058,8 +1076,8 @@ int write_record(THD *thd, TABLE *table,COPY_INFO *info) thd->clear_next_insert_id= 0; thd->next_insert_id= 0; } - if ((error=table->file->update_row(table->record[1], - table->record[0]))) + if ((error=table->file->ha_update_row(table->record[1], + table->record[0]))) goto err; info->deleted++; trg_error= (table->triggers && @@ -1076,7 +1094,7 @@ int write_record(THD *thd, TABLE *table,COPY_INFO *info) table->triggers->process_triggers(thd, TRG_EVENT_DELETE, TRG_ACTION_BEFORE, TRUE)) goto before_trg_err; - if ((error=table->file->delete_row(table->record[1]))) + if ((error=table->file->ha_delete_row(table->record[1]))) goto err; info->deleted++; if 
(!table->file->has_transactions()) @@ -1097,7 +1115,7 @@ int write_record(THD *thd, TABLE *table,COPY_INFO *info) table->triggers->process_triggers(thd, TRG_EVENT_INSERT, TRG_ACTION_AFTER, TRUE)); } - else if ((error=table->file->write_row(table->record[0]))) + else if ((error=table->file->ha_write_row(table->record[0]))) { if (!info->ignore || (error != HA_ERR_FOUND_DUPP_KEY && error != HA_ERR_FOUND_DUPP_UNIQUE)) @@ -1183,16 +1201,15 @@ int check_that_all_fields_are_given_values(THD *thd, TABLE *entry, class delayed_row :public ilink { public: - char *record,*query; + char *record; enum_duplicates dup; time_t start_time; bool query_start_used,last_insert_id_used,insert_id_used, ignore, log_query; ulonglong last_insert_id; timestamp_auto_set_type timestamp_field_type; - uint query_length; delayed_row(enum_duplicates dup_arg, bool ignore_arg, bool log_query_arg) - :record(0), query(0), dup(dup_arg), ignore(ignore_arg), log_query(log_query_arg) {} + :record(0), dup(dup_arg), ignore(ignore_arg), log_query(log_query_arg) {} ~delayed_row() { x_free(record); @@ -1202,6 +1219,9 @@ public: class delayed_insert :public ilink { uint locks_in_memory; + char *query; + ulong query_length; + ulong query_allocated; public: THD thd; TABLE *table; @@ -1215,7 +1235,7 @@ public: TABLE_LIST table_list; // Argument delayed_insert() - :locks_in_memory(0), + :locks_in_memory(0), query(0), query_length(0), query_allocated(0), table(0),tables_in_use(0),stacked_inserts(0), status(0), dead(0), group_count(0) { @@ -1241,6 +1261,7 @@ public: } ~delayed_insert() { + my_free(query, MYF(MY_WME|MY_ALLOW_ZERO_PTR)); /* The following is not really needed, but just for safety */ delayed_row *row; while ((row=rows.get())) @@ -1260,6 +1281,25 @@ public: VOID(pthread_cond_broadcast(&COND_thread_count)); /* Tell main we are ready */ } + int set_query(char const *q, ulong qlen) { + if (q && qlen > 0) + { + if (query_allocated < qlen + 1) + { + ulong const flags(MY_WME|MY_FREE_ON_ERROR|MY_ALLOW_ZERO_PTR); + query= my_realloc(query, qlen + 1, MYF(flags)); + if (query == 0) + return HA_ERR_OUT_OF_MEM; + query_allocated= qlen; + } + query_length= qlen; + memcpy(query, q, qlen + 1); + } + else + query_length= 0; + return 0; + } + /* The following is for checking when we can delete ourselves */ inline void lock() { @@ -1298,8 +1338,8 @@ delayed_insert *find_handler(THD *thd, TABLE_LIST *table_list) delayed_insert *tmp; while ((tmp=it++)) { - if (!strcmp(tmp->thd.db,table_list->db) && - !strcmp(table_list->table_name,tmp->table->s->table_name)) + if (!strcmp(tmp->thd.db, table_list->db) && + !strcmp(table_list->table_name, tmp->table->s->table_name.str)) { tmp->lock(); break; @@ -1452,6 +1492,7 @@ TABLE *delayed_insert::get_local_table(THD* client_thd) my_ptrdiff_t adjust_ptrs; Field **field,**org_field, *found_next_number_field; TABLE *copy; + TABLE_SHARE *share= table->s; /* First request insert thread to get a lock */ status=1; @@ -1477,19 +1518,16 @@ TABLE *delayed_insert::get_local_table(THD* client_thd) client_thd->proc_info="allocating local table"; copy= (TABLE*) client_thd->alloc(sizeof(*copy)+ - (table->s->fields+1)*sizeof(Field**)+ - table->s->reclength); + (share->fields+1)*sizeof(Field**)+ + share->reclength); if (!copy) goto error; *copy= *table; - copy->s= ©->share_not_to_be_used; - // No name hashing - bzero((char*) ©->s->name_hash,sizeof(copy->s->name_hash)); - /* We don't need to change the file handler here */ + /* We don't need to change the file handler here */ field=copy->field=(Field**) (copy+1); - 
copy->record[0]=(byte*) (field+table->s->fields+1); - memcpy((char*) copy->record[0],(char*) table->record[0],table->s->reclength); + copy->record[0]=(byte*) (field+share->fields+1); + memcpy((char*) copy->record[0],(char*) table->record[0],share->reclength); /* Make a copy of all fields */ @@ -1501,7 +1539,7 @@ TABLE *delayed_insert::get_local_table(THD* client_thd) if (!(*field= (*org_field)->new_field(client_thd->mem_root,copy))) return 0; (*field)->orig_table= copy; // Remove connection - (*field)->move_field(adjust_ptrs); // Point at copy->record[0] + (*field)->move_field_offset(adjust_ptrs); // Point at copy->record[0] if (*org_field == found_next_number_field) (*field)->table->found_next_number_field= *field; } @@ -1512,13 +1550,11 @@ TABLE *delayed_insert::get_local_table(THD* client_thd) { /* Restore offset as this may have been reset in handle_inserts */ copy->timestamp_field= - (Field_timestamp*) copy->field[table->s->timestamp_field_offset]; + (Field_timestamp*) copy->field[share->timestamp_field_offset]; copy->timestamp_field->unireg_check= table->timestamp_field->unireg_check; copy->timestamp_field_type= copy->timestamp_field->get_auto_set_type(); } - /* _rowid is not used with delayed insert */ - copy->rowid_field=0; /* Adjust in_use for pointing to client thread */ copy->in_use= client_thd; @@ -1536,8 +1572,9 @@ TABLE *delayed_insert::get_local_table(THD* client_thd) /* Put a question in queue */ -static int write_delayed(THD *thd,TABLE *table,enum_duplicates duplic, bool ignore, - char *query, uint query_length, bool log_on) +static int write_delayed(THD *thd,TABLE *table,enum_duplicates duplic, + bool ignore, char *query, uint query_length, + bool log_on) { delayed_row *row=0; delayed_insert *di=thd->di; @@ -1552,18 +1589,22 @@ static int write_delayed(THD *thd,TABLE *table,enum_duplicates duplic, bool igno if (thd->killed || !(row= new delayed_row(duplic, ignore, log_on))) goto err; +#if 0 if (!query) query_length=0; - if (!(row->record= (char*) my_malloc(table->s->reclength+query_length+1, - MYF(MY_WME)))) +#endif + if (!(row->record= (char*) my_malloc(table->s->reclength, MYF(MY_WME)))) goto err; memcpy(row->record, table->record[0], table->s->reclength); + di->set_query(query, query_length); +#if 0 if (query_length) { row->query= row->record+table->s->reclength; memcpy(row->query,query,query_length+1); } row->query_length= query_length; +#endif row->start_time= thd->start_time; row->query_start_used= thd->query_start_used; row->last_insert_id_used= thd->last_insert_id_used; @@ -1887,7 +1928,21 @@ bool delayed_insert::handle_inserts(void) { int error; ulong max_rows; - bool using_ignore=0, using_bin_log=mysql_bin_log.is_open(); + bool using_ignore=0, + using_bin_log= mysql_bin_log.is_open(); + +#if 0 + /* + The actual text for the query is added to the first row in the + list. Since the row is destroyed, with all it's memory, we need + to take a copy of it to be able to log it after all rows have been + applied. 
+ */ + uint const query_length= rows.head()->query_length; + char *const query= static_cast<char*>(my_alloca(query_length+1)); + memcpy(query, rows.head()->query, query_length); +#endif + delayed_row *row; DBUG_ENTER("handle_inserts"); @@ -1900,7 +1955,7 @@ bool delayed_insert::handle_inserts(void) if (thr_upgrade_write_delay_lock(*thd.lock->locks)) { /* This can only happen if thread is killed by shutdown */ - sql_print_error(ER(ER_DELAYED_CANT_CHANGE_LOCK),table->s->table_name); + sql_print_error(ER(ER_DELAYED_CANT_CHANGE_LOCK),table->s->table_name.str); goto err; } @@ -1953,11 +2008,6 @@ bool delayed_insert::handle_inserts(void) using_ignore=0; table->file->extra(HA_EXTRA_NO_IGNORE_DUP_KEY); } - if (row->query && row->log_query && using_bin_log) - { - Query_log_event qinfo(&thd, row->query, row->query_length, 0, FALSE); - mysql_bin_log.write(&qinfo); - } if (table->s->blob_fields) free_delayed_insert_blobs(table); thread_safe_sub(delayed_rows_in_use,1,&LOCK_delayed_status); @@ -1972,8 +2022,7 @@ bool delayed_insert::handle_inserts(void) on this table until all entries has been processed */ if (group_count++ >= max_rows && (row= rows.head()) && - (!(row->log_query & using_bin_log) || - row->query)) + (!(row->log_query & using_bin_log))) { group_count=0; if (stacked_inserts || tables_in_use) // Let these wait a while @@ -1993,7 +2042,8 @@ bool delayed_insert::handle_inserts(void) if (thr_reschedule_write_lock(*thd.lock->locks)) { /* This should never happen */ - sql_print_error(ER(ER_DELAYED_CANT_CHANGE_LOCK),table->s->table_name); + sql_print_error(ER(ER_DELAYED_CANT_CHANGE_LOCK), + table->s->table_name.str); } if (!using_bin_log) table->file->extra(HA_EXTRA_WRITE_CACHE); @@ -2008,6 +2058,10 @@ bool delayed_insert::handle_inserts(void) thd.proc_info=0; table->next_number_field=0; pthread_mutex_unlock(&mutex); + + /* After releasing the mutex, to prevent deadlocks. */ + thd.binlog_query(THD::ROW_QUERY_TYPE, query, query_length, FALSE, FALSE); + if ((error=table->file->extra(HA_EXTRA_NO_CACHE))) { // This shouldn't happen table->file->print_error(error,MYF(0)); @@ -2205,6 +2259,16 @@ select_insert::prepare(List<Item> &values, SELECT_LEX_UNIT *u) check_that_all_fields_are_given_values(thd, table, table_list)) || table_list->prepare_where(thd, 0, TRUE) || table_list->prepare_check_option(thd)); + + /* + For non-transactional non-temporary tables, we set the + OPTION_STATUS_NO_TRANS_UPDATE flag here. The send_eof() function + is used by both the select_insert and the select_create classes, + so setting it there would clash. + */ + if (!(table->file->has_transactions() || table->s->tmp_table)) + thd->options|=OPTION_STATUS_NO_TRANS_UPDATE; + DBUG_RETURN(res); } @@ -2246,7 +2310,7 @@ select_insert::~select_insert() if (table) { table->next_number_field=0; - table->file->reset(); + table->file->ha_reset(); } thd->count_cuted_fields= CHECK_FIELD_IGNORE; thd->abort_on_warning= 0; @@ -2334,9 +2398,31 @@ void select_insert::send_error(uint errcode,const char *err) table->file->end_bulk_insert(); /* If at least one row has been inserted/modified and will stay in the table - (the table doesn't have transactions) (example: we got a duplicate key - error while inserting into a MyISAM table) we must write to the binlog (and + (the table doesn't have transactions) we must write to the binlog (and the error code will make the slave stop). 
+
+    For many errors (example: we got a duplicate key error while
+    inserting into a MyISAM table), no row will be added to the table,
+    so passing the error to the slave will not help since there will
+    be an error code mismatch (the inserts will succeed on the slave
+    with no error).
+
+    If we are using row-based replication we have two cases where this
+    code is executed: replication of CREATE-SELECT and replication of
+    INSERT-SELECT.
+
+    When replicating a CREATE-SELECT statement, we shall not write the
+    events to the binary log. To prevent the ha_rollback_stmt() below
+    from writing to the binary log, we have to pretend that the table
+    is transactional, even if it actually is not. Therefore, the
+    OPTION_STATUS_NO_TRANS_UPDATE is cleared in
+    select_create::prepare() and will remain cleared here.
+
+    When replicating INSERT-SELECT, we shall not write the events to
+    the binary log for transactional tables, but shall write all events
+    if there are one or more writes to non-transactional tables. In
+    this case, the OPTION_STATUS_NO_TRANS_UPDATE is set if there is a
+    write to a non-transactional table, otherwise it is cleared.
   */
   if ((info.copied || info.deleted || info.updated) &&
       !table->file->has_transactions())
@@ -2345,11 +2431,10 @@ void select_insert::send_error(uint errcode,const char *err)
     thd->insert_id(last_insert_id);             // For binary log
     if (mysql_bin_log.is_open())
     {
-      Query_log_event qinfo(thd, thd->query, thd->query_length,
-                            table->file->has_transactions(), FALSE);
-      mysql_bin_log.write(&qinfo);
+      thd->binlog_query(THD::ROW_QUERY_TYPE, thd->query, thd->query_length,
+                        table->file->has_transactions(), FALSE);
     }
-    if (!table->s->tmp_table)
+    if (!binlog_row_based && !table->s->tmp_table)
       thd->options|=OPTION_STATUS_NO_TRANS_UPDATE;
   }
   if (info.copied || info.deleted || info.updated)
@@ -2371,26 +2456,36 @@ bool select_insert::send_eof()
   /*
     We must invalidate the table in the query cache before binlog writing
-    and ha_autocommit_or_rollback
-  */
+    and ha_autocommit_or_rollback.
+    If nothing was inserted in the table, there is no need to emit a
+    ROLLBACK statement to the binary log, so in that case we clear
+    OPTION_STATUS_NO_TRANS_UPDATE.
+
+    Observe that select_insert::send_eof() is used by both
+    select_insert and select_create and that they set the flag in
+    different manners. See Note 1 below for more info.
+  */
   if (info.copied || info.deleted || info.updated)
-  {
     query_cache_invalidate3(thd, table, 1);
-    if (!(table->file->has_transactions() || table->s->tmp_table))
-      thd->options|=OPTION_STATUS_NO_TRANS_UPDATE;
-  }
+  else
+    thd->options&= ~OPTION_STATUS_NO_TRANS_UPDATE;
 
   if (last_insert_id)
     thd->insert_id(last_insert_id);             // For binary log
-  /* Write to binlog before commiting transaction */
+  /*
+    Write to binlog before committing transaction. No statement will
+    be written by the binlog_query() below in RBR mode. All the
+    events are in the transaction cache and will be written when
+    ha_autocommit_or_rollback() is issued below.
+  */
   if (mysql_bin_log.is_open())
   {
     if (!error)
       thd->clear_error();
-    Query_log_event qinfo(thd, thd->query, thd->query_length,
-                          table->file->has_transactions(), FALSE);
-    mysql_bin_log.write(&qinfo);
+    thd->binlog_query(THD::ROW_QUERY_TYPE,
+                      thd->query, thd->query_length,
+                      table->file->has_transactions(), FALSE);
  }
  if ((error2=ha_autocommit_or_rollback(thd,error)) && !error)
    error=error2;
@@ -2460,8 +2555,62 @@ select_create::prepare(List<Item> &values, SELECT_LEX_UNIT *u)
 }
 
+
+void
+select_create::binlog_show_create_table()
+{
+  /*
+    Note 1: In RBR mode, we generate a CREATE TABLE statement for the
+    created table by calling store_create_info() (behaves as SHOW
+    CREATE TABLE). In the event of an error, nothing should be
+    written to the binary log, even if the table is non-transactional;
+    therefore we pretend that the generated CREATE TABLE statement is
+    for a transactional table. The event will then be put in the
+    transaction cache, and any subsequent events (e.g., table-map
+    events and binrow events) will also be put there. We can then use
+    ha_autocommit_or_rollback() to either throw away the entire
+    kaboodle of events, or write them to the binary log.
+
+    We write the CREATE TABLE statement here and not in prepare()
+    since there potentially are sub-selects or accesses to information
+    schema that will do a close_thread_tables(), destroying the
+    statement transaction cache.
+
+    To ensure that the event kaboodle is not written to the binary log
+    on rollback, we clear the OPTION_STATUS_NO_TRANS_UPDATE bit of
+    thd->options.
+  */
+  DBUG_ASSERT(binlog_row_based && !create_table_written);
+
+  thd->options&= ~OPTION_STATUS_NO_TRANS_UPDATE;
+  char buf[2048];
+  String query(buf, sizeof(buf), system_charset_info);
+  query.length(0);      // Have to zero it since constructor doesn't
+
+  TABLE_LIST tables;
+  memset(&tables, 0, sizeof(tables));
+  tables.table = table;
+
+  int result= store_create_info(thd, &tables, &query, create_info);
+  DBUG_ASSERT(result == 0); /* store_create_info() always returns 0 */
+  thd->binlog_query(THD::STMT_QUERY_TYPE,
+                    query.ptr(), query.length(),
+                    /* is_trans */ TRUE,
+                    /* suppress_use */ FALSE);
+}
+
+
 void select_create::store_values(List<Item> &values)
 {
+  /*
+    Before writing the first row, we write the CREATE TABLE statement
+    to the binlog.
+  */
+  if (binlog_row_based && !create_table_written)
+  {
+    binlog_show_create_table();
+    create_table_written= TRUE;
+  }
+
   fill_record_n_invoke_before_triggers(thd, field, values, 1,
                                        table->triggers, TRG_EVENT_INSERT);
 }
@@ -2481,6 +2630,16 @@ void select_create::send_error(uint errcode,const char *err)
 
 bool select_create::send_eof()
 {
+  /*
+    If no rows were written to the binary log, we write the CREATE
+    TABLE statement to the binlog.
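+
+    Schematically, the binary log contents for a successful
+    CREATE ... SELECT under row-based logging are then
+    (t1 is an illustrative table name):
+
+      Query_log_event       "CREATE TABLE t1 ..."  (from store_create_info())
+      Table_map_log_event   mapping for t1
+      Write_rows_log_event  rows produced by the SELECT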
+ */ + if (binlog_row_based && !create_table_written) + { + binlog_show_create_table(); + create_table_written= TRUE; + } + bool tmp=select_insert::send_eof(); if (tmp) abort(); @@ -2520,10 +2679,11 @@ void select_create::abort() if (table) { table->file->extra(HA_EXTRA_NO_IGNORE_DUP_KEY); - enum db_type table_type=table->s->db_type; + handlerton *table_type=table->s->db_type; if (!table->s->tmp_table) { ulong version= table->s->version; + table->s->version= 0; hash_delete(&open_cache,(byte*) table); if (!create_info->table_existed) quick_rm_table(table_type, create_table->db, create_table->table_name); @@ -2532,8 +2692,8 @@ void select_create::abort() VOID(pthread_cond_broadcast(&COND_refresh)); } else if (!create_info->table_existed) - close_temporary_table(thd, create_table->db, create_table->table_name); - table=0; + close_temporary_table(thd, table, 1, 1); + table=0; // Safety } VOID(pthread_mutex_unlock(&LOCK_open)); } diff --git a/sql/sql_lex.cc b/sql/sql_lex.cc index 946b6a28430..a6892f204a4 100644 --- a/sql/sql_lex.cc +++ b/sql/sql_lex.cc @@ -28,7 +28,8 @@ We are using pointer to this variable for distinguishing between assignment to NEW row field (when parsing trigger definition) and structured variable. */ -sys_var_long_ptr trg_new_row_fake_var(0, 0); + +sys_var *trg_new_row_fake_var= (sys_var*) 0x01; /* Macros to look like lex */ @@ -109,7 +110,7 @@ void lex_free(void) (We already do too much here) */ -void lex_start(THD *thd, uchar *buf,uint length) +void lex_start(THD *thd, const uchar *buf, uint length) { LEX *lex= thd->lex; DBUG_ENTER("lex_start"); @@ -159,6 +160,7 @@ void lex_start(THD *thd, uchar *buf,uint length) lex->yylineno = 1; lex->in_comment=0; lex->length=0; + lex->part_info= 0; lex->select_lex.in_sum_expr=0; lex->select_lex.expr_list.empty(); lex->select_lex.ftfunc_list_alloc.empty(); @@ -197,9 +199,9 @@ void lex_end(LEX *lex) static int find_keyword(LEX *lex, uint len, bool function) { - uchar *tok=lex->tok_start; + const uchar *tok=lex->tok_start; - SYMBOL *symbol = get_hash_symbol((const char *)tok,len,function); + SYMBOL *symbol= get_hash_symbol((const char *)tok,len,function); if (symbol) { lex->yylval->symbol.symbol=symbol; @@ -257,15 +259,16 @@ static LEX_STRING get_token(LEX *lex,uint length) static LEX_STRING get_quoted_token(LEX *lex,uint length, char quote) { LEX_STRING tmp; - byte *from, *to, *end; + const uchar *from, *end; + uchar *to; yyUnget(); // ptr points now after last token char tmp.length=lex->yytoklen=length; tmp.str=(char*) lex->thd->alloc(tmp.length+1); - for (from= (byte*) lex->tok_start, to= (byte*) tmp.str, end= to+length ; + for (from= lex->tok_start, to= (uchar*) tmp.str, end= to+length ; to != end ; ) { - if ((*to++= *from++) == quote) + if ((*to++= *from++) == (uchar) quote) from++; // Skip double quotes } *to= 0; // End null for safety @@ -285,7 +288,6 @@ static char *get_text(LEX *lex) CHARSET_INFO *cs= lex->thd->charset(); sep= yyGetLast(); // String should end with this - //lex->tok_start=lex->ptr-1; // Remember ' while (lex->ptr != lex->end_of_query) { c = yyGet(); @@ -329,7 +331,8 @@ static char *get_text(LEX *lex) yyUnget(); /* Found end. 
Unescape and return string */ - uchar *str,*end,*start; + const uchar *str, *end; + uchar *start; str=lex->tok_start+1; end=lex->ptr-1; @@ -613,7 +616,7 @@ int yylex(void *arg, void *yythd) break; } case MY_LEX_IDENT: - uchar *start; + const uchar *start; #if defined(USE_MB) && defined(USE_MB_IDENT) if (use_mb(cs)) { diff --git a/sql/sql_lex.h b/sql/sql_lex.h index 5ba47d768fb..eb2be2691b3 100644 --- a/sql/sql_lex.h +++ b/sql/sql_lex.h @@ -25,6 +25,7 @@ class sp_head; class sp_name; class sp_instr; class sp_pcontext; +class partition_info; /* The following hack is needed because mysql_yacc.cc does not define @@ -52,8 +53,8 @@ enum enum_sql_command { SQLCOM_DELETE, SQLCOM_TRUNCATE, SQLCOM_DROP_TABLE, SQLCOM_DROP_INDEX, SQLCOM_SHOW_DATABASES, SQLCOM_SHOW_TABLES, SQLCOM_SHOW_FIELDS, - SQLCOM_SHOW_KEYS, SQLCOM_SHOW_VARIABLES, SQLCOM_SHOW_LOGS, SQLCOM_SHOW_STATUS, - SQLCOM_SHOW_INNODB_STATUS, SQLCOM_SHOW_NDBCLUSTER_STATUS, SQLCOM_SHOW_MUTEX_STATUS, + SQLCOM_SHOW_KEYS, SQLCOM_SHOW_VARIABLES, SQLCOM_SHOW_STATUS, + SQLCOM_SHOW_ENGINE_LOGS, SQLCOM_SHOW_ENGINE_STATUS, SQLCOM_SHOW_ENGINE_MUTEX, SQLCOM_SHOW_PROCESSLIST, SQLCOM_SHOW_MASTER_STAT, SQLCOM_SHOW_SLAVE_STAT, SQLCOM_SHOW_GRANTS, SQLCOM_SHOW_CREATE, SQLCOM_SHOW_CHARSETS, SQLCOM_SHOW_COLLATIONS, SQLCOM_SHOW_CREATE_DB, SQLCOM_SHOW_TABLE_STATUS, @@ -91,6 +92,9 @@ enum enum_sql_command { SQLCOM_XA_START, SQLCOM_XA_END, SQLCOM_XA_PREPARE, SQLCOM_XA_COMMIT, SQLCOM_XA_ROLLBACK, SQLCOM_XA_RECOVER, SQLCOM_SHOW_PROC_CODE, SQLCOM_SHOW_FUNC_CODE, + SQLCOM_INSTALL_PLUGIN, SQLCOM_UNINSTALL_PLUGIN, + SQLCOM_SHOW_AUTHORS, SQLCOM_BINLOG_BASE64_EVENT, + SQLCOM_SHOW_PLUGINS, /* This should be the last !!! */ SQLCOM_END @@ -99,6 +103,11 @@ enum enum_sql_command { // describe/explain types #define DESCRIBE_NORMAL 1 #define DESCRIBE_EXTENDED 2 +/* + This is not #ifdef'ed because we want "EXPLAIN PARTITIONS ..." to produce + additional "partitions" column even if partitioning is not compiled in. 
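
Since DESCRIBE_NORMAL and DESCRIBE_EXTENDED are 1 and 2, the new flag defined just below takes the next free bit, and the three combine as an ordinary bitmask. A small sketch of that composition (a plain C++ restatement with toy names, not server code):

#include <cstdint>

// The describe modes are single bits, so EXPLAIN EXTENDED PARTITIONS can
// carry both the EXTENDED and the PARTITIONS bit at once.
enum : uint32_t
{
  TOY_DESCRIBE_NORMAL= 1,
  TOY_DESCRIBE_EXTENDED= 2,
  TOY_DESCRIBE_PARTITIONS= 4
};

inline bool wants_partitions_column(uint32_t describe)
{
  return (describe & TOY_DESCRIBE_PARTITIONS) != 0;
}
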
+*/ +#define DESCRIBE_PARTITIONS 4 enum enum_sp_suid_behaviour { @@ -456,7 +465,7 @@ public: void set_limit(st_select_lex *values); void set_thd(THD *thd_arg) { thd= thd_arg; } - friend void lex_start(THD *thd, uchar *buf, uint length); + friend void lex_start(THD *thd, const uchar *buf, uint length); friend int subselect_union_engine::exec(); List<Item> *get_unit_column_types(); @@ -625,7 +634,7 @@ public: void cut_subtree() { slave= 0; } bool test_limit(); - friend void lex_start(THD *thd, uchar *buf, uint length); + friend void lex_start(THD *thd, const uchar *buf, uint length); st_select_lex() {} void make_empty_select() { @@ -662,6 +671,11 @@ typedef class st_select_lex SELECT_LEX; #define ALTER_KEYS_ONOFF 512 #define ALTER_CONVERT 1024 #define ALTER_FORCE 2048 +#define ALTER_RECREATE 4096 +#define ALTER_ADD_PARTITION 8192 +#define ALTER_DROP_PARTITION 16384 +#define ALTER_COALESCE_PARTITION 32768 +#define ALTER_REORGANISE_PARTITION 65536 typedef struct st_alter_info { @@ -670,9 +684,17 @@ typedef struct st_alter_info uint flags; enum enum_enable_or_disable keys_onoff; enum tablespace_op_type tablespace_op; + List<char> partition_names; + uint no_parts; st_alter_info(){clear();} - void clear(){keys_onoff= LEAVE_AS_IS;tablespace_op= NO_TABLESPACE_OP;} + void clear() + { + keys_onoff= LEAVE_AS_IS; + tablespace_op= NO_TABLESPACE_OP; + no_parts= 0; + partition_names.empty(); + } void reset(){drop_list.empty();alter_list.empty();clear();} } ALTER_INFO; @@ -691,7 +713,7 @@ struct st_trg_chistics enum trg_event_type event; }; -extern sys_var_long_ptr trg_new_row_fake_var; +extern sys_var *trg_new_row_fake_var; enum xa_option_words {XA_NONE, XA_JOIN, XA_RESUME, XA_ONE_PHASE, XA_SUSPEND, XA_FOR_MIGRATE}; @@ -708,11 +730,11 @@ typedef struct st_lex SELECT_LEX *current_select; /* list of all SELECT_LEX */ SELECT_LEX *all_selects_list; - uchar *buf; /* The beginning of string, used by SPs */ - uchar *ptr,*tok_start,*tok_end,*end_of_query; + const uchar *buf; /* The beginning of string, used by SPs */ + const uchar *ptr,*tok_start,*tok_end,*end_of_query; /* The values of tok_start/tok_end as they were one call of yylex before */ - uchar *tok_start_prev, *tok_end_prev; + const uchar *tok_start_prev, *tok_end_prev; char *length,*dec,*change,*name; char *help_arg; @@ -742,6 +764,8 @@ typedef struct st_lex TABLE_LIST *leaf_tables_insert; /* Position (first character index) of SELECT of CREATE VIEW statement */ uint create_view_select_start; + /* Partition info structure filled in by PARTITION BY parse part */ + partition_info *part_info; /* The definer of the object being created (view, trigger, stored routine). @@ -927,7 +951,7 @@ typedef struct st_lex Pointers to part of LOAD DATA statement that should be rewritten during replication ("LOCAL 'filename' REPLACE INTO" part). */ - uchar *fname_start, *fname_end; + const uchar *fname_start, *fname_end; bool escape_used; @@ -1054,7 +1078,7 @@ struct st_lex_local: public st_lex extern void lex_init(void); extern void lex_free(void); -extern void lex_start(THD *thd, uchar *buf,uint length); +extern void lex_start(THD *thd, const uchar *buf, uint length); extern void lex_end(LEX *lex); extern int yylex(void *arg, void *yythd); diff --git a/sql/sql_list.h b/sql/sql_list.h index b2bcc4ea401..05f589a2c23 100644 --- a/sql/sql_list.h +++ b/sql/sql_list.h @@ -441,6 +441,28 @@ struct ilink }; +/* Needed to be able to have an I_List of char* strings in mysqld.cc. 
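
The wrapper classes that follow exist because I_List is an intrusive list: it links elements through their embedded ilink, so a bare char* cannot be put on such a list directly. A self-contained sketch of the idea (simplified single link, not the server's template machinery):

#include <cstdio>

struct toy_ilink { toy_ilink *next= nullptr; };

// One tiny wrapper per string, mirroring the i_string class below.
struct toy_i_string : toy_ilink
{
  const char *ptr;
  explicit toy_i_string(const char *s) : ptr(s) {}
};

int main()
{
  toy_i_string a("db1"), b("db2");      // e.g. a replicate-do-db style list
  a.next= &b;
  for (toy_ilink *p= &a; p; p= p->next)
    std::printf("%s\n", static_cast<toy_i_string*>(p)->ptr);
  return 0;
}
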
*/ + +class i_string: public ilink +{ +public: + const char* ptr; + i_string():ptr(0) { } + i_string(const char* s) : ptr(s) {} +}; + +/* needed for linked list of two strings for replicate-rewrite-db */ +class i_string_pair: public ilink +{ +public: + const char* key; + const char* val; + i_string_pair():key(0),val(0) { } + i_string_pair(const char* key_arg, const char* val_arg) : + key(key_arg),val(val_arg) {} +}; + + template <class T> class I_List_iterator; /* diff --git a/sql/sql_load.cc b/sql/sql_load.cc index 4f3bfee5d3a..3850e704718 100644 --- a/sql/sql_load.cc +++ b/sql/sql_load.cc @@ -176,7 +176,7 @@ bool mysql_load(THD *thd,sql_exchange *ex,TABLE_LIST *table_list, The main thing to fix to remove this restriction is to ensure that the table is marked to be 'used for insert' in which case we should never - mark this table as as 'const table' (ie, one that has only one row). + mark this table as 'const table' (ie, one that has only one row). */ if (unique_table(thd, table_list, table_list->next_global)) { @@ -192,6 +192,10 @@ bool mysql_load(THD *thd,sql_exchange *ex,TABLE_LIST *table_list, Field **field; for (field=table->field; *field ; field++) fields_vars.push_back(new Item_field(*field)); + /* + Since all fields are set we set all bits in the write set + */ + table->file->ha_set_all_bits_in_write_set(); table->timestamp_field_type= TIMESTAMP_NO_AUTO_SET; /* Let us also prepare SET clause, altough it is probably empty @@ -204,8 +208,15 @@ bool mysql_load(THD *thd,sql_exchange *ex,TABLE_LIST *table_list, else { // Part field list /* TODO: use this conds for 'WITH CHECK OPTIONS' */ - if (setup_fields(thd, 0, fields_vars, 1, 0, 0) || - setup_fields(thd, 0, set_fields, 1, 0, 0) || + /* + Indicate that both variables in field list and fields in update_list + is to be included in write set of table. We do however set all bits + in write set anyways since it is not allowed to specify NULLs in + LOAD DATA + */ + table->file->ha_set_all_bits_in_write_set(); + if (setup_fields(thd, 0, fields_vars, 2, 0, 0) || + setup_fields(thd, 0, set_fields, 2, 0, 0) || check_that_all_fields_are_given_values(thd, table, table_list)) DBUG_RETURN(TRUE); /* @@ -275,7 +286,7 @@ bool mysql_load(THD *thd,sql_exchange *ex,TABLE_LIST *table_list, #endif if (!dirname_length(ex->file_name)) { - strxnmov(name, FN_REFLEN, mysql_real_data_home, tdb, NullS); + strxnmov(name, FN_REFLEN-1, mysql_real_data_home, tdb, NullS); (void) fn_format(name, ex->file_name, name, "", MY_RELATIVE_PATH | MY_UNPACK_FILENAME); } @@ -403,38 +414,55 @@ bool mysql_load(THD *thd,sql_exchange *ex,TABLE_LIST *table_list, #ifndef EMBEDDED_LIBRARY if (mysql_bin_log.is_open()) { +#ifdef HAVE_ROW_BASED_REPLICATION /* - Make sure last block (the one which caused the error) gets logged. - This is needed because otherwise after write of - (to the binlog, not to read_info (which is a cache)) - Delete_file_log_event the bad block will remain in read_info (because - pre_read is not called at the end of the last block; remember pre_read - is called whenever a new block is read from disk). - At the end of mysql_load(), the destructor of read_info will call - end_io_cache() which will flush read_info, so we will finally have - this in the binlog: - Append_block # The last successfull block - Delete_file - Append_block # The failing block - which is nonsense. 
- Or could also be (for a small file) - Create_file # The failing block - which is nonsense (Delete_file is not written in this case, because: - Create_file has not been written, so Delete_file is not written, then - when read_info is destroyed end_io_cache() is called which writes - Create_file. + We need to do the job that is normally done inside + binlog_query() here, which is to ensure that the pending event + is written before tables are unlocked and before any other + events are written. We also need to update the table map + version for the binary log to mark that table maps are invalid + after this point. */ - read_info.end_io_cache(); - /* If the file was not empty, wrote_create_file is true */ - if (lf_info.wrote_create_file) + if (binlog_row_based) + thd->binlog_flush_pending_rows_event(true); + else +#endif { - if ((info.copied || info.deleted) && !transactional_table) - write_execute_load_query_log_event(thd, handle_duplicates, - ignore, transactional_table); - else + /* + Make sure last block (the one which caused the error) gets + logged. This is needed because otherwise after write of (to + the binlog, not to read_info (which is a cache)) + Delete_file_log_event the bad block will remain in read_info + (because pre_read is not called at the end of the last + block; remember pre_read is called whenever a new block is + read from disk). At the end of mysql_load(), the destructor + of read_info will call end_io_cache() which will flush + read_info, so we will finally have this in the binlog: + + Append_block # The last successfull block + Delete_file + Append_block # The failing block + which is nonsense. + Or could also be (for a small file) + Create_file # The failing block + which is nonsense (Delete_file is not written in this case, because: + Create_file has not been written, so Delete_file is not written, then + when read_info is destroyed end_io_cache() is called which writes + Create_file. + */ + read_info.end_io_cache(); + /* If the file was not empty, wrote_create_file is true */ + if (lf_info.wrote_create_file) { - Delete_file_log_event d(thd, db, transactional_table); - mysql_bin_log.write(&d); + if ((info.copied || info.deleted) && !transactional_table) + write_execute_load_query_log_event(thd, handle_duplicates, + ignore, transactional_table); + else + { + Delete_file_log_event d(thd, db, transactional_table); + d.flags|= LOG_EVENT_UPDATE_TABLE_MAP_VERSION_F; + mysql_bin_log.write(&d); + } } } } @@ -451,15 +479,32 @@ bool mysql_load(THD *thd,sql_exchange *ex,TABLE_LIST *table_list, #ifndef EMBEDDED_LIBRARY if (mysql_bin_log.is_open()) { +#ifdef HAVE_ROW_BASED_REPLICATION /* - As already explained above, we need to call end_io_cache() or the last - block will be logged only after Execute_load_query_log_event (which is - wrong), when read_info is destroyed. - */ - read_info.end_io_cache(); - if (lf_info.wrote_create_file) - write_execute_load_query_log_event(thd, handle_duplicates, - ignore, transactional_table); + We need to do the job that is normally done inside + binlog_query() here, which is to ensure that the pending event + is written before tables are unlocked and before any other + events are written. We also need to update the table map + version for the binary log to mark that table maps are invalid + after this point. 
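
The ordering requirement described in this comment is easy to model: row changes are batched into one pending event, and nothing else may be appended to the log until that batch has been flushed. A toy model (invented names, no real binlog API):

#include <string>
#include <vector>

struct toy_binlog
{
  std::vector<std::string> log;
  std::string pending;                  // buffered row changes

  void add_row(const std::string &row) { pending+= row + ";"; }

  void flush_pending_rows_event()
  {
    if (!pending.empty())
    {
      log.push_back("Rows_event(" + pending + ")");
      pending.clear();
    }
  }

  // The invariant: any other event forces the pending rows out first,
  // so events can never appear in the log out of order.
  void write_event(const std::string &event)
  {
    flush_pending_rows_event();
    log.push_back(event);
  }
};
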
+ */ + if (binlog_row_based) + thd->binlog_flush_pending_rows_event(true); + else +#endif + { + /* + As already explained above, we need to call end_io_cache() or the last + block will be logged only after Execute_load_query_log_event (which is + wrong), when read_info is destroyed. + */ + read_info.end_io_cache(); + if (lf_info.wrote_create_file) + { + write_execute_load_query_log_event(thd, handle_duplicates, + ignore, transactional_table); + } + } } #endif /*!EMBEDDED_LIBRARY*/ if (transactional_table) @@ -488,6 +533,7 @@ static bool write_execute_load_query_log_event(THD *thd, (duplicates == DUP_REPLACE) ? LOAD_DUP_REPLACE : (ignore ? LOAD_DUP_IGNORE : LOAD_DUP_ERROR), transactional_table, FALSE); + e.flags|= LOG_EVENT_UPDATE_TABLE_MAP_VERSION_F; return mysql_bin_log.write(&e); } @@ -899,7 +945,7 @@ READ_INFO::READ_INFO(File file_par, uint tot_length, CHARSET_INFO *cs, if (get_it_from_net) cache.read_function = _my_b_net_read; - if (mysql_bin_log.is_open()) + if (!binlog_row_based && mysql_bin_log.is_open()) cache.pre_read = cache.pre_close = (IO_CACHE_CALLBACK) log_loaded_block; #endif diff --git a/sql/sql_manager.cc b/sql/sql_manager.cc index 1d3acd1696c..f580bcb16d9 100644 --- a/sql/sql_manager.cc +++ b/sql/sql_manager.cc @@ -32,12 +32,43 @@ pthread_t manager_thread; pthread_mutex_t LOCK_manager; pthread_cond_t COND_manager; +struct handler_cb { + struct handler_cb *next; + void (*action)(void); +}; + +static struct handler_cb * volatile cb_list; + +bool mysql_manager_submit(void (*action)()) +{ + bool result= FALSE; + struct handler_cb * volatile *cb; + pthread_mutex_lock(&LOCK_manager); + cb= &cb_list; + while (*cb && (*cb)->action != action) + cb= &(*cb)->next; + if (!*cb) + { + *cb= (struct handler_cb *)my_malloc(sizeof(struct handler_cb), MYF(MY_WME)); + if (!*cb) + result= TRUE; + else + { + (*cb)->next= NULL; + (*cb)->action= action; + } + } + pthread_mutex_unlock(&LOCK_manager); + return result; +} + pthread_handler_t handle_manager(void *arg __attribute__((unused))) { int error = 0; ulong status; struct timespec abstime; bool reset_flush_time = TRUE; + struct handler_cb *cb= NULL; my_thread_init(); DBUG_ENTER("handle_manager"); @@ -68,6 +99,11 @@ pthread_handler_t handle_manager(void *arg __attribute__((unused))) } status = manager_status; manager_status = 0; + if (cb == NULL) + { + cb= cb_list; + cb_list= NULL; + } pthread_mutex_unlock(&LOCK_manager); if (abort_loop) @@ -80,13 +116,13 @@ pthread_handler_t handle_manager(void *arg __attribute__((unused))) reset_flush_time = TRUE; } -#ifdef HAVE_BERKELEY_DB - if (status & MANAGER_BERKELEY_LOG_CLEANUP) + while (cb) { - berkeley_cleanup_log_files(); - status &= ~MANAGER_BERKELEY_LOG_CLEANUP; + struct handler_cb *next= cb->next; + cb->action(); + my_free((gptr)cb, MYF(0)); + cb= next; } -#endif if (status) DBUG_PRINT("error", ("manager did not handle something: %lx", status)); diff --git a/sql/sql_manager.h b/sql/sql_manager.h index 35704705820..d42deb8ff81 100644 --- a/sql/sql_manager.h +++ b/sql/sql_manager.h @@ -14,6 +14,6 @@ along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#ifdef HAVE_BERKELEY_DB +#ifdef WITH_BERKELEY_STORAGE_ENGINE void berkeley_cleanup_log_files(void); -#endif /* HAVE_BERKELEY_DB */ +#endif /* WITH_BERKELEY_STORAGE_ENGINE */ diff --git a/sql/sql_map.cc b/sql/sql_map.cc index 56b4b765355..8376b3bbfcc 100644 --- a/sql/sql_map.cc +++ b/sql/sql_map.cc @@ -25,10 +25,6 @@ #include <sys/mman.h> #endif -#ifndef MAP_NORESERVE 
-#define MAP_NORESERVE 0 // For IRIX -#endif - mapped_files::mapped_files(const my_string filename,byte *magic,uint magic_length) { #ifdef HAVE_MMAP diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc index 9cdb288c045..ad205103140 100644 --- a/sql/sql_parse.cc +++ b/sql/sql_parse.cc @@ -16,19 +16,12 @@ #include "mysql_priv.h" #include "sql_repl.h" +#include "rpl_filter.h" #include "repl_failsafe.h" #include <m_ctype.h> #include <myisam.h> #include <my_dir.h> -#ifdef HAVE_INNOBASE_DB -#include "ha_innodb.h" -#endif - -#ifdef HAVE_NDBCLUSTER_DB -#include "ha_ndbcluster.h" -#endif - #include "sp_head.h" #include "sp.h" #include "sp_cache.h" @@ -189,7 +182,8 @@ static bool begin_trans(THD *thd) */ inline bool all_tables_not_ok(THD *thd, TABLE_LIST *tables) { - return table_rules_on && tables && !tables_ok(thd,tables); + return rpl_filter->is_on() && tables && !thd->spcont && + !rpl_filter->tables_ok(thd->db, tables); } #endif @@ -1782,8 +1776,9 @@ bool dispatch_command(enum enum_server_command command, THD *thd, TABLE_LIST table_list; LEX_STRING conv_name; /* Saved variable value */ - my_bool old_innodb_table_locks= - IF_INNOBASE_DB(thd->variables.innodb_table_locks, FALSE); + my_bool old_innodb_table_locks= thd->variables.innodb_table_locks; + + /* used as fields initializator */ lex_start(thd, 0, 0); @@ -1988,7 +1983,8 @@ bool dispatch_command(enum enum_server_command command, THD *thd, uptime, (int) thread_count, (ulong) thd->query_id, (ulong) thd->status_var.long_query_count, - thd->status_var.opened_tables, refresh_version, cached_tables(), + thd->status_var.opened_tables, refresh_version, + cached_open_tables(), (uptime ? (ulonglong2double(thd->query_id) / (double) uptime) : (double) 0)); #ifdef SAFEMALLOC @@ -2233,6 +2229,7 @@ int prepare_schema_table(THD *thd, LEX *lex, Table_ident *table_ident, case SCH_STATUS: case SCH_PROCEDURES: case SCH_CHARSETS: + case SCH_ENGINES: case SCH_COLLATIONS: case SCH_COLLATION_CHARACTER_SET_APPLICABILITY: case SCH_USER_PRIVILEGES: @@ -2695,29 +2692,20 @@ mysql_execute_command(THD *thd) res = load_master_data(thd); break; #endif /* HAVE_REPLICATION */ -#ifdef HAVE_NDBCLUSTER_DB - case SQLCOM_SHOW_NDBCLUSTER_STATUS: - { - res = ndbcluster_show_status(thd); - break; - } -#endif -#ifdef HAVE_INNOBASE_DB - case SQLCOM_SHOW_INNODB_STATUS: + case SQLCOM_SHOW_ENGINE_STATUS: { if (check_global_access(thd, SUPER_ACL)) - goto error; - res = innodb_show_status(thd); + goto error; + res = ha_show_status(thd, lex->create_info.db_type, HA_ENGINE_STATUS); break; } - case SQLCOM_SHOW_MUTEX_STATUS: + case SQLCOM_SHOW_ENGINE_MUTEX: { if (check_global_access(thd, SUPER_ACL)) goto error; - res = innodb_mutex_show_status(thd); + res = ha_show_status(thd, lex->create_info.db_type, HA_ENGINE_MUTEX); break; } -#endif #ifdef HAVE_REPLICATION case SQLCOM_LOAD_MASTER_TABLE: { @@ -3454,13 +3442,16 @@ end_with_restore_list: case SQLCOM_SHOW_STORAGE_ENGINES: res= mysqld_show_storage_engines(thd); break; + case SQLCOM_SHOW_AUTHORS: + res= mysqld_show_authors(thd); + break; case SQLCOM_SHOW_PRIVILEGES: res= mysqld_show_privileges(thd); break; case SQLCOM_SHOW_COLUMN_TYPES: res= mysqld_show_column_types(thd); break; - case SQLCOM_SHOW_LOGS: + case SQLCOM_SHOW_ENGINE_LOGS: #ifdef DONT_ALLOW_SHOW_COMMANDS my_message(ER_NOT_ALLOWED_COMMAND, ER(ER_NOT_ALLOWED_COMMAND), MYF(0)); /* purecov: inspected */ @@ -3469,7 +3460,7 @@ end_with_restore_list: { if (grant_option && check_access(thd, FILE_ACL, any_db,0,0,0,0)) goto error; - res= mysqld_show_logs(thd); + res= ha_show_status(thd, 
lex->create_info.db_type, HA_ENGINE_LOGS); break; } #endif @@ -3588,9 +3579,9 @@ end_with_restore_list: above was not called. So we have to check rules again here. */ #ifdef HAVE_REPLICATION - if (thd->slave_thread && - (!db_ok(lex->name, replicate_do_db, replicate_ignore_db) || - !db_ok_with_wild_table(lex->name))) + if (thd->slave_thread && + (!rpl_filter->db_ok(lex->name) || + !rpl_filter->db_ok_with_wild_table(lex->name))) { my_message(ER_SLAVE_IGNORED_TABLE, ER(ER_SLAVE_IGNORED_TABLE), MYF(0)); break; @@ -3623,8 +3614,8 @@ end_with_restore_list: */ #ifdef HAVE_REPLICATION if (thd->slave_thread && - (!db_ok(lex->name, replicate_do_db, replicate_ignore_db) || - !db_ok_with_wild_table(lex->name))) + (!rpl_filter->db_ok(lex->name) || + !rpl_filter->db_ok_with_wild_table(lex->name))) { my_message(ER_SLAVE_IGNORED_TABLE, ER(ER_SLAVE_IGNORED_TABLE), MYF(0)); break; @@ -3663,8 +3654,8 @@ end_with_restore_list: */ #ifdef HAVE_REPLICATION if (thd->slave_thread && - (!db_ok(db, replicate_do_db, replicate_ignore_db) || - !db_ok_with_wild_table(db))) + (!rpl_filter->db_ok(lex->name) || + !rpl_filter->db_ok_with_wild_table(lex->name))) { my_message(ER_SLAVE_IGNORED_TABLE, ER(ER_SLAVE_IGNORED_TABLE), MYF(0)); break; @@ -3688,8 +3679,6 @@ end_with_restore_list: my_error(ER_WRONG_DB_NAME, MYF(0), lex->name); break; } - if (check_access(thd,SELECT_ACL,lex->name,0,1,0,is_schema_db(lex->name))) - break; res=mysqld_show_create_db(thd,lex->name,&lex->create_info); break; } @@ -3724,8 +3713,8 @@ end_with_restore_list: { if (mysql_bin_log.is_open()) { - Query_log_event qinfo(thd, thd->query, thd->query_length, 0, FALSE); - mysql_bin_log.write(&qinfo); + thd->binlog_query(THD::MYSQL_QUERY_TYPE, + thd->query, thd->query_length, FALSE, FALSE); } send_ok(thd); } @@ -3742,8 +3731,8 @@ end_with_restore_list: { if (mysql_bin_log.is_open()) { - Query_log_event qinfo(thd, thd->query, thd->query_length, 0, FALSE); - mysql_bin_log.write(&qinfo); + thd->binlog_query(THD::MYSQL_QUERY_TYPE, + thd->query, thd->query_length, FALSE, FALSE); } send_ok(thd); } @@ -3760,8 +3749,8 @@ end_with_restore_list: { if (mysql_bin_log.is_open()) { - Query_log_event qinfo(thd, thd->query, thd->query_length, 0, FALSE); - mysql_bin_log.write(&qinfo); + thd->binlog_query(THD::MYSQL_QUERY_TYPE, + thd->query, thd->query_length, FALSE, FALSE); } send_ok(thd); } @@ -3776,8 +3765,8 @@ end_with_restore_list: { if (mysql_bin_log.is_open()) { - Query_log_event qinfo(thd, thd->query, thd->query_length, 0, FALSE); - mysql_bin_log.write(&qinfo); + thd->binlog_query(THD::MYSQL_QUERY_TYPE, + thd->query, thd->query_length, FALSE, FALSE); } send_ok(thd); } @@ -3856,8 +3845,8 @@ end_with_restore_list: if (!res && mysql_bin_log.is_open()) { thd->clear_error(); - Query_log_event qinfo(thd, thd->query, thd->query_length, 0, FALSE); - mysql_bin_log.write(&qinfo); + thd->binlog_query(THD::MYSQL_QUERY_TYPE, + thd->query, thd->query_length, FALSE, FALSE); } } else @@ -3876,8 +3865,8 @@ end_with_restore_list: if (mysql_bin_log.is_open()) { thd->clear_error(); - Query_log_event qinfo(thd, thd->query, thd->query_length, 0, FALSE); - mysql_bin_log.write(&qinfo); + thd->binlog_query(THD::MYSQL_QUERY_TYPE, + thd->query, thd->query_length, FALSE, FALSE); } if (lex->sql_command == SQLCOM_GRANT) { @@ -4173,12 +4162,12 @@ end_with_restore_list: db, name, lex->sql_command == SQLCOM_CREATE_PROCEDURE, 1)) { - close_thread_tables(thd); if (sp_grant_privileges(thd, db, name, lex->sql_command == SQLCOM_CREATE_PROCEDURE)) push_warning(thd, MYSQL_ERROR::WARN_LEVEL_WARN, 
ER_PROC_AUTO_GRANT_FAIL, ER(ER_PROC_AUTO_GRANT_FAIL)); + close_thread_tables(thd); } #endif send_ok(thd); @@ -4406,8 +4395,8 @@ end_with_restore_list: if (mysql_bin_log.is_open()) { thd->clear_error(); - Query_log_event qinfo(thd, thd->query, thd->query_length, 0, FALSE); - mysql_bin_log.write(&qinfo); + thd->binlog_query(THD::MYSQL_QUERY_TYPE, + thd->query, thd->query_length, FALSE, FALSE); } send_ok(thd); break; @@ -4495,8 +4484,8 @@ end_with_restore_list: if (mysql_bin_log.is_open()) { thd->clear_error(); - Query_log_event qinfo(thd, thd->query, thd->query_length, 0, FALSE); - mysql_bin_log.write(&qinfo); + thd->binlog_query(THD::MYSQL_QUERY_TYPE, + thd->query, thd->query_length, FALSE, FALSE); } send_ok(thd); break; @@ -4620,8 +4609,8 @@ end_with_restore_list: buff.append(STRING_WITH_LEN(" AS ")); buff.append(first_table->source.str, first_table->source.length); - Query_log_event qinfo(thd, buff.ptr(), buff.length(), 0, FALSE); - mysql_bin_log.write(&qinfo); + thd->binlog_query(THD::STMT_QUERY_TYPE, + buff.ptr(), buff.length(), FALSE, FALSE); } break; } @@ -4634,8 +4623,8 @@ end_with_restore_list: mysql_bin_log.is_open()) { thd->clear_error(); - Query_log_event qinfo(thd, thd->query, thd->query_length, 0, FALSE); - mysql_bin_log.write(&qinfo); + thd->binlog_query(THD::STMT_QUERY_TYPE, + thd->query, thd->query_length, FALSE, FALSE); } break; } @@ -4829,15 +4818,33 @@ end_with_restore_list: case SQLCOM_XA_RECOVER: res= mysql_xa_recover(thd); break; + case SQLCOM_INSTALL_PLUGIN: + if (! (res= mysql_install_plugin(thd, &thd->lex->comment, + &thd->lex->ident))) + send_ok(thd); + break; + case SQLCOM_UNINSTALL_PLUGIN: + if (! (res= mysql_uninstall_plugin(thd, &thd->lex->comment))) + send_ok(thd); + break; + case SQLCOM_BINLOG_BASE64_EVENT: + { +#ifndef EMBEDDED_LIBRARY + mysql_client_binlog_statement(thd); +#else /* EMBEDDED_LIBRARY */ + my_error(ER_OPTION_PREVENTS_STATEMENT, MYF(0), "embedded"); +#endif /* EMBEDDED_LIBRARY */ + break; + } default: DBUG_ASSERT(0); /* Impossible */ send_ok(thd); break; } thd->proc_info="query end"; - /* Two binlog-related cleanups: */ /* + Binlog-related cleanup: Reset system variables temporarily modified by SET ONE SHOT. Exception: If this is a SET, do nothing. This is to allow @@ -5570,7 +5577,6 @@ void mysql_init_multi_delete(LEX *lex) lex->query_tables_last= &lex->query_tables; } - /* When you modify mysql_parse(), you may need to mofify mysql_test_parse_for_slave() in this same file. @@ -5895,12 +5901,16 @@ TABLE_LIST *st_select_lex::add_table_to_list(THD *thd, if (!table) DBUG_RETURN(0); // End of memory alias_str= alias ? 
alias->str : table->table.str; - if (check_table_name(table->table.str,table->table.length) || - table->db.str && check_db_name(table->db.str)) + if (check_table_name(table->table.str,table->table.length)) { my_error(ER_WRONG_TABLE_NAME, MYF(0), table->table.str); DBUG_RETURN(0); } + if (table->db.str && check_db_name(table->db.str)) + { + my_error(ER_WRONG_DB_NAME, MYF(0), table->db.str); + DBUG_RETURN(0); + } if (!alias) /* Alias is case sensitive */ { @@ -6472,7 +6482,7 @@ bool reload_acl_and_cache(THD *thd, ulong options, TABLE_LIST *tables, rotate_relay_log(active_mi); pthread_mutex_unlock(&LOCK_active_mi); #endif - if (ha_flush_logs()) + if (ha_flush_logs(NULL)) result=1; if (flush_error_log()) result=1; @@ -6788,7 +6798,7 @@ bool mysql_create_index(THD *thd, TABLE_LIST *table_list, List<Key> &keys) HA_CREATE_INFO create_info; DBUG_ENTER("mysql_create_index"); bzero((char*) &create_info,sizeof(create_info)); - create_info.db_type=DB_TYPE_DEFAULT; + create_info.db_type= (handlerton*) &default_hton; create_info.default_table_charset= thd->variables.collation_database; DBUG_RETURN(mysql_alter_table(thd,table_list->db,table_list->table_name, &create_info, table_list, @@ -6804,7 +6814,7 @@ bool mysql_drop_index(THD *thd, TABLE_LIST *table_list, ALTER_INFO *alter_info) HA_CREATE_INFO create_info; DBUG_ENTER("mysql_drop_index"); bzero((char*) &create_info,sizeof(create_info)); - create_info.db_type=DB_TYPE_DEFAULT; + create_info.db_type= (handlerton*) &default_hton; create_info.default_table_charset= thd->variables.collation_database; alter_info->clear(); alter_info->flags= ALTER_DROP_INDEX; diff --git a/sql/sql_partition.cc b/sql/sql_partition.cc new file mode 100644 index 00000000000..5d071c5591c --- /dev/null +++ b/sql/sql_partition.cc @@ -0,0 +1,3523 @@ +/* Copyright (C) 2005 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + This file was introduced as a container for general functionality related + to partitioning introduced in MySQL version 5.1. It contains functionality + used by all handlers that support partitioning, which in the first version + is the partitioning handler itself and the NDB handler. + + The first version was written by Mikael Ronstrom. + + This version supports RANGE partitioning, LIST partitioning, HASH + partitioning and composite partitioning (hereafter called subpartitioning) + where each RANGE/LIST partitioning is HASH partitioned. The hash function + can either be supplied by the user or by only a list of fields (also + called KEY partitioning, where the MySQL server will use an internal + hash function. + There are quite a few defaults that can be used as well. 
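
For the composite schemes mentioned here, two functions cooperate: the outer RANGE/LIST lookup chooses the partition and the inner hash chooses the subpartition within it. A sketch of how the two combine into one flat index, assuming the part_id * no_subparts + sub_id layout implied by the *_sub_* lookup functions declared below (toy names, no_subparts assumed nonzero):

#include <cstdint>
#include <vector>

// Toy outer lookup: first interval whose upper bound is above the value
// (VALUES LESS THAN semantics). The server uses its sorted range array.
static uint32_t toy_range_lookup(const std::vector<int64_t> &bounds,
                                 int64_t v)
{
  uint32_t id= 0;
  while (id < bounds.size() && v >= bounds[id])
    id++;
  return id;
}

static uint32_t toy_composite_part_id(const std::vector<int64_t> &bounds,
                                      int64_t range_value,
                                      uint64_t hash_value,
                                      uint32_t no_subparts)
{
  uint32_t part_id= toy_range_lookup(bounds, range_value);
  uint32_t sub_id= (uint32_t)(hash_value % no_subparts);
  return part_id * no_subparts + sub_id;  // flat index over subpartitions
}
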
+*/ + +/* Some general useful functions */ + +#include "mysql_priv.h" +#include <errno.h> +#include <m_ctype.h> +#include "md5.h" + +#ifdef WITH_PARTITION_STORAGE_ENGINE +#include "ha_partition.h" +/* + Partition related functions declarations and some static constants; +*/ +static const char *hash_str= "HASH"; +static const char *range_str= "RANGE"; +static const char *list_str= "LIST"; +static const char *part_str= "PARTITION"; +static const char *sub_str= "SUB"; +static const char *by_str= "BY"; +static const char *key_str= "KEY"; +static const char *space_str= " "; +static const char *equal_str= "="; +static const char *end_paren_str= ")"; +static const char *begin_paren_str= "("; +static const char *comma_str= ","; +static char buff[22]; + +bool get_partition_id_list(partition_info *part_info, + uint32 *part_id); +bool get_partition_id_range(partition_info *part_info, + uint32 *part_id); +bool get_partition_id_hash_nosub(partition_info *part_info, + uint32 *part_id); +bool get_partition_id_key_nosub(partition_info *part_info, + uint32 *part_id); +bool get_partition_id_linear_hash_nosub(partition_info *part_info, + uint32 *part_id); +bool get_partition_id_linear_key_nosub(partition_info *part_info, + uint32 *part_id); +bool get_partition_id_range_sub_hash(partition_info *part_info, + uint32 *part_id); +bool get_partition_id_range_sub_key(partition_info *part_info, + uint32 *part_id); +bool get_partition_id_range_sub_linear_hash(partition_info *part_info, + uint32 *part_id); +bool get_partition_id_range_sub_linear_key(partition_info *part_info, + uint32 *part_id); +bool get_partition_id_list_sub_hash(partition_info *part_info, + uint32 *part_id); +bool get_partition_id_list_sub_key(partition_info *part_info, + uint32 *part_id); +bool get_partition_id_list_sub_linear_hash(partition_info *part_info, + uint32 *part_id); +bool get_partition_id_list_sub_linear_key(partition_info *part_info, + uint32 *part_id); +uint32 get_partition_id_hash_sub(partition_info *part_info); +uint32 get_partition_id_key_sub(partition_info *part_info); +uint32 get_partition_id_linear_hash_sub(partition_info *part_info); +uint32 get_partition_id_linear_key_sub(partition_info *part_info); +#endif + + +/* + A routine used by the parser to decide whether we are specifying a full + partitioning or if only partitions to add or to split. 
+  SYNOPSIS
+    is_partition_management()
+    lex                    Reference to the lex object
+  RETURN VALUE
+    TRUE                   Yes, it is part of a management partition command
+    FALSE                  No, not a management partition command
+  DESCRIPTION
+    This needs to be outside of WITH_PARTITION_STORAGE_ENGINE since it is
+    used from the sql parser that doesn't have any #ifdef's
+*/
+
+my_bool is_partition_management(LEX *lex)
+{
+  return (lex->sql_command == SQLCOM_ALTER_TABLE &&
+          (lex->alter_info.flags == ALTER_ADD_PARTITION ||
+           lex->alter_info.flags == ALTER_REORGANISE_PARTITION));
+}
+
+#ifdef WITH_PARTITION_STORAGE_ENGINE
+/*
+  A support function to check if a partition name is in a list of strings.
+  SYNOPSIS
+    is_partition_in_list()
+    part_name              String searched for
+    list_part_names        A list of names searched in
+  RETURN VALUES
+    TRUE                   String found
+    FALSE                  String not found
+*/
+
+bool is_partition_in_list(char *part_name,
+                          List<char> list_part_names)
+{
+  List_iterator<char> part_names_it(list_part_names);
+  uint no_names= list_part_names.elements;
+  uint i= 0;
+  do
+  {
+    char *list_name= part_names_it++;
+    if (!(my_strcasecmp(system_charset_info, part_name, list_name)))
+      return TRUE;
+  } while (++i < no_names);
+  return FALSE;
+}
+
+
+/*
+  A support function to check partition names for duplication in a
+  partitioned table.
+  SYNOPSIS
+    is_partitions_in_table()
+    new_part_info          New partition info
+    old_part_info          Old partition info
+  RETURN VALUES
+    TRUE                   Duplicate names found
+    FALSE                  Duplicate names not found
+  DESCRIPTION
+    Can handle that the new and old parts are the same, in which case it
+    checks that the list of names in the partitions doesn't contain any
+    duplicated names.
+*/
+
+bool is_partitions_in_table(partition_info *new_part_info,
+                            partition_info *old_part_info)
+{
+  uint no_new_parts= new_part_info->partitions.elements, new_count;
+  uint no_old_parts= old_part_info->partitions.elements, old_count;
+  List_iterator<partition_element> new_parts_it(new_part_info->partitions);
+  bool same_part_info= (new_part_info == old_part_info);
+  DBUG_ENTER("is_partitions_in_table");
+
+  new_count= 0;
+  do
+  {
+    List_iterator<partition_element> old_parts_it(old_part_info->partitions);
+    char *new_name= (new_parts_it++)->partition_name;
+    new_count++;
+    old_count= 0;
+    do
+    {
+      char *old_name= (old_parts_it++)->partition_name;
+      old_count++;
+      if (same_part_info && old_count == new_count)
+        break;
+      if (!(my_strcasecmp(system_charset_info, old_name, new_name)))
+      {
+        DBUG_RETURN(TRUE);
+      }
+    } while (old_count < no_old_parts);
+  } while (new_count < no_new_parts);
+  DBUG_RETURN(FALSE);
+}
+
+
+/*
+  Check that the reorganized table will not have duplicate partitions.
+
+  SYNOPSIS
+    check_reorganise_list()
+    new_part_info          New partition info
+    old_part_info          Old partition info
+    list_part_names        The list of partition names that will go away
+                           and can be reused in the new table.
+
+  RETURN VALUES
+    TRUE                   Unacceptable name conflict detected.
+    FALSE                  New names are OK.
+
+  DESCRIPTION
+    Can handle that 'new_part_info' and 'old_part_info' are the same,
+    in which case it checks that the list of names in the partitions
+    doesn't contain any duplicated names.
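
Restating the rule this pair of checks enforces: a new partition may carry a name that already exists only when that name belongs to a partition being reorganised away. A self-contained sketch (std::string and == instead of the server's case-insensitive my_strcasecmp, and returning true when the names are acceptable, the opposite polarity of check_reorganise_list):

#include <algorithm>
#include <string>
#include <vector>

static bool toy_reorganise_names_ok(
    const std::vector<std::string> &old_names,
    const std::vector<std::string> &new_names,
    const std::vector<std::string> &going_away)
{
  for (const std::string &n : new_names)
    for (const std::string &o : old_names)
      if (n == o &&
          std::find(going_away.begin(), going_away.end(), o) ==
          going_away.end())
        return false;              // clash with a surviving partition name
  return true;
}

So REORGANISE PARTITION p1 INTO (PARTITION p1 ...) passes, because p1 is on the going-away list, while reusing the name of an untouched partition fails.
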
+*/ + +bool check_reorganise_list(partition_info *new_part_info, + partition_info *old_part_info, + List<char> list_part_names) +{ + uint new_count, old_count; + uint no_new_parts= new_part_info->partitions.elements; + uint no_old_parts= old_part_info->partitions.elements; + List_iterator<partition_element> new_parts_it(new_part_info->partitions); + bool same_part_info= (new_part_info == old_part_info); + DBUG_ENTER("check_reorganise_list"); + + new_count= 0; + do + { + List_iterator<partition_element> old_parts_it(old_part_info->partitions); + char *new_name= (new_parts_it++)->partition_name; + new_count++; + old_count= 0; + do + { + char *old_name= (old_parts_it++)->partition_name; + old_count++; + if (same_part_info && old_count == new_count) + break; + if (!(my_strcasecmp(system_charset_info, old_name, new_name))) + { + if (!is_partition_in_list(old_name, list_part_names)) + DBUG_RETURN(TRUE); + } + } while (old_count < no_old_parts); + } while (new_count < no_new_parts); + DBUG_RETURN(FALSE); +} + + +/* + A useful routine used by update_row for partition handlers to calculate + the partition ids of the old and the new record. + SYNOPSIS + get_part_for_update() + old_data Buffer of old record + new_data Buffer of new record + rec0 Reference to table->record[0] + part_info Reference to partition information + part_field_array A NULL-terminated array of fields for partition + function + old_part_id The returned partition id of old record + new_part_id The returned partition id of new record + RETURN VALUE + 0 Success + > 0 Error code + DESCRIPTION + Dependent on whether buf is not record[0] we need to prepare the + fields. Then we call the function pointer get_partition_id to + calculate the partition ids. +*/ + +int get_parts_for_update(const byte *old_data, byte *new_data, + const byte *rec0, partition_info *part_info, + uint32 *old_part_id, uint32 *new_part_id) +{ + Field **part_field_array= part_info->full_part_field_array; + int error; + DBUG_ENTER("get_parts_for_update"); + DBUG_ASSERT(new_data == rec0); + + set_field_ptr(part_field_array, old_data, rec0); + error= part_info->get_partition_id(part_info, old_part_id); + set_field_ptr(part_field_array, rec0, old_data); + if (unlikely(error)) // Should never happen + { + DBUG_ASSERT(0); + DBUG_RETURN(error); + } +#ifdef NOT_NEEDED + if (new_data == rec0) +#endif + { + if (unlikely(error= part_info->get_partition_id(part_info,new_part_id))) + { + DBUG_RETURN(error); + } + } +#ifdef NOT_NEEDED + else + { + /* + This branch should never execute but it is written anyways for + future use. It will be tested by ensuring that the above + condition is false in one test situation before pushing the code. + */ + set_field_ptr(part_field_array, new_data, rec0); + error= part_info->get_partition_id(part_info, new_part_id); + set_field_ptr(part_field_array, rec0, new_data); + if (unlikely(error)) + { + DBUG_RETURN(error); + } + } +#endif + DBUG_RETURN(0); +} + + +/* + A useful routine used by delete_row for partition handlers to calculate + the partition id. + SYNOPSIS + get_part_for_delete() + buf Buffer of old record + rec0 Reference to table->record[0] + part_info Reference to partition information + part_field_array A NULL-terminated array of fields for partition + function + part_id The returned partition id to delete from + RETURN VALUE + 0 Success + > 0 Error code + DESCRIPTION + Dependent on whether buf is not record[0] we need to prepare the + fields. Then we call the function pointer get_partition_id to + calculate the partition id. 
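
The set_field_ptr() calls in these two routines deserve a note: every Field normally points into table->record[0], so to evaluate the partition function on another row buffer the field pointers are shifted by the distance between the two buffers, then shifted back afterwards. A toy restatement of that pointer retargeting (invented names; roughly what the server's set_field_ptr() helper does with Field offsets):

#include <cstddef>

struct toy_field { const unsigned char *ptr; };

// Shift every field pointer from old_buf to the corresponding position
// in new_buf; calling it again with the arguments swapped restores them.
static void toy_set_field_ptr(toy_field **fields,
                              const unsigned char *new_buf,
                              const unsigned char *old_buf)
{
  std::ptrdiff_t diff= new_buf - old_buf;
  for (toy_field **f= fields; *f; f++)
    (*f)->ptr+= diff;
}
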
+*/
+
+int get_part_for_delete(const byte *buf, const byte *rec0,
+                        partition_info *part_info, uint32 *part_id)
+{
+  int error;
+  DBUG_ENTER("get_part_for_delete");
+
+  if (likely(buf == rec0))
+  {
+    if (unlikely((error= part_info->get_partition_id(part_info, part_id))))
+    {
+      DBUG_RETURN(error);
+    }
+    DBUG_PRINT("info", ("Delete from partition %d", *part_id));
+  }
+  else
+  {
+    Field **part_field_array= part_info->full_part_field_array;
+    set_field_ptr(part_field_array, buf, rec0);
+    error= part_info->get_partition_id(part_info, part_id);
+    set_field_ptr(part_field_array, rec0, buf);
+    if (unlikely(error))
+    {
+      DBUG_RETURN(error);
+    }
+    DBUG_PRINT("info", ("Delete from partition %d (path2)", *part_id));
+  }
+  DBUG_RETURN(0);
+}
+
+
+/*
+  This routine allocates an array of all range constants to achieve a fast
+  check of what partition a certain value belongs to. At the same time it
+  also checks that the range constants are defined in increasing order and
+  that the expressions are constant integer expressions.
+  SYNOPSIS
+    check_range_constants()
+    part_info
+  RETURN VALUE
+    TRUE                An error occurred during creation of range constants
+    FALSE               Successful creation of range constant mapping
+  DESCRIPTION
+    This routine is called from check_partition_info to get a quick error
+    before we come too far into the CREATE TABLE process. It is also called
+    from fix_partition_func every time we open the .frm file. It is only
+    called for RANGE PARTITIONed tables.
+*/
+
+static bool check_range_constants(partition_info *part_info)
+{
+  partition_element* part_def;
+  longlong current_largest_int= LONGLONG_MIN, part_range_value_int;
+  uint no_parts= part_info->no_parts, i;
+  List_iterator<partition_element> it(part_info->partitions);
+  bool result= TRUE;
+  DBUG_ENTER("check_range_constants");
+  DBUG_PRINT("enter", ("INT_RESULT with %d parts", no_parts));
+
+  part_info->part_result_type= INT_RESULT;
+  part_info->range_int_array=
+    (longlong*)sql_alloc(no_parts * sizeof(longlong));
+  if (unlikely(part_info->range_int_array == NULL))
+  {
+    my_error(ER_OUTOFMEMORY, MYF(0), no_parts*sizeof(longlong));
+    goto end;
+  }
+  i= 0;
+  do
+  {
+    part_def= it++;
+    if ((i != (no_parts - 1)) || !part_info->defined_max_value)
+      part_range_value_int= part_def->range_value;
+    else
+      part_range_value_int= LONGLONG_MAX;
+    if (likely(current_largest_int < part_range_value_int))
+    {
+      current_largest_int= part_range_value_int;
+      part_info->range_int_array[i]= part_range_value_int;
+    }
+    else
+    {
+      my_error(ER_RANGE_NOT_INCREASING_ERROR, MYF(0));
+      goto end;
+    }
+  } while (++i < no_parts);
+  result= FALSE;
+end:
+  DBUG_RETURN(result);
+}
+
+
+/*
+  A support routine for check_list_constants used by qsort to sort the
+  constant list expressions.
+  SYNOPSIS
+    list_part_cmp()
+    a                   First list constant to compare with
+    b                   Second list constant to compare with
+  RETURN VALUE
+    +1                  a > b
+    0                   a == b
+    -1                  a < b
+*/
+
+static int list_part_cmp(const void* a, const void* b)
+{
+  longlong a1, b1;
+  a1= ((LIST_PART_ENTRY*)a)->list_value;
+  b1= ((LIST_PART_ENTRY*)b)->list_value;
+  if (a1 < b1)
+    return -1;
+  else if (a1 > b1)
+    return +1;
+  else
+    return 0;
+}
+
+
+/*
+  This routine allocates an array of all list constants to achieve a fast
+  check of what partition a certain value belongs to. At the same time it
+  also checks that there are no duplicates among the list constants and
+  that the list expressions are constant integer expressions.
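
The arrays built by check_range_constants() and check_list_constants() exist purely to make the later per-row lookups cheap. The server's own lookup functions appear further down in this file; a compact sketch of the searches they correspond to, using the standard library and toy names for brevity:

#include <algorithm>
#include <cstdint>
#include <vector>

struct toy_list_entry { int64_t value; uint32_t part_id; };

// RANGE: partition i holds values below upper_bounds[i], so the target is
// the first bound strictly greater than v.
static uint32_t toy_range_part(const std::vector<int64_t> &upper_bounds,
                               int64_t v)
{
  return (uint32_t)(std::upper_bound(upper_bounds.begin(),
                                     upper_bounds.end(), v) -
                    upper_bounds.begin());
}

// LIST: exact match in the array sorted by check_list_constants(); a value
// outside every list gets no partition at all.
static bool toy_list_part(const std::vector<toy_list_entry> &arr,
                          int64_t v, uint32_t *part_id)
{
  auto it= std::lower_bound(arr.begin(), arr.end(), v,
                            [](const toy_list_entry &e, int64_t x)
                            { return e.value < x; });
  if (it == arr.end() || it->value != v)
    return false;
  *part_id= it->part_id;
  return true;
}
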
+ SYNOPSIS + check_list_constants() + part_info + RETURN VALUE + TRUE An error occurred during creation of list constants + FALSE Successful creation of list constant mapping + DESCRIPTION + This routine is called from check_partition_info to get a quick error + before we came too far into the CREATE TABLE process. It is also called + from fix_partition_func every time we open the .frm file. It is only + called for LIST PARTITIONed tables. +*/ + +static bool check_list_constants(partition_info *part_info) +{ + uint i, no_list_values= 0, no_parts, list_index= 0; + longlong *list_value; + bool not_first, result= TRUE; + longlong curr_value, prev_value; + partition_element* part_def; + List_iterator<partition_element> list_func_it(part_info->partitions); + DBUG_ENTER("check_list_constants"); + + part_info->part_result_type= INT_RESULT; + + /* + We begin by calculating the number of list values that have been + defined in the first step. + + We use this number to allocate a properly sized array of structs + to keep the partition id and the value to use in that partition. + In the second traversal we assign them values in the struct array. + + Finally we sort the array of structs in order of values to enable + a quick binary search for the proper value to discover the + partition id. + After sorting the array we check that there are no duplicates in the + list. + */ + + no_parts= part_info->no_parts; + i= 0; + do + { + part_def= list_func_it++; + List_iterator<longlong> list_val_it1(part_def->list_val_list); + while (list_val_it1++) + no_list_values++; + } while (++i < no_parts); + list_func_it.rewind(); + part_info->no_list_values= no_list_values; + part_info->list_array= + (LIST_PART_ENTRY*)sql_alloc(no_list_values*sizeof(LIST_PART_ENTRY)); + if (unlikely(part_info->list_array == NULL)) + { + my_error(ER_OUTOFMEMORY, MYF(0), no_list_values*sizeof(LIST_PART_ENTRY)); + goto end; + } + + i= 0; + do + { + part_def= list_func_it++; + List_iterator<longlong> list_val_it2(part_def->list_val_list); + while ((list_value= list_val_it2++)) + { + part_info->list_array[list_index].list_value= *list_value; + part_info->list_array[list_index++].partition_id= i; + } + } while (++i < no_parts); + + qsort((void*)part_info->list_array, no_list_values, + sizeof(LIST_PART_ENTRY), &list_part_cmp); + + not_first= FALSE; + i= prev_value= 0; //prev_value initialised to quiet compiler + do + { + curr_value= part_info->list_array[i].list_value; + if (likely(!not_first || prev_value != curr_value)) + { + prev_value= curr_value; + not_first= TRUE; + } + else + { + my_error(ER_MULTIPLE_DEF_CONST_IN_LIST_PART_ERROR, MYF(0)); + goto end; + } + } while (++i < no_list_values); + result= FALSE; +end: + DBUG_RETURN(result); +} + + +/* + Create a memory area where default partition names are stored and fill it + up with the names. + SYNOPSIS + create_default_partition_names() + no_parts Number of partitions + subpart Is it subpartitions + RETURN VALUE + A pointer to the memory area of the default partition names + DESCRIPTION + A support routine for the partition code where default values are + generated. 
+ The external routine needing this code is check_partition_info +*/ + +#define MAX_PART_NAME_SIZE 8 + +static char *create_default_partition_names(uint no_parts, uint start_no, + bool subpart) +{ + char *ptr= sql_calloc(no_parts*MAX_PART_NAME_SIZE); + char *move_ptr= ptr; + uint i= 0; + DBUG_ENTER("create_default_partition_names"); + if (likely(ptr != 0)) + { + do + { + if (subpart) + my_sprintf(move_ptr, (move_ptr,"sp%u", (start_no + i))); + else + my_sprintf(move_ptr, (move_ptr,"p%u", (start_no + i))); + move_ptr+=MAX_PART_NAME_SIZE; + } while (++i < no_parts); + } + else + { + my_error(ER_OUTOFMEMORY, MYF(0), no_parts*MAX_PART_NAME_SIZE); + } + DBUG_RETURN(ptr); +} + + +/* + Set up all the default partitions not set-up by the user in the SQL + statement. Also perform a number of checks that the user hasn't tried + to use default values where no defaults exists. + SYNOPSIS + set_up_default_partitions() + part_info The reference to all partition information + file A reference to a handler of the table + max_rows Maximum number of rows stored in the table + RETURN VALUE + TRUE Error, attempted default values not possible + FALSE Ok, default partitions set-up + DESCRIPTION + The routine uses the underlying handler of the partitioning to define + the default number of partitions. For some handlers this requires + knowledge of the maximum number of rows to be stored in the table. + This routine only accepts HASH and KEY partitioning and thus there is + no subpartitioning if this routine is successful. + The external routine needing this code is check_partition_info +*/ + +static bool set_up_default_partitions(partition_info *part_info, + handler *file, ulonglong max_rows, + uint start_no) +{ + uint no_parts, i; + char *default_name; + bool result= TRUE; + DBUG_ENTER("set_up_default_partitions"); + + if (part_info->part_type != HASH_PARTITION) + { + const char *error_string; + if (part_info->part_type == RANGE_PARTITION) + error_string= range_str; + else + error_string= list_str; + my_error(ER_PARTITIONS_MUST_BE_DEFINED_ERROR, MYF(0), error_string); + goto end; + } + if (part_info->no_parts == 0) + part_info->no_parts= file->get_default_no_partitions(max_rows); + no_parts= part_info->no_parts; + part_info->use_default_partitions= FALSE; + if (unlikely(no_parts > MAX_PARTITIONS)) + { + my_error(ER_TOO_MANY_PARTITIONS_ERROR, MYF(0)); + goto end; + } + if (unlikely((!(default_name= create_default_partition_names(no_parts, + start_no, + FALSE))))) + goto end; + i= 0; + do + { + partition_element *part_elem= new partition_element(); + if (likely(part_elem != 0)) + { + part_elem->engine_type= NULL; + part_elem->partition_name= default_name; + default_name+=MAX_PART_NAME_SIZE; + part_info->partitions.push_back(part_elem); + } + else + { + my_error(ER_OUTOFMEMORY, MYF(0), sizeof(partition_element)); + goto end; + } + } while (++i < no_parts); + result= FALSE; +end: + DBUG_RETURN(result); +} + + +/* + Set up all the default subpartitions not set-up by the user in the SQL + statement. Also perform a number of checks that the default partitioning + becomes an allowed partitioning scheme. 
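
create_default_partition_names() above packs all generated names into one flat buffer with a fixed 8-byte stride, which is why its callers advance by MAX_PART_NAME_SIZE rather than by string length; the stride is safe because the partition count is capped by MAX_PARTITIONS. A standalone equivalent (toy names, calloc instead of the server's sql_calloc):

#include <cstdio>
#include <cstdlib>

enum { TOY_MAX_PART_NAME_SIZE= 8 };   // mirrors MAX_PART_NAME_SIZE above

// Generates "p0","p1",... (or "sp0","sp1",... for subpartitions) packed at
// fixed offsets; entry i starts at buf + i * TOY_MAX_PART_NAME_SIZE.
static char *toy_default_names(unsigned no_parts, unsigned start_no,
                               bool subpart)
{
  char *buf= (char*) calloc(no_parts, TOY_MAX_PART_NAME_SIZE);
  if (!buf)
    return nullptr;
  for (unsigned i= 0; i < no_parts; i++)
    snprintf(buf + i * TOY_MAX_PART_NAME_SIZE, TOY_MAX_PART_NAME_SIZE,
             subpart ? "sp%u" : "p%u", start_no + i);
  return buf;
}
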
+ SYNOPSIS + set_up_default_subpartitions() + part_info The reference to all partition information + file A reference to a handler of the table + max_rows Maximum number of rows stored in the table + RETURN VALUE + TRUE Error, attempted default values not possible + FALSE Ok, default partitions set-up + DESCRIPTION + The routine uses the underlying handler of the partitioning to define + the default number of partitions. For some handlers this requires + knowledge of the maximum number of rows to be stored in the table. + This routine is only called for RANGE or LIST partitioning and those + need to be specified so only subpartitions are specified. + The external routine needing this code is check_partition_info +*/ + +static bool set_up_default_subpartitions(partition_info *part_info, + handler *file, ulonglong max_rows) +{ + uint i, j, no_parts, no_subparts; + char *default_name, *name_ptr; + bool result= TRUE; + partition_element *part_elem; + List_iterator<partition_element> part_it(part_info->partitions); + DBUG_ENTER("set_up_default_subpartitions"); + + if (part_info->no_subparts == 0) + part_info->no_subparts= file->get_default_no_partitions(max_rows); + no_parts= part_info->no_parts; + no_subparts= part_info->no_subparts; + part_info->use_default_subpartitions= FALSE; + if (unlikely((no_parts * no_subparts) > MAX_PARTITIONS)) + { + my_error(ER_TOO_MANY_PARTITIONS_ERROR, MYF(0)); + goto end; + } + if (unlikely((!(default_name= + create_default_partition_names(no_subparts, (uint)0, TRUE))))) + goto end; + i= 0; + do + { + part_elem= part_it++; + j= 0; + name_ptr= default_name; + do + { + partition_element *subpart_elem= new partition_element(); + if (likely(subpart_elem != 0)) + { + subpart_elem->engine_type= NULL; + subpart_elem->partition_name= name_ptr; + name_ptr+= MAX_PART_NAME_SIZE; + part_elem->subpartitions.push_back(subpart_elem); + } + else + { + my_error(ER_OUTOFMEMORY, MYF(0), sizeof(partition_element)); + goto end; + } + } while (++j < no_subparts); + } while (++i < no_parts); + result= FALSE; +end: + DBUG_RETURN(result); +} + + +/* + Set up defaults for partition or subpartition (cannot set-up for both, + this will return an error. + SYNOPSIS + set_up_defaults_for_partitioning() + part_info The reference to all partition information + file A reference to a handler of the table + max_rows Maximum number of rows stored in the table + RETURN VALUE + TRUE Error, attempted default values not possible + FALSE Ok, default partitions set-up + DESCRIPTION + Support routine for check_partition_info +*/ + +bool set_up_defaults_for_partitioning(partition_info *part_info, + handler *file, + ulonglong max_rows, uint start_no) +{ + DBUG_ENTER("set_up_defaults_for_partitioning"); + + if (part_info->use_default_partitions) + DBUG_RETURN(set_up_default_partitions(part_info, file, max_rows, + start_no)); + if (is_sub_partitioned(part_info) && part_info->use_default_subpartitions) + DBUG_RETURN(set_up_default_subpartitions(part_info, file, max_rows)); + DBUG_RETURN(FALSE); +} + + +/* + Check that all partitions use the same storage engine. + This is currently a limitation in this version. + SYNOPSIS + check_engine_mix() + engine_array An array of engine identifiers + no_parts Total number of partitions + RETURN VALUE + TRUE Error, mixed engines + FALSE Ok, no mixed engines +*/ + +static bool check_engine_mix(handlerton **engine_array, uint no_parts) +{ + /* + Current check verifies only that all handlers are the same. + Later this check will be more sophisticated. 
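
check_engine_mix(), whose body follows, reduces to a single pass comparing every engine pointer against the first, so for example PARTITION p0 ENGINE=MyISAM next to PARTITION p1 ENGINE=InnoDB is rejected by its caller with ER_MIX_HANDLER_ERROR. A generic restatement (T stands in for handlerton; toy name):

// Returns true when the engines are mixed, matching the original's
// TRUE-on-error convention.
template <typename T>
static bool toy_engine_mix(T *const *engines, unsigned n)
{
  for (unsigned i= 1; i < n; i++)
    if (engines[i] != engines[0])
      return true;                   // mixed engines, caller raises error
  return false;
}
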
+ */ + uint i= 0; + bool result= FALSE; + DBUG_ENTER("check_engine_mix"); + + do + { + if (engine_array[i] != engine_array[0]) + { + result= TRUE; + break; + } + } while (++i < no_parts); + DBUG_RETURN(result); +} + + +/* + We will check that the partition info requested is possible to set-up in + this version. This routine is an extension of the parser one could say. + If defaults were used we will generate default data structures for all + partitions. + SYNOPSIS + check_partition_info() + part_info The reference to all partition information + db_type Default storage engine if no engine specified per + partition. + file A reference to a handler of the table + max_rows Maximum number of rows stored in the table + RETURN VALUE + TRUE Error, something went wrong + FALSE Ok, full partition data structures are now generated + DESCRIPTION + This code is used early in the CREATE TABLE and ALTER TABLE process. +*/ + +bool check_partition_info(partition_info *part_info,handlerton *eng_type, + handler *file, ulonglong max_rows) +{ + handlerton **engine_array= NULL; + uint part_count= 0, i, no_parts, tot_partitions; + bool result= TRUE; + List_iterator<partition_element> part_it(part_info->partitions); + DBUG_ENTER("check_partition_info"); + + if (unlikely(is_sub_partitioned(part_info) && + (!(part_info->part_type == RANGE_PARTITION || + part_info->part_type == LIST_PARTITION)))) + { + /* Only RANGE and LIST partitioning can be subpartitioned */ + my_error(ER_SUBPARTITION_ERROR, MYF(0)); + goto end; + } + if (unlikely(set_up_defaults_for_partitioning(part_info, file, + max_rows, (uint)0))) + goto end; + tot_partitions= get_tot_partitions(part_info); + if (unlikely(tot_partitions > MAX_PARTITIONS)) + { + my_error(ER_TOO_MANY_PARTITIONS_ERROR, MYF(0)); + goto end; + } + if (unlikely(is_partitions_in_table(part_info, part_info))) + { + my_error(ER_SAME_NAME_PARTITION, MYF(0)); + goto end; + } + engine_array= (handlerton**)my_malloc(tot_partitions * sizeof(handlerton *), + MYF(MY_WME)); + if (unlikely(!engine_array)) + goto end; + i= 0; + no_parts= part_info->no_parts; + do + { + partition_element *part_elem= part_it++; + if (!is_sub_partitioned(part_info)) + { + if (part_elem->engine_type == NULL) + part_elem->engine_type= eng_type; + DBUG_PRINT("info", ("engine = %s", part_elem->engine_type->name)); + engine_array[part_count++]= part_elem->engine_type; + } + else + { + uint j= 0, no_subparts= part_info->no_subparts;; + List_iterator<partition_element> sub_it(part_elem->subpartitions); + do + { + part_elem= sub_it++; + if (part_elem->engine_type == NULL) + part_elem->engine_type= eng_type; + DBUG_PRINT("info", ("engine = %s", part_elem->engine_type->name)); + engine_array[part_count++]= part_elem->engine_type; + } while (++j < no_subparts); + } + } while (++i < part_info->no_parts); + if (unlikely(check_engine_mix(engine_array, part_count))) + { + my_error(ER_MIX_HANDLER_ERROR, MYF(0)); + goto end; + } + + /* + We need to check all constant expressions that they are of the correct + type and that they are increasing for ranges and not overlapping for + list constants. + */ + + if (unlikely((part_info->part_type == RANGE_PARTITION && + check_range_constants(part_info)) || + (part_info->part_type == LIST_PARTITION && + check_list_constants(part_info)))) + goto end; + result= FALSE; +end: + my_free((char*)engine_array,MYF(MY_ALLOW_ZERO_PTR)); + DBUG_RETURN(result); +} + + +/* + A great number of functions below here is part of the fix_partition_func + method. 
It is used to set up the partition structures for execution from + openfrm. It is called at the end of the openfrm when the table struct has + been set-up apart from the partition information. + It involves: + 1) Setting arrays of fields for the partition functions. + 2) Setting up binary search array for LIST partitioning + 3) Setting up array for binary search for RANGE partitioning + 4) Setting up key_map's to assist in quick evaluation whether one + can deduce anything from a given index of what partition to use + 5) Checking whether a set of partitions can be derived from a range on + a field in the partition function. + As part of doing this there is also a great number of error controls. + This is actually the place where most of the things are checked for + partition information when creating a table. + Things that are checked includes + 1) No NULLable fields in partition function + 2) All fields of partition function in Primary keys and unique indexes + (if not supported) + 3) No fields in partition function that are BLOB's or VARCHAR with a + collation other than the binary collation. + + + + Create an array of partition fields (NULL terminated). Before this method + is called fix_fields or find_table_in_sef has been called to set + GET_FIXED_FIELDS_FLAG on all fields that are part of the partition + function. + SYNOPSIS + set_up_field_array() + table TABLE object for which partition fields are set-up + sub_part Is the table subpartitioned as well + RETURN VALUE + TRUE Error, some field didn't meet requirements + FALSE Ok, partition field array set-up + DESCRIPTION + This method is used to set-up both partition and subpartitioning + field array and used for all types of partitioning. + It is part of the logic around fix_partition_func. +*/ +static bool set_up_field_array(TABLE *table, + bool sub_part) +{ + Field **ptr, *field, **field_array; + uint no_fields= 0, size_field_array, i= 0; + partition_info *part_info= table->part_info; + int result= FALSE; + DBUG_ENTER("set_up_field_array"); + + ptr= table->field; + while ((field= *(ptr++))) + { + if (field->flags & GET_FIXED_FIELDS_FLAG) + no_fields++; + } + size_field_array= (no_fields+1)*sizeof(Field*); + field_array= (Field**)sql_alloc(size_field_array); + if (unlikely(!field_array)) + { + my_error(ER_OUTOFMEMORY, MYF(0), size_field_array); + result= TRUE; + } + ptr= table->field; + while ((field= *(ptr++))) + { + if (field->flags & GET_FIXED_FIELDS_FLAG) + { + field->flags&= ~GET_FIXED_FIELDS_FLAG; + field->flags|= FIELD_IN_PART_FUNC_FLAG; + if (likely(!result)) + { + field_array[i++]= field; + + /* + We check that the fields are proper. It is required for each + field in a partition function to: + 1) Not be a BLOB of any type + A BLOB takes too long time to evaluate so we don't want it for + performance reasons. + 2) Not be a VARCHAR other than VARCHAR with a binary collation + A VARCHAR with character sets can have several values being + equal with different number of spaces or NULL's. This is not a + good ground for a safe and exact partition function. Thus it is + not allowed in partition functions. 
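+          For example (a hypothetical case): a VARCHAR(10) column with a
+          case-insensitive collation would be rejected here with
+          ER_CHAR_SET_IN_PART_FIELD_ERROR, since 'a' and 'a ' compare as
+          equal under such a collation yet are different byte strings,
+          whereas a VARBINARY column carries the BINARY_FLAG and is
+          accepted.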
+        */
+
+        if (unlikely(field->flags & BLOB_FLAG))
+        {
+          my_error(ER_BLOB_FIELD_IN_PART_FUNC_ERROR, MYF(0));
+          result= TRUE;
+        }
+        else if (unlikely(!(field->flags & BINARY_FLAG) &&
+                          field->real_type() == MYSQL_TYPE_VARCHAR))
+        {
+          my_error(ER_CHAR_SET_IN_PART_FIELD_ERROR, MYF(0));
+          result= TRUE;
+        }
+      }
+    }
+  }
+  if (likely(!result))    /* field_array is NULL if the allocation failed */
+    field_array[no_fields]= 0;
+  if (!sub_part)
+  {
+    part_info->part_field_array= field_array;
+    part_info->no_part_fields= no_fields;
+  }
+  else
+  {
+    part_info->subpart_field_array= field_array;
+    part_info->no_subpart_fields= no_fields;
+  }
+  DBUG_RETURN(result);
+}
+
+
+/*
+  Create a field array including all fields of both the partitioning and the
+  subpartitioning functions.
+  SYNOPSIS
+    create_full_part_field_array()
+    table                TABLE object for which partition fields are set-up
+    part_info            Reference to partitioning data structure
+  RETURN VALUE
+    TRUE                 Memory allocation of field array failed
+    FALSE                Ok
+  DESCRIPTION
+    If there is no subpartitioning then the same array is used as for the
+    partitioning. Otherwise a new array is built up using the flag
+    FIELD_IN_PART_FUNC_FLAG in the field object.
+    This function is called from fix_partition_func
+*/
+
+static bool create_full_part_field_array(TABLE *table,
+                                         partition_info *part_info)
+{
+  bool result= FALSE;
+  DBUG_ENTER("create_full_part_field_array");
+
+  if (!is_sub_partitioned(part_info))
+  {
+    part_info->full_part_field_array= part_info->part_field_array;
+    part_info->no_full_part_fields= part_info->no_part_fields;
+  }
+  else
+  {
+    Field **ptr, *field, **field_array;
+    uint no_part_fields=0, size_field_array;
+    ptr= table->field;
+    while ((field= *(ptr++)))
+    {
+      if (field->flags & FIELD_IN_PART_FUNC_FLAG)
+        no_part_fields++;
+    }
+    size_field_array= (no_part_fields+1)*sizeof(Field*);
+    field_array= (Field**)sql_alloc(size_field_array);
+    if (unlikely(!field_array))
+    {
+      my_error(ER_OUTOFMEMORY, MYF(0), size_field_array);
+      result= TRUE;
+      goto end;
+    }
+    no_part_fields= 0;
+    ptr= table->field;
+    while ((field= *(ptr++)))
+    {
+      if (field->flags & FIELD_IN_PART_FUNC_FLAG)
+        field_array[no_part_fields++]= field;
+    }
+    field_array[no_part_fields]= 0;
+    part_info->full_part_field_array= field_array;
+    part_info->no_full_part_fields= no_part_fields;
+  }
+end:
+  DBUG_RETURN(result);
+}
+
+
+/*
+  These support routines are used to set/reset an indicator on all fields
+  in a certain key. They are used in conjunction with another support
+  routine that traverses all fields in the PF to find out whether all or
+  some fields in the PF are part of the key. This is used to check that
+  primary keys and unique keys involve all fields in the PF (unless
+  supported) and to derive the key_map's used to quickly decide whether an
+  index can be used to derive which partitions need to be scanned.
+
+
+
+  Clear flag GET_FIXED_FIELDS_FLAG in all fields of a key previously set by
+  set_indicator_in_key_fields (always used in pairs).
+  SYNOPSIS
+    clear_indicator_in_key_fields()
+    key_info                  Reference to find the key fields
+*/
+
+static void clear_indicator_in_key_fields(KEY *key_info)
+{
+  KEY_PART_INFO *key_part;
+  uint key_parts= key_info->key_parts, i;
+  for (i= 0, key_part=key_info->key_part; i < key_parts; i++, key_part++)
+    key_part->field->flags&= (~GET_FIXED_FIELDS_FLAG);
+}
+
+
+/*
+  Set flag GET_FIXED_FIELDS_FLAG in all fields of a key.
+ SYNOPSIS + set_indicator_in_key_fields + key_info Reference to find the key fields +*/ + +static void set_indicator_in_key_fields(KEY *key_info) +{ + KEY_PART_INFO *key_part; + uint key_parts= key_info->key_parts, i; + for (i= 0, key_part=key_info->key_part; i < key_parts; i++, key_part++) + key_part->field->flags|= GET_FIXED_FIELDS_FLAG; +} + + +/* + Check if all or some fields in partition field array is part of a key + previously used to tag key fields. + SYNOPSIS + check_fields_in_PF() + ptr Partition field array + all_fields Is all fields of partition field array used in key + some_fields Is some fields of partition field array used in key + RETURN VALUE + all_fields, some_fields +*/ + +static void check_fields_in_PF(Field **ptr, bool *all_fields, + bool *some_fields) +{ + DBUG_ENTER("check_fields_in_PF"); + *all_fields= TRUE; + *some_fields= FALSE; + do + { + /* Check if the field of the PF is part of the current key investigated */ + if ((*ptr)->flags & GET_FIXED_FIELDS_FLAG) + *some_fields= TRUE; + else + *all_fields= FALSE; + } while (*(++ptr)); + DBUG_VOID_RETURN; +} + + +/* + Clear flag GET_FIXED_FIELDS_FLAG in all fields of the table. + This routine is used for error handling purposes. + SYNOPSIS + clear_field_flag() + table TABLE object for which partition fields are set-up +*/ + +static void clear_field_flag(TABLE *table) +{ + Field **ptr; + DBUG_ENTER("clear_field_flag"); + + for (ptr= table->field; *ptr; ptr++) + (*ptr)->flags&= (~GET_FIXED_FIELDS_FLAG); + DBUG_VOID_RETURN; +} + + +/* + This routine sets-up the partition field array for KEY partitioning, it + also verifies that all fields in the list of fields is actually a part of + the table. + SYNOPSIS + handle_list_of_fields() + it A list of field names for the partition function + table TABLE object for which partition fields are set-up + part_info Reference to partitioning data structure + sub_part Is the table subpartitioned as well + RETURN VALUE + TRUE Fields in list of fields not part of table + FALSE All fields ok and array created + DESCRIPTION + find_field_in_table_sef finds the field given its name. All fields get + GET_FIXED_FIELDS_FLAG set. +*/ + +static bool handle_list_of_fields(List_iterator<char> it, + TABLE *table, + partition_info *part_info, + bool sub_part) +{ + Field *field; + bool result; + char *field_name; + DBUG_ENTER("handle_list_of_fields"); + + while ((field_name= it++)) + { + field= find_field_in_table_sef(table, field_name); + if (likely(field != 0)) + field->flags|= GET_FIXED_FIELDS_FLAG; + else + { + my_error(ER_FIELD_NOT_FOUND_PART_ERROR, MYF(0)); + clear_field_flag(table); + result= TRUE; + goto end; + } + } + result= set_up_field_array(table, sub_part); +end: + DBUG_RETURN(result); +} + + +/* + This function is used to build an array of partition fields for the + partitioning function and subpartitioning function. The partitioning + function is an item tree that must reference at least one field in the + table. This is checked first in the parser that the function doesn't + contain non-cacheable parts (like a random function) and by checking + here that the function isn't a constant function. + SYNOPSIS + fix_fields_part_func() + thd The thread object + tables A list of one table, the partitioned table + func_expr The item tree reference of the partition function + part_info Reference to partitioning data structure + sub_part Is the table subpartitioned as well + RETURN VALUE + TRUE An error occurred, something was wrong with the + partition function. 
+ FALSE Ok, a partition field array was created + DESCRIPTION + The function uses a new feature in fix_fields where the flag + GET_FIXED_FIELDS_FLAG is set for all fields in the item tree. + This field must always be reset before returning from the function + since it is used for other purposes as well. +*/ + +static bool fix_fields_part_func(THD *thd, TABLE_LIST *tables, + Item* func_expr, partition_info *part_info, + bool sub_part) +{ + /* + Calculate the number of fields in the partition function. + Use it allocate memory for array of Field pointers. + Initialise array of field pointers. Use information set when + calling fix_fields and reset it immediately after. + The get_fields_in_item_tree activates setting of bit in flags + on the field object. + */ + + bool result= TRUE; + TABLE *table= tables->table; + TABLE_LIST *save_table_list, *save_first_table, *save_last_table; + int error; + Name_resolution_context *context; + DBUG_ENTER("fix_fields_part_func"); + + context= thd->lex->current_context(); + table->map= 1; //To ensure correct calculation of const item + table->get_fields_in_item_tree= TRUE; + save_table_list= context->table_list; + save_first_table= context->first_name_resolution_table; + save_last_table= context->last_name_resolution_table; + context->table_list= tables; + context->first_name_resolution_table= tables; + context->last_name_resolution_table= NULL; + func_expr->walk(&Item::change_context_processor, (byte*) context); + thd->where= "partition function"; + error= func_expr->fix_fields(thd, (Item**)0); + context->table_list= save_table_list; + context->first_name_resolution_table= save_first_table; + context->last_name_resolution_table= save_last_table; + if (unlikely(error)) + { + DBUG_PRINT("info", ("Field in partition function not part of table")); + clear_field_flag(table); + goto end; + } + if (unlikely(func_expr->const_item())) + { + my_error(ER_CONST_EXPR_IN_PARTITION_FUNC_ERROR, MYF(0)); + clear_field_flag(table); + goto end; + } + result= set_up_field_array(table, sub_part); +end: + table->get_fields_in_item_tree= FALSE; + table->map= 0; //Restore old value + DBUG_RETURN(result); +} + + +/* + This function verifies that if there is a primary key that it contains + all the fields of the partition function. + This is a temporary limitation that will hopefully be removed after a + while. + SYNOPSIS + check_primary_key() + table TABLE object for which partition fields are set-up + RETURN VALUES + TRUE Not all fields in partitioning function was part + of primary key + FALSE Ok, all fields of partitioning function were part + of primary key +*/ + +static bool check_primary_key(TABLE *table) +{ + uint primary_key= table->s->primary_key; + bool all_fields, some_fields, result= FALSE; + DBUG_ENTER("check_primary_key"); + + if (primary_key < MAX_KEY) + { + set_indicator_in_key_fields(table->key_info+primary_key); + check_fields_in_PF(table->part_info->full_part_field_array, + &all_fields, &some_fields); + clear_indicator_in_key_fields(table->key_info+primary_key); + if (unlikely(!all_fields)) + { + my_error(ER_UNIQUE_KEY_NEED_ALL_FIELDS_IN_PF,MYF(0),"PRIMARY KEY"); + result= TRUE; + } + } + DBUG_RETURN(result); +} + + +/* + This function verifies that if there is a unique index that it contains + all the fields of the partition function. + This is a temporary limitation that will hopefully be removed after a + while. 
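+  As a (hypothetical) example: a table with UNIQUE KEY (a) that is
+  partitioned by HASH (b) is rejected with
+  ER_UNIQUE_KEY_NEED_ALL_FIELDS_IN_PF, since a lookup on the unique key
+  alone could not determine which single partition to search.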
+ SYNOPSIS + check_unique_keys() + table TABLE object for which partition fields are set-up + RETURN VALUES + TRUE Not all fields in partitioning function was part + of all unique keys + FALSE Ok, all fields of partitioning function were part + of unique keys +*/ + +static bool check_unique_keys(TABLE *table) +{ + bool all_fields, some_fields, result= FALSE; + uint keys= table->s->keys, i; + DBUG_ENTER("check_unique_keys"); + for (i= 0; i < keys; i++) + { + if (table->key_info[i].flags & HA_NOSAME) //Unique index + { + set_indicator_in_key_fields(table->key_info+i); + check_fields_in_PF(table->part_info->full_part_field_array, + &all_fields, &some_fields); + clear_indicator_in_key_fields(table->key_info+i); + if (unlikely(!all_fields)) + { + my_error(ER_UNIQUE_KEY_NEED_ALL_FIELDS_IN_PF,MYF(0),"UNIQUE INDEX"); + result= TRUE; + break; + } + } + } + DBUG_RETURN(result); +} + + +/* + An important optimisation is whether a range on a field can select a subset + of the partitions. + A prerequisite for this to happen is that the PF is a growing function OR + a shrinking function. + This can never happen for a multi-dimensional PF. Thus this can only happen + with PF with at most one field involved in the PF. + The idea is that if the function is a growing function and you know that + the field of the PF is 4 <= A <= 6 then we can convert this to a range + in the PF instead by setting the range to PF(4) <= PF(A) <= PF(6). In the + case of RANGE PARTITIONING and LIST PARTITIONING this can be used to + calculate a set of partitions rather than scanning all of them. + Thus the following prerequisites are there to check if sets of partitions + can be found. + 1) Only possible for RANGE and LIST partitioning (not for subpartitioning) + 2) Only possible if PF only contains 1 field + 3) Possible if PF is a growing function of the field + 4) Possible if PF is a shrinking function of the field + OBSERVATION: + 1) IF f1(A) is a growing function AND f2(A) is a growing function THEN + f1(A) + f2(A) is a growing function + f1(A) * f2(A) is a growing function if f1(A) >= 0 and f2(A) >= 0 + 2) IF f1(A) is a growing function and f2(A) is a shrinking function THEN + f1(A) / f2(A) is a growing function if f1(A) >= 0 and f2(A) > 0 + 3) IF A is a growing function then a function f(A) that removes the + least significant portion of A is a growing function + E.g. DATE(datetime) is a growing function + MONTH(datetime) is not a growing/shrinking function + 4) IF f1(A) is a growing function and f2(A) is a growing function THEN + f1(f2(A)) and f2(f1(A)) are also growing functions + 5) IF f1(A) is a shrinking function and f2(A) is a growing function THEN + f1(f2(A)) is a shrinking function and f2(f1(A)) is a shrinking function + 6) f1(A) = A is a growing function + 7) f1(A) = A*a + b (where a and b are constants) is a growing function + + By analysing the item tree of the PF we can use these deducements and + derive whether the PF is a growing function or a shrinking function or + neither of it. + + If the PF is range capable then a flag is set on the table object + indicating this to notify that we can use also ranges on the field + of the PF to deduce a set of partitions if the fields of the PF were + not all fully bound. + SYNOPSIS + check_range_capable_PF() + table TABLE object for which partition fields are set-up + DESCRIPTION + Support for this is not implemented yet. 
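+    As a worked example of the intended optimisation (function and
+    interval chosen for illustration): with PF(A) = TO_DAYS(A), a growing
+    function of a DATE column A, the condition
+    '2005-01-01' <= A <= '2005-12-31' could be rewritten as
+    TO_DAYS('2005-01-01') <= PF(A) <= TO_DAYS('2005-12-31') and used to
+    pick out only the RANGE partitions whose intervals intersect that
+    range.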
+*/ + +void check_range_capable_PF(TABLE *table) +{ + DBUG_ENTER("check_range_capable_PF"); + DBUG_VOID_RETURN; +} + + +/* + Set up partition key maps + SYNOPSIS + set_up_partition_key_maps() + table TABLE object for which partition fields are set-up + part_info Reference to partitioning data structure + RETURN VALUES + None + DESCRIPTION + This function sets up a couple of key maps to be able to quickly check + if an index ever can be used to deduce the partition fields or even + a part of the fields of the partition function. + We set up the following key_map's. + PF = Partition Function + 1) All fields of the PF is set even by equal on the first fields in the + key + 2) All fields of the PF is set if all fields of the key is set + 3) At least one field in the PF is set if all fields is set + 4) At least one field in the PF is part of the key +*/ + +static void set_up_partition_key_maps(TABLE *table, + partition_info *part_info) +{ + uint keys= table->s->keys, i; + bool all_fields, some_fields; + DBUG_ENTER("set_up_partition_key_maps"); + + part_info->all_fields_in_PF.clear_all(); + part_info->all_fields_in_PPF.clear_all(); + part_info->all_fields_in_SPF.clear_all(); + part_info->some_fields_in_PF.clear_all(); + for (i= 0; i < keys; i++) + { + set_indicator_in_key_fields(table->key_info+i); + check_fields_in_PF(part_info->full_part_field_array, + &all_fields, &some_fields); + if (all_fields) + part_info->all_fields_in_PF.set_bit(i); + if (some_fields) + part_info->some_fields_in_PF.set_bit(i); + if (is_sub_partitioned(part_info)) + { + check_fields_in_PF(part_info->part_field_array, + &all_fields, &some_fields); + if (all_fields) + part_info->all_fields_in_PPF.set_bit(i); + check_fields_in_PF(part_info->subpart_field_array, + &all_fields, &some_fields); + if (all_fields) + part_info->all_fields_in_SPF.set_bit(i); + } + clear_indicator_in_key_fields(table->key_info+i); + } + DBUG_VOID_RETURN; +} + + +/* + Set-up all function pointers for calculation of partition id, + subpartition id and the upper part in subpartitioning. This is to speed up + execution of get_partition_id which is executed once every record to be + written and deleted and twice for updates. 
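+  For example, a table using PARTITION BY RANGE with SUBPARTITION BY
+  LINEAR KEY ends up with get_part_partition_id= get_partition_id_range,
+  get_subpartition_id= get_partition_id_linear_key_sub and
+  get_partition_id= get_partition_id_range_sub_linear_key, so the generic
+  code never needs to inspect the partitioning type again.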
+ SYNOPSIS + set_up_partition_function_pointers() + part_info Reference to partitioning data structure +*/ + +static void set_up_partition_func_pointers(partition_info *part_info) +{ + if (is_sub_partitioned(part_info)) + { + if (part_info->part_type == RANGE_PARTITION) + { + part_info->get_part_partition_id= get_partition_id_range; + if (part_info->list_of_subpart_fields) + { + if (part_info->linear_hash_ind) + { + part_info->get_partition_id= get_partition_id_range_sub_linear_key; + part_info->get_subpartition_id= get_partition_id_linear_key_sub; + } + else + { + part_info->get_partition_id= get_partition_id_range_sub_key; + part_info->get_subpartition_id= get_partition_id_key_sub; + } + } + else + { + if (part_info->linear_hash_ind) + { + part_info->get_partition_id= get_partition_id_range_sub_linear_hash; + part_info->get_subpartition_id= get_partition_id_linear_hash_sub; + } + else + { + part_info->get_partition_id= get_partition_id_range_sub_hash; + part_info->get_subpartition_id= get_partition_id_hash_sub; + } + } + } + else //LIST Partitioning + { + part_info->get_part_partition_id= get_partition_id_list; + if (part_info->list_of_subpart_fields) + { + if (part_info->linear_hash_ind) + { + part_info->get_partition_id= get_partition_id_list_sub_linear_key; + part_info->get_subpartition_id= get_partition_id_linear_key_sub; + } + else + { + part_info->get_partition_id= get_partition_id_list_sub_key; + part_info->get_subpartition_id= get_partition_id_key_sub; + } + } + else + { + if (part_info->linear_hash_ind) + { + part_info->get_partition_id= get_partition_id_list_sub_linear_hash; + part_info->get_subpartition_id= get_partition_id_linear_hash_sub; + } + else + { + part_info->get_partition_id= get_partition_id_list_sub_hash; + part_info->get_subpartition_id= get_partition_id_hash_sub; + } + } + } + } + else //No subpartitioning + { + part_info->get_part_partition_id= NULL; + part_info->get_subpartition_id= NULL; + if (part_info->part_type == RANGE_PARTITION) + part_info->get_partition_id= get_partition_id_range; + else if (part_info->part_type == LIST_PARTITION) + part_info->get_partition_id= get_partition_id_list; + else //HASH partitioning + { + if (part_info->list_of_part_fields) + { + if (part_info->linear_hash_ind) + part_info->get_partition_id= get_partition_id_linear_key_nosub; + else + part_info->get_partition_id= get_partition_id_key_nosub; + } + else + { + if (part_info->linear_hash_ind) + part_info->get_partition_id= get_partition_id_linear_hash_nosub; + else + part_info->get_partition_id= get_partition_id_hash_nosub; + } + } + } +} + + +/* + For linear hashing we need a mask which is on the form 2**n - 1 where + 2**n >= no_parts. Thus if no_parts is 6 then mask is 2**3 - 1 = 8 - 1 = 7. + SYNOPSIS + set_linear_hash_mask() + part_info Reference to partitioning data structure + no_parts Number of parts in linear hash partitioning +*/ + +static void set_linear_hash_mask(partition_info *part_info, uint no_parts) +{ + uint mask; + for (mask= 1; mask < no_parts; mask<<=1) + ; + part_info->linear_hash_mask= mask - 1; +} + + +/* + This function calculates the partition id provided the result of the hash + function using linear hashing parameters, mask and number of partitions. 
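+  As a worked example (numbers chosen for illustration): with no_parts = 6,
+  set_linear_hash_mask loops through 1, 2, 4, 8 and stores
+  mask = 8 - 1 = 7. A hash value of 13 then gives 13 & 7 = 5, which is
+  below 6 and is used directly, while 14 gives 14 & 7 = 6, which is >= 6,
+  so the mask is halved to 3 and the partition becomes 14 & 3 = 2.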
+ SYNOPSIS + get_part_id_from_linear_hash() + hash_value Hash value calculated by HASH function or KEY function + mask Mask calculated previously by set_linear_hash_mask + no_parts Number of partitions in HASH partitioned part + RETURN VALUE + part_id The calculated partition identity (starting at 0) + DESCRIPTION + The partition is calculated according to the theory of linear hashing. + See e.g. Linear hashing: a new tool for file and table addressing, + Reprinted from VLDB-80 in Readings Database Systems, 2nd ed, M. Stonebraker + (ed.), Morgan Kaufmann 1994. +*/ + +static uint32 get_part_id_from_linear_hash(longlong hash_value, uint mask, + uint no_parts) +{ + uint32 part_id= (uint32)(hash_value & mask); + if (part_id >= no_parts) + { + uint new_mask= ((mask + 1) >> 1) - 1; + part_id= hash_value & new_mask; + } + return part_id; +} + +/* + fix partition functions + + SYNOPSIS + fix_partition_func() + thd The thread object + name The name of the partitioned table + table TABLE object for which partition fields are set-up + + RETURN VALUE + TRUE + FALSE + + DESCRIPTION + The name parameter contains the full table name and is used to get the + database name of the table which is used to set-up a correct + TABLE_LIST object for use in fix_fields. + +NOTES + This function is called as part of opening the table by opening the .frm + file. It is a part of CREATE TABLE to do this so it is quite permissible + that errors due to erroneus syntax isn't found until we come here. + If the user has used a non-existing field in the table is one such example + of an error that is not discovered until here. +*/ + +bool fix_partition_func(THD *thd, const char *name, TABLE *table) +{ + bool result= TRUE; + uint dir_length, home_dir_length; + TABLE_LIST tables; + TABLE_SHARE *share= table->s; + char db_name_string[FN_REFLEN]; + char* db_name; + partition_info *part_info= table->part_info; + ulong save_set_query_id= thd->set_query_id; + DBUG_ENTER("fix_partition_func"); + + thd->set_query_id= 0; + /* + Set-up the TABLE_LIST object to be a list with a single table + Set the object to zero to create NULL pointers and set alias + and real name to table name and get database name from file name. + */ + + bzero((void*)&tables, sizeof(TABLE_LIST)); + tables.alias= tables.table_name= (char*) share->table_name.str; + tables.table= table; + tables.next_local= 0; + tables.next_name_resolution_table= 0; + strmov(db_name_string, name); + dir_length= dirname_length(db_name_string); + db_name_string[dir_length - 1]= 0; + home_dir_length= dirname_length(db_name_string); + db_name= &db_name_string[home_dir_length]; + tables.db= db_name; + + if (is_sub_partitioned(part_info)) + { + DBUG_ASSERT(part_info->subpart_type == HASH_PARTITION); + /* + Subpartition is defined. We need to verify that subpartitioning + function is correct. + */ + if (part_info->linear_hash_ind) + set_linear_hash_mask(part_info, part_info->no_subparts); + if (part_info->list_of_subpart_fields) + { + List_iterator<char> it(part_info->subpart_field_list); + if (unlikely(handle_list_of_fields(it, table, part_info, TRUE))) + goto end; + } + else + { + if (unlikely(fix_fields_part_func(thd, &tables, + part_info->subpart_expr, part_info, + TRUE))) + goto end; + if (unlikely(part_info->subpart_expr->result_type() != INT_RESULT)) + { + my_error(ER_PARTITION_FUNC_NOT_ALLOWED_ERROR, MYF(0), + "SUBPARTITION"); + goto end; + } + } + } + DBUG_ASSERT(part_info->part_type != NOT_A_PARTITION); + /* + Partition is defined. 
+    We need to verify that partitioning
+    function is correct.
+  */
+  if (part_info->part_type == HASH_PARTITION)
+  {
+    if (part_info->linear_hash_ind)
+      set_linear_hash_mask(part_info, part_info->no_parts);
+    if (part_info->list_of_part_fields)
+    {
+      List_iterator<char> it(part_info->part_field_list);
+      if (unlikely(handle_list_of_fields(it, table, part_info, FALSE)))
+        goto end;
+    }
+    else
+    {
+      if (unlikely(fix_fields_part_func(thd, &tables, part_info->part_expr,
+                                        part_info, FALSE)))
+        goto end;
+      if (unlikely(part_info->part_expr->result_type() != INT_RESULT))
+      {
+        my_error(ER_PARTITION_FUNC_NOT_ALLOWED_ERROR, MYF(0), part_str);
+        goto end;
+      }
+      part_info->part_result_type= INT_RESULT;
+    }
+  }
+  else
+  {
+    const char *error_str;
+    if (part_info->part_type == RANGE_PARTITION)
+    {
+      error_str= range_str;
+      if (unlikely(check_range_constants(part_info)))
+        goto end;
+    }
+    else if (part_info->part_type == LIST_PARTITION)
+    {
+      error_str= list_str;
+      if (unlikely(check_list_constants(part_info)))
+        goto end;
+    }
+    else
+    {
+      DBUG_ASSERT(0);
+      my_error(ER_INCONSISTENT_PARTITION_INFO_ERROR, MYF(0));
+      goto end;
+    }
+    if (unlikely(part_info->no_parts < 1))
+    {
+      my_error(ER_PARTITIONS_MUST_BE_DEFINED_ERROR, MYF(0), error_str);
+      goto end;
+    }
+    if (unlikely(fix_fields_part_func(thd, &tables, part_info->part_expr,
+                                      part_info, FALSE)))
+      goto end;
+    if (unlikely(part_info->part_expr->result_type() != INT_RESULT))
+    {
+      my_error(ER_PARTITION_FUNC_NOT_ALLOWED_ERROR, MYF(0), part_str);
+      goto end;
+    }
+  }
+  if (unlikely(create_full_part_field_array(table, part_info)))
+    goto end;
+  if (unlikely(check_primary_key(table)))
+    goto end;
+  if (unlikely(!(table->file->partition_flags() & HA_CAN_PARTITION_UNIQUE) &&
+               check_unique_keys(table)))
+    goto end;
+  check_range_capable_PF(table);
+  set_up_partition_key_maps(table, part_info);
+  set_up_partition_func_pointers(part_info);
+  result= FALSE;
+end:
+  thd->set_query_id= save_set_query_id;
+  DBUG_RETURN(result);
+}
+
+
+/*
+  The code below consists of support routines for the reverse parsing of
+  the partitioning syntax. This feature is very useful to generate syntax
+  for all default values to avoid all default checking when opening the frm
+  file. It is also used when altering the partitioning by use of various
+  ALTER TABLE commands. Finally it is used for SHOW CREATE TABLE.
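+  As a (hypothetical) example of the output format, the routines below
+  produce strings of the form:
+
+     PARTITION BY RANGE (a)
+    (PARTITION p0 VALUES LESS THAN (10) ENGINE = MyISAM,
+     PARTITION p1 VALUES LESS THAN MAXVALUE ENGINE = MyISAM)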
+*/
+
+static int add_write(File fptr, const char *buf, uint len)
+{
+  uint len_written= my_write(fptr, (const byte*)buf, len, MYF(0));
+  if (likely(len == len_written))
+    return 0;
+  else
+    return 1;
+}
+
+static int add_string(File fptr, const char *string)
+{
+  return add_write(fptr, string, strlen(string));
+}
+
+static int add_string_len(File fptr, const char *string, uint len)
+{
+  return add_write(fptr, string, len);
+}
+
+static int add_space(File fptr)
+{
+  return add_string(fptr, space_str);
+}
+
+static int add_comma(File fptr)
+{
+  return add_string(fptr, comma_str);
+}
+
+static int add_equal(File fptr)
+{
+  return add_string(fptr, equal_str);
+}
+
+static int add_end_parenthesis(File fptr)
+{
+  return add_string(fptr, end_paren_str);
+}
+
+static int add_begin_parenthesis(File fptr)
+{
+  return add_string(fptr, begin_paren_str);
+}
+
+static int add_part_key_word(File fptr, const char *key_string)
+{
+  int err= add_string(fptr, key_string);
+  err+= add_space(fptr);
+  return err + add_begin_parenthesis(fptr);
+}
+
+static int add_hash(File fptr)
+{
+  return add_part_key_word(fptr, hash_str);
+}
+
+static int add_partition(File fptr)
+{
+  strxmov(buff, part_str, space_str, NullS);
+  return add_string(fptr, buff);
+}
+
+static int add_subpartition(File fptr)
+{
+  int err= add_string(fptr, sub_str);
+  return err + add_partition(fptr);
+}
+
+static int add_partition_by(File fptr)
+{
+  strxmov(buff, part_str, space_str, by_str, space_str, NullS);
+  return add_string(fptr, buff);
+}
+
+static int add_subpartition_by(File fptr)
+{
+  int err= add_string(fptr, sub_str);
+  return err + add_partition_by(fptr);
+}
+
+static int add_key_partition(File fptr, List<char> field_list)
+{
+  uint i, no_fields;
+  int err;
+  List_iterator<char> part_it(field_list);
+  err= add_part_key_word(fptr, key_str);
+  no_fields= field_list.elements;
+  i= 0;
+  do
+  {
+    const char *field_str= part_it++;
+    err+= add_string(fptr, field_str);
+    if (i != (no_fields-1))
+      err+= add_comma(fptr);
+  } while (++i < no_fields);
+  return err;
+}
+
+static int add_int(File fptr, longlong number)
+{
+  llstr(number, buff);
+  return add_string(fptr, buff);
+}
+
+static int add_keyword_string(File fptr, const char *keyword,
+                              const char *keystr)
+{
+  int err= add_string(fptr, keyword);
+  err+= add_space(fptr);
+  err+= add_equal(fptr);
+  err+= add_space(fptr);
+  err+= add_string(fptr, keystr);
+  return err + add_space(fptr);
+}
+
+static int add_keyword_int(File fptr, const char *keyword, longlong num)
+{
+  int err= add_string(fptr, keyword);
+  err+= add_space(fptr);
+  err+= add_equal(fptr);
+  err+= add_space(fptr);
+  err+= add_int(fptr, num);
+  return err + add_space(fptr);
+}
+
+static int add_engine(File fptr, handlerton *engine_type)
+{
+  const char *engine_str= engine_type->name;
+  int err= add_string(fptr, "ENGINE = ");
+  return err + add_string(fptr, engine_str);
+}
+
+static int add_partition_options(File fptr, partition_element *p_elem)
+{
+  int err= 0;
+  if (p_elem->tablespace_name)
+    err+= add_keyword_string(fptr,"TABLESPACE",p_elem->tablespace_name);
+  if (p_elem->nodegroup_id != UNDEF_NODEGROUP)
+    err+= add_keyword_int(fptr,"NODEGROUP",(longlong)p_elem->nodegroup_id);
+  if (p_elem->part_max_rows)
+    err+= add_keyword_int(fptr,"MAX_ROWS",(longlong)p_elem->part_max_rows);
+  if (p_elem->part_min_rows)
+    err+= add_keyword_int(fptr,"MIN_ROWS",(longlong)p_elem->part_min_rows);
+  if (p_elem->data_file_name)
+    err+= add_keyword_string(fptr,"DATA DIRECTORY",p_elem->data_file_name);
+  if (p_elem->index_file_name)
+    err+= add_keyword_string(fptr,"INDEX DIRECTORY",p_elem->index_file_name);
+  if (p_elem->part_comment)
+    err+= add_keyword_string(fptr, "COMMENT",p_elem->part_comment);
+  return err + add_engine(fptr,p_elem->engine_type);
+}
+
+static int add_partition_values(File fptr, partition_info *part_info,
+                                partition_element *p_elem)
+{
+  int err= 0;
+  if (part_info->part_type == RANGE_PARTITION)
+  {
+    err+= add_string(fptr, "VALUES LESS THAN ");
+    if (p_elem->range_value != LONGLONG_MAX)
+    {
+      err+= add_begin_parenthesis(fptr);
+      err+= add_int(fptr, p_elem->range_value);
+      err+= add_end_parenthesis(fptr);
+    }
+    else
+      err+= add_string(fptr, "MAXVALUE");
+  }
+  else if (part_info->part_type == LIST_PARTITION)
+  {
+    uint i;
+    List_iterator<longlong> list_val_it(p_elem->list_val_list);
+    err+= add_string(fptr, "VALUES IN ");
+    uint no_items= p_elem->list_val_list.elements;
+    err+= add_begin_parenthesis(fptr);
+    i= 0;
+    do
+    {
+      longlong *list_value= list_val_it++;
+      err+= add_int(fptr, *list_value);
+      if (i != (no_items-1))
+        err+= add_comma(fptr);
+    } while (++i < no_items);
+    err+= add_end_parenthesis(fptr);
+  }
+  return err + add_space(fptr);
+}
+
+/*
+  Generate the partition syntax from the partition data structure.
+  Useful for support of generating defaults, SHOW CREATE TABLE
+  and easy partition management.
+  SYNOPSIS
+    generate_partition_syntax()
+    part_info                  The partitioning data structure
+    buf_length                 A pointer to the returned buffer length
+    use_sql_alloc              Allocate buffer from sql_alloc if true
+                               otherwise use my_malloc
+    add_default_info           Add info generated by default
+  RETURN VALUES
+    NULL error
+    buf, buf_length            Buffer and its length
+  DESCRIPTION
+  Here we will generate the full syntax for the given command where all
+  defaults have been expanded. By so doing it is also possible to
+  make lots of checks of correctness while at it.
+  This code will also be reused for SHOW CREATE TABLE and for all types
+  of ALTER TABLE command focusing on changing the PARTITION structure
+  in any fashion.
+
+  The implementation writes the syntax to a temporary file (essentially
+  an abstraction of a dynamic array) and if all writes go well it
+  allocates a buffer and writes the syntax into this one and returns it.
+
+  As a security precaution the file is deleted before writing into it. This
+  means that no other processes on the machine can open and read the file
+  while this processing is ongoing.
+
+  The code is optimised for minimal code size since it is not used in any
+  common queries.
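+  A minimal caller sketch (hypothetical, buffer name assumed): to expand
+  the syntax into a SHOW CREATE TABLE buffer one could write
+
+    uint syntax_len;
+    char *syntax= generate_partition_syntax(part_info, &syntax_len,
+                                            TRUE, TRUE);
+    if (syntax)                        // NULL means generation failed
+      packet->append(syntax, syntax_len);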
+*/ + +char *generate_partition_syntax(partition_info *part_info, + uint *buf_length, + bool use_sql_alloc, + bool add_default_info) +{ + uint i,j, no_parts, no_subparts; + partition_element *part_elem; + ulonglong buffer_length; + char path[FN_REFLEN]; + int err= 0; + DBUG_ENTER("generate_partition_syntax"); + File fptr; + char *buf= NULL; //Return buffer + const char *file_name; + + sprintf(path, "%s_%lx_%lx", "part_syntax", current_pid, + current_thd->thread_id); + fn_format(path,path,mysql_tmpdir,".psy", MY_REPLACE_EXT); + file_name= &path[0]; + DBUG_PRINT("info", ("File name = %s", file_name)); + if (unlikely(((fptr= my_open(file_name,O_CREAT|O_RDWR, MYF(MY_WME))) == -1))) + DBUG_RETURN(NULL); +#if defined(MSDOS) || defined(__WIN__) || defined(__EMX__) || defined(OS2) +#else + my_delete(file_name, MYF(0)); +#endif + err+= add_space(fptr); + err+= add_partition_by(fptr); + switch (part_info->part_type) + { + case RANGE_PARTITION: + add_default_info= TRUE; + err+= add_part_key_word(fptr, range_str); + break; + case LIST_PARTITION: + add_default_info= TRUE; + err+= add_part_key_word(fptr, list_str); + break; + case HASH_PARTITION: + if (part_info->linear_hash_ind) + err+= add_string(fptr, "LINEAR "); + if (part_info->list_of_part_fields) + err+= add_key_partition(fptr, part_info->part_field_list); + else + err+= add_hash(fptr); + break; + default: + DBUG_ASSERT(0); + /* We really shouldn't get here, no use in continuing from here */ + current_thd->fatal_error(); + DBUG_RETURN(NULL); + } + if (part_info->part_expr) + err+= add_string_len(fptr, part_info->part_func_string, + part_info->part_func_len); + err+= add_end_parenthesis(fptr); + err+= add_space(fptr); + if (is_sub_partitioned(part_info)) + { + err+= add_subpartition_by(fptr); + /* Must be hash partitioning for subpartitioning */ + if (part_info->list_of_subpart_fields) + err+= add_key_partition(fptr, part_info->subpart_field_list); + else + err+= add_hash(fptr); + if (part_info->subpart_expr) + err+= add_string_len(fptr, part_info->subpart_func_string, + part_info->subpart_func_len); + err+= add_end_parenthesis(fptr); + err+= add_space(fptr); + } + if (add_default_info) + { + err+= add_begin_parenthesis(fptr); + List_iterator<partition_element> part_it(part_info->partitions); + no_parts= part_info->no_parts; + no_subparts= part_info->no_subparts; + i= 0; + do + { + part_elem= part_it++; + err+= add_partition(fptr); + err+= add_string(fptr, part_elem->partition_name); + err+= add_space(fptr); + err+= add_partition_values(fptr, part_info, part_elem); + if (!is_sub_partitioned(part_info)) + err+= add_partition_options(fptr, part_elem); + if (is_sub_partitioned(part_info)) + { + err+= add_space(fptr); + err+= add_begin_parenthesis(fptr); + List_iterator<partition_element> sub_it(part_elem->subpartitions); + j= 0; + do + { + part_elem= sub_it++; + err+= add_subpartition(fptr); + err+= add_string(fptr, part_elem->partition_name); + err+= add_space(fptr); + err+= add_partition_options(fptr, part_elem); + if (j != (no_subparts-1)) + { + err+= add_comma(fptr); + err+= add_space(fptr); + } + else + err+= add_end_parenthesis(fptr); + } while (++j < no_subparts); + } + if (i != (no_parts-1)) + { + err+= add_comma(fptr); + err+= add_space(fptr); + } + else + err+= add_end_parenthesis(fptr); + } while (++i < no_parts); + } + if (err) + goto close_file; + buffer_length= my_seek(fptr, 0L,MY_SEEK_END,MYF(0)); + if (unlikely(buffer_length == MY_FILEPOS_ERROR)) + goto close_file; + if (unlikely(my_seek(fptr, 0L, MY_SEEK_SET, MYF(0)) == 
MY_FILEPOS_ERROR)) + goto close_file; + *buf_length= (uint)buffer_length; + if (use_sql_alloc) + buf= sql_alloc(*buf_length+1); + else + buf= my_malloc(*buf_length+1, MYF(MY_WME)); + if (!buf) + goto close_file; + + if (unlikely(my_read(fptr, (byte*)buf, *buf_length, MYF(MY_FNABP)))) + { + if (!use_sql_alloc) + my_free(buf, MYF(0)); + else + buf= NULL; + } + else + buf[*buf_length]= 0; + +close_file: + /* + Delete the file before closing to ensure the file doesn't get synched + to disk unnecessary. We only used the file system as a dynamic array + implementation so we are not really interested in getting the file + present on disk. + This is not possible on Windows so here it has to be done after closing + the file. Also on Unix we delete immediately after opening to ensure no + other process can read the information written into the file. + */ + my_close(fptr, MYF(0)); +#if defined(MSDOS) || defined(__WIN__) || defined(__EMX__) || defined(OS2) + my_delete(file_name, MYF(0)); +#endif + DBUG_RETURN(buf); +} + + +/* + Check if partition key fields are modified and if it can be handled by the + underlying storage engine. + SYNOPSIS + partition_key_modified + table TABLE object for which partition fields are set-up + fields A list of the to be modifed + RETURN VALUES + TRUE Need special handling of UPDATE + FALSE Normal UPDATE handling is ok +*/ + +bool partition_key_modified(TABLE *table, List<Item> &fields) +{ + List_iterator_fast<Item> f(fields); + partition_info *part_info= table->part_info; + Item_field *item_field; + DBUG_ENTER("partition_key_modified"); + if (!part_info) + DBUG_RETURN(FALSE); + if (table->file->partition_flags() & HA_CAN_UPDATE_PARTITION_KEY) + DBUG_RETURN(FALSE); + f.rewind(); + while ((item_field=(Item_field*) f++)) + if (item_field->field->flags & FIELD_IN_PART_FUNC_FLAG) + DBUG_RETURN(TRUE); + DBUG_RETURN(FALSE); +} + + +/* + The next set of functions are used to calculate the partition identity. + A handler sets up a variable that corresponds to one of these functions + to be able to quickly call it whenever the partition id needs to calculated + based on the record in table->record[0] (or set up to fake that). + There are 4 functions for hash partitioning and 2 for RANGE/LIST partitions. + In addition there are 4 variants for RANGE subpartitioning and 4 variants + for LIST subpartitioning thus in total there are 14 variants of this + function. + + We have a set of support functions for these 14 variants. There are 4 + variants of hash functions and there is a function for each. The KEY + partitioning uses the function calculate_key_value to calculate the hash + value based on an array of fields. The linear hash variants uses the + method get_part_id_from_linear_hash to get the partition id using the + hash value and some parameters calculated from the number of partitions. +*/ + +/* + Calculate hash value for KEY partitioning using an array of fields. + SYNOPSIS + calculate_key_value() + field_array An array of the fields in KEY partitioning + RETURN VALUE + hash_value calculated + DESCRIPTION + Uses the hash function on the character set of the field. Integer and + floating point fields use the binary character set by default. 
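+    For example (illustrative), for PARTITION BY KEY (a, b) each non-NULL
+    field folds its charset hash into the result via hashnr^= nr1, while a
+    NULL field contributes hashnr^= (hashnr << 1) | 1, so NULL values hash
+    deterministically without consulting any charset.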
+*/ + +static uint32 calculate_key_value(Field **field_array) +{ + uint32 hashnr= 0; + ulong nr2= 4; + do + { + Field *field= *field_array; + if (field->is_null()) + { + hashnr^= (hashnr << 1) | 1; + } + else + { + uint len= field->pack_length(); + ulong nr1= 1; + CHARSET_INFO *cs= field->charset(); + cs->coll->hash_sort(cs, (uchar*)field->ptr, len, &nr1, &nr2); + hashnr^= (uint32)nr1; + } + } while (*(++field_array)); + return hashnr; +} + + +/* + A simple support function to calculate part_id given local part and + sub part. + SYNOPSIS + get_part_id_for_sub() + loc_part_id Local partition id + sub_part_id Subpartition id + no_subparts Number of subparts +*/ + +inline +static uint32 get_part_id_for_sub(uint32 loc_part_id, uint32 sub_part_id, + uint no_subparts) +{ + return (uint32)((loc_part_id * no_subparts) + sub_part_id); +} + + +/* + Calculate part_id for (SUB)PARTITION BY HASH + SYNOPSIS + get_part_id_hash() + no_parts Number of hash partitions + part_expr Item tree of hash function + RETURN VALUE + Calculated partition id +*/ + +inline +static uint32 get_part_id_hash(uint no_parts, + Item *part_expr) +{ + DBUG_ENTER("get_part_id_hash"); + DBUG_RETURN((uint32)(part_expr->val_int() % no_parts)); +} + + +/* + Calculate part_id for (SUB)PARTITION BY LINEAR HASH + SYNOPSIS + get_part_id_linear_hash() + part_info A reference to the partition_info struct where all the + desired information is given + no_parts Number of hash partitions + part_expr Item tree of hash function + RETURN VALUE + Calculated partition id +*/ + +inline +static uint32 get_part_id_linear_hash(partition_info *part_info, + uint no_parts, + Item *part_expr) +{ + DBUG_ENTER("get_part_id_linear_hash"); + DBUG_RETURN(get_part_id_from_linear_hash(part_expr->val_int(), + part_info->linear_hash_mask, + no_parts)); +} + + +/* + Calculate part_id for (SUB)PARTITION BY KEY + SYNOPSIS + get_part_id_key() + field_array Array of fields for PARTTION KEY + no_parts Number of KEY partitions + RETURN VALUE + Calculated partition id +*/ + +inline +static uint32 get_part_id_key(Field **field_array, + uint no_parts) +{ + DBUG_ENTER("get_part_id_key"); + DBUG_RETURN(calculate_key_value(field_array) % no_parts); +} + + +/* + Calculate part_id for (SUB)PARTITION BY LINEAR KEY + SYNOPSIS + get_part_id_linear_key() + part_info A reference to the partition_info struct where all the + desired information is given + field_array Array of fields for PARTTION KEY + no_parts Number of KEY partitions + RETURN VALUE + Calculated partition id +*/ + +inline +static uint32 get_part_id_linear_key(partition_info *part_info, + Field **field_array, + uint no_parts) +{ + DBUG_ENTER("get_partition_id_linear_key"); + DBUG_RETURN(get_part_id_from_linear_hash(calculate_key_value(field_array), + part_info->linear_hash_mask, + no_parts)); +} + +/* + This function is used to calculate the partition id where all partition + fields have been prepared to point to a record where the partition field + values are bound. + SYNOPSIS + get_partition_id() + part_info A reference to the partition_info struct where all the + desired information is given + part_id The partition id is returned through this pointer + RETURN VALUE + part_id + return TRUE means that the fields of the partition function didn't fit + into any partition and thus the values of the PF-fields are not allowed. + DESCRIPTION + A routine used from write_row, update_row and delete_row from any + handler supporting partitioning. 
It is also a support routine for + get_partition_set used to find the set of partitions needed to scan + for a certain index scan or full table scan. + + It is actually 14 different variants of this function which are called + through a function pointer. + + get_partition_id_list + get_partition_id_range + get_partition_id_hash_nosub + get_partition_id_key_nosub + get_partition_id_linear_hash_nosub + get_partition_id_linear_key_nosub + get_partition_id_range_sub_hash + get_partition_id_range_sub_key + get_partition_id_range_sub_linear_hash + get_partition_id_range_sub_linear_key + get_partition_id_list_sub_hash + get_partition_id_list_sub_key + get_partition_id_list_sub_linear_hash + get_partition_id_list_sub_linear_key +*/ + +/* + This function is used to calculate the main partition to use in the case of + subpartitioning and we don't know enough to get the partition identity in + total. + SYNOPSIS + get_part_partition_id() + part_info A reference to the partition_info struct where all the + desired information is given + part_id The partition id is returned through this pointer + RETURN VALUE + part_id + return TRUE means that the fields of the partition function didn't fit + into any partition and thus the values of the PF-fields are not allowed. + DESCRIPTION + + It is actually 6 different variants of this function which are called + through a function pointer. + + get_partition_id_list + get_partition_id_range + get_partition_id_hash_nosub + get_partition_id_key_nosub + get_partition_id_linear_hash_nosub + get_partition_id_linear_key_nosub +*/ + + +bool get_partition_id_list(partition_info *part_info, + uint32 *part_id) +{ + DBUG_ENTER("get_partition_id_list"); + LIST_PART_ENTRY *list_array= part_info->list_array; + uint list_index; + longlong list_value; + uint min_list_index= 0, max_list_index= part_info->no_list_values - 1; + longlong part_func_value= part_info->part_expr->val_int(); + while (max_list_index >= min_list_index) + { + list_index= (max_list_index + min_list_index) >> 1; + list_value= list_array[list_index].list_value; + if (list_value < part_func_value) + min_list_index= list_index + 1; + else if (list_value > part_func_value) + { + if (!list_index) + goto notfound; + max_list_index= list_index - 1; + } + else + { + *part_id= (uint32)list_array[list_index].partition_id; + DBUG_RETURN(FALSE); + } + } +notfound: + *part_id= 0; + DBUG_RETURN(TRUE); +} + + +/* + Find the sub-array part_info->list_array that corresponds to given interval + + SYNOPSIS + get_list_array_idx_for_endpoint() + part_info Partitioning info (partitioning type must be LIST) + left_endpoint TRUE - the interval is [a; +inf) or (a; +inf) + FALSE - the interval is (-inf; a] or (-inf; a) + include_endpoint TRUE iff the interval includes the endpoint + + DESCRIPTION + This function finds the sub-array of part_info->list_array where values of + list_array[idx].list_value are contained within the specifed interval. + list_array is ordered by list_value, so + 1. For [a; +inf) or (a; +inf)-type intervals (left_endpoint==TRUE), the + sought sub-array starts at some index idx and continues till array end. + The function returns first number idx, such that + list_array[idx].list_value is contained within the passed interval. + + 2. For (-inf; a] or (-inf; a)-type intervals (left_endpoint==FALSE), the + sought sub-array starts at array start and continues till some last + index idx. + The function returns first number idx, such that + list_array[idx].list_value is NOT contained within the passed interval. 
+ If all array elements are contained, part_info->no_list_values is + returned. + + NOTE + The caller will call this function and then will run along the sub-array of + list_array to collect partition ids. If the number of list values is + significantly higher then number of partitions, this could be slow and + we could invent some other approach. The "run over list array" part is + already wrapped in a get_next()-like function. + + RETURN + The edge of corresponding sub-array of part_info->list_array +*/ + +uint32 get_list_array_idx_for_endpoint(partition_info *part_info, + bool left_endpoint, + bool include_endpoint) +{ + DBUG_ENTER("get_list_array_idx_for_endpoint"); + LIST_PART_ENTRY *list_array= part_info->list_array; + uint list_index; + longlong list_value; + uint min_list_index= 0, max_list_index= part_info->no_list_values - 1; + /* Get the partitioning function value for the endpoint */ + longlong part_func_value= part_info->part_expr->val_int(); + while (max_list_index >= min_list_index) + { + list_index= (max_list_index + min_list_index) >> 1; + list_value= list_array[list_index].list_value; + if (list_value < part_func_value) + min_list_index= list_index + 1; + else if (list_value > part_func_value) + { + if (!list_index) + goto notfound; + max_list_index= list_index - 1; + } + else + { + DBUG_RETURN(list_index + test(left_endpoint ^ include_endpoint)); + } + } +notfound: + if (list_value < part_func_value) + list_index++; + DBUG_RETURN(list_index); +} + + +bool get_partition_id_range(partition_info *part_info, + uint32 *part_id) +{ + DBUG_ENTER("get_partition_id_int_range"); + longlong *range_array= part_info->range_int_array; + uint max_partition= part_info->no_parts - 1; + uint min_part_id= 0, max_part_id= max_partition, loc_part_id; + longlong part_func_value= part_info->part_expr->val_int(); + while (max_part_id > min_part_id) + { + loc_part_id= (max_part_id + min_part_id + 1) >> 1; + if (range_array[loc_part_id] < part_func_value) + min_part_id= loc_part_id + 1; + else + max_part_id= loc_part_id - 1; + } + loc_part_id= max_part_id; + if (part_func_value >= range_array[loc_part_id]) + if (loc_part_id != max_partition) + loc_part_id++; + *part_id= (uint32)loc_part_id; + if (loc_part_id == max_partition) + if (range_array[loc_part_id] != LONGLONG_MAX) + if (part_func_value >= range_array[loc_part_id]) + DBUG_RETURN(TRUE); + DBUG_RETURN(FALSE); +} + + +/* + Find the sub-array of part_info->range_int_array that covers given interval + + SYNOPSIS + get_partition_id_range_for_endpoint() + part_info Partitioning info (partitioning type must be RANGE) + left_endpoint TRUE - the interval is [a; +inf) or (a; +inf) + FALSE - the interval is (-inf; a] or (-inf; a). + include_endpoint TRUE <=> the endpoint itself is included in the + interval + + DESCRIPTION + This function finds the sub-array of part_info->range_int_array where the + elements have non-empty intersections with the given interval. + + A range_int_array element at index idx represents the interval + + [range_int_array[idx-1], range_int_array[idx]), + + intervals are disjoint and ordered by their right bound, so + + 1. For [a; +inf) or (a; +inf)-type intervals (left_endpoint==TRUE), the + sought sub-array starts at some index idx and continues till array end. + The function returns first number idx, such that the interval + represented by range_int_array[idx] has non empty intersection with + the passed interval. + + 2. 
For (-inf; a] or (-inf; a)-type intervals (left_endpoint==FALSE), the + sought sub-array starts at array start and continues till some last + index idx. + The function returns first number idx, such that the interval + represented by range_int_array[idx] has EMPTY intersection with the + passed interval. + If the interval represented by the last array element has non-empty + intersection with the passed interval, part_info->no_parts is + returned. + + RETURN + The edge of corresponding part_info->range_int_array sub-array. +*/ + +uint32 get_partition_id_range_for_endpoint(partition_info *part_info, + bool left_endpoint, + bool include_endpoint) +{ + DBUG_ENTER("get_partition_id_range_for_endpoint"); + longlong *range_array= part_info->range_int_array; + uint max_partition= part_info->no_parts - 1; + uint min_part_id= 0, max_part_id= max_partition, loc_part_id; + /* Get the partitioning function value for the endpoint */ + longlong part_func_value= part_info->part_expr->val_int(); + while (max_part_id > min_part_id) + { + loc_part_id= (max_part_id + min_part_id + 1) >> 1; + if (range_array[loc_part_id] < part_func_value) + min_part_id= loc_part_id + 1; + else + max_part_id= loc_part_id - 1; + } + loc_part_id= max_part_id; + if (loc_part_id < max_partition && + part_func_value >= range_array[loc_part_id+1]) + { + loc_part_id++; + } + if (left_endpoint) + { + if (part_func_value >= range_array[loc_part_id]) + loc_part_id++; + } + else + { + if (part_func_value == range_array[loc_part_id]) + loc_part_id += test(include_endpoint); + else if (part_func_value > range_array[loc_part_id]) + loc_part_id++; + loc_part_id++; + } + DBUG_RETURN(loc_part_id); +} + + +bool get_partition_id_hash_nosub(partition_info *part_info, + uint32 *part_id) +{ + *part_id= get_part_id_hash(part_info->no_parts, part_info->part_expr); + return FALSE; +} + + +bool get_partition_id_linear_hash_nosub(partition_info *part_info, + uint32 *part_id) +{ + *part_id= get_part_id_linear_hash(part_info, part_info->no_parts, + part_info->part_expr); + return FALSE; +} + + +bool get_partition_id_key_nosub(partition_info *part_info, + uint32 *part_id) +{ + *part_id= get_part_id_key(part_info->part_field_array, part_info->no_parts); + return FALSE; +} + + +bool get_partition_id_linear_key_nosub(partition_info *part_info, + uint32 *part_id) +{ + *part_id= get_part_id_linear_key(part_info, + part_info->part_field_array, + part_info->no_parts); + return FALSE; +} + + +bool get_partition_id_range_sub_hash(partition_info *part_info, + uint32 *part_id) +{ + uint32 loc_part_id, sub_part_id; + uint no_subparts; + DBUG_ENTER("get_partition_id_range_sub_hash"); + if (unlikely(get_partition_id_range(part_info, &loc_part_id))) + { + DBUG_RETURN(TRUE); + } + no_subparts= part_info->no_subparts; + sub_part_id= get_part_id_hash(no_subparts, part_info->subpart_expr); + *part_id= get_part_id_for_sub(loc_part_id, sub_part_id, no_subparts); + DBUG_RETURN(FALSE); +} + + +bool get_partition_id_range_sub_linear_hash(partition_info *part_info, + uint32 *part_id) +{ + uint32 loc_part_id, sub_part_id; + uint no_subparts; + DBUG_ENTER("get_partition_id_range_sub_linear_hash"); + if (unlikely(get_partition_id_range(part_info, &loc_part_id))) + { + DBUG_RETURN(TRUE); + } + no_subparts= part_info->no_subparts; + sub_part_id= get_part_id_linear_hash(part_info, no_subparts, + part_info->subpart_expr); + *part_id= get_part_id_for_sub(loc_part_id, sub_part_id, no_subparts); + DBUG_RETURN(FALSE); +} + + +bool get_partition_id_range_sub_key(partition_info *part_info, + 
+                                    uint32 *part_id)
+{
+  uint32 loc_part_id, sub_part_id;
+  uint no_subparts;
+  DBUG_ENTER("get_partition_id_range_sub_key");
+  if (unlikely(get_partition_id_range(part_info, &loc_part_id)))
+  {
+    DBUG_RETURN(TRUE);
+  }
+  no_subparts= part_info->no_subparts;
+  sub_part_id= get_part_id_key(part_info->subpart_field_array, no_subparts);
+  *part_id= get_part_id_for_sub(loc_part_id, sub_part_id, no_subparts);
+  DBUG_RETURN(FALSE);
+}
+
+
+bool get_partition_id_range_sub_linear_key(partition_info *part_info,
+                                           uint32 *part_id)
+{
+  uint32 loc_part_id, sub_part_id;
+  uint no_subparts;
+  DBUG_ENTER("get_partition_id_range_sub_linear_key");
+  if (unlikely(get_partition_id_range(part_info, &loc_part_id)))
+  {
+    DBUG_RETURN(TRUE);
+  }
+  no_subparts= part_info->no_subparts;
+  sub_part_id= get_part_id_linear_key(part_info,
+                                      part_info->subpart_field_array,
+                                      no_subparts);
+  *part_id= get_part_id_for_sub(loc_part_id, sub_part_id, no_subparts);
+  DBUG_RETURN(FALSE);
+}
+
+
+bool get_partition_id_list_sub_hash(partition_info *part_info,
+                                    uint32 *part_id)
+{
+  uint32 loc_part_id, sub_part_id;
+  uint no_subparts;
+  DBUG_ENTER("get_partition_id_list_sub_hash");
+  if (unlikely(get_partition_id_list(part_info, &loc_part_id)))
+  {
+    DBUG_RETURN(TRUE);
+  }
+  no_subparts= part_info->no_subparts;
+  sub_part_id= get_part_id_hash(no_subparts, part_info->subpart_expr);
+  *part_id= get_part_id_for_sub(loc_part_id, sub_part_id, no_subparts);
+  DBUG_RETURN(FALSE);
+}
+
+
+bool get_partition_id_list_sub_linear_hash(partition_info *part_info,
+                                           uint32 *part_id)
+{
+  uint32 loc_part_id, sub_part_id;
+  uint no_subparts;
+  DBUG_ENTER("get_partition_id_list_sub_linear_hash");
+  if (unlikely(get_partition_id_list(part_info, &loc_part_id)))
+  {
+    DBUG_RETURN(TRUE);
+  }
+  no_subparts= part_info->no_subparts;
+  sub_part_id= get_part_id_linear_hash(part_info, no_subparts,
+                                       part_info->subpart_expr);
+  *part_id= get_part_id_for_sub(loc_part_id, sub_part_id, no_subparts);
+  DBUG_RETURN(FALSE);
+}
+
+
+bool get_partition_id_list_sub_key(partition_info *part_info,
+                                   uint32 *part_id)
+{
+  uint32 loc_part_id, sub_part_id;
+  uint no_subparts;
+  DBUG_ENTER("get_partition_id_list_sub_key");
+  if (unlikely(get_partition_id_list(part_info, &loc_part_id)))
+  {
+    DBUG_RETURN(TRUE);
+  }
+  no_subparts= part_info->no_subparts;
+  sub_part_id= get_part_id_key(part_info->subpart_field_array, no_subparts);
+  *part_id= get_part_id_for_sub(loc_part_id, sub_part_id, no_subparts);
+  DBUG_RETURN(FALSE);
+}
+
+
+bool get_partition_id_list_sub_linear_key(partition_info *part_info,
+                                          uint32 *part_id)
+{
+  uint32 loc_part_id, sub_part_id;
+  uint no_subparts;
+  DBUG_ENTER("get_partition_id_list_sub_linear_key");
+  if (unlikely(get_partition_id_list(part_info, &loc_part_id)))
+  {
+    DBUG_RETURN(TRUE);
+  }
+  no_subparts= part_info->no_subparts;
+  sub_part_id= get_part_id_linear_key(part_info,
+                                      part_info->subpart_field_array,
+                                      no_subparts);
+  *part_id= get_part_id_for_sub(loc_part_id, sub_part_id, no_subparts);
+  DBUG_RETURN(FALSE);
+}
+
+
+/*
+  This function is used to calculate the subpartition id
+  SYNOPSIS
+    get_subpartition_id()
+    part_info           A reference to the partition_info struct where all the
+                        desired information is given
+  RETURN VALUE
+    part_id
+    The subpartition identity
+  DESCRIPTION
+    A routine used in some SELECTs when only partial knowledge of the
+    partitions is known.
+
+    It is actually 4 different variants of this function which are called
+    through a function pointer.
+
+
+/*
+  This function is used to calculate the subpartition id
+  SYNOPSIS
+    get_subpartition_id()
+    part_info        A reference to the partition_info struct where all the
+                     desired information is given
+  RETURN VALUE
+    part_id          The subpartition identity
+  DESCRIPTION
+    A routine used in some SELECTs when only partial knowledge of the
+    partitions is known.
+
+    There are actually 4 different variants of this function, called
+    through a function pointer:
+
+    get_partition_id_hash_sub
+    get_partition_id_key_sub
+    get_partition_id_linear_hash_sub
+    get_partition_id_linear_key_sub
+*/
+
+uint32 get_partition_id_hash_sub(partition_info *part_info)
+{
+  return get_part_id_hash(part_info->no_subparts, part_info->subpart_expr);
+}
+
+
+uint32 get_partition_id_linear_hash_sub(partition_info *part_info)
+{
+  return get_part_id_linear_hash(part_info, part_info->no_subparts,
+                                 part_info->subpart_expr);
+}
+
+
+uint32 get_partition_id_key_sub(partition_info *part_info)
+{
+  return get_part_id_key(part_info->subpart_field_array,
+                         part_info->no_subparts);
+}
+
+
+uint32 get_partition_id_linear_key_sub(partition_info *part_info)
+{
+  return get_part_id_linear_key(part_info,
+                                part_info->subpart_field_array,
+                                part_info->no_subparts);
+}
+
+
+/*
+  Set an indicator on all partition fields that are set by the key
+  SYNOPSIS
+    set_PF_fields_in_key()
+    key_info                Information about the index
+    key_length              Length of key
+  RETURN VALUE
+    TRUE                    Found partition field set by key
+    FALSE                   No partition field set by key
+*/
+
+static bool set_PF_fields_in_key(KEY *key_info, uint key_length)
+{
+  KEY_PART_INFO *key_part;
+  bool found_part_field= FALSE;
+  DBUG_ENTER("set_PF_fields_in_key");
+
+  for (key_part= key_info->key_part; (int)key_length > 0; key_part++)
+  {
+    if (key_part->null_bit)
+      key_length--;
+    if (key_part->type == HA_KEYTYPE_BIT)
+    {
+      if (((Field_bit*)key_part->field)->bit_len)
+        key_length--;
+    }
+    if (key_part->key_part_flag & (HA_BLOB_PART + HA_VAR_LENGTH_PART))
+    {
+      key_length-= HA_KEY_BLOB_LENGTH;
+    }
+    if (key_length < key_part->length)
+      break;
+    key_length-= key_part->length;
+    if (key_part->field->flags & FIELD_IN_PART_FUNC_FLAG)
+    {
+      found_part_field= TRUE;
+      key_part->field->flags|= GET_FIXED_FIELDS_FLAG;
+    }
+  }
+  DBUG_RETURN(found_part_field);
+}
+
+
+/*
+  We have found that at least one partition field was set by a key, now
+  check if a partition function has all its fields bound or not.
+  SYNOPSIS
+    check_part_func_bound()
+    ptr                     Array of fields NULL terminated (partition fields)
+  RETURN VALUE
+    TRUE                    All fields in partition function are set
+    FALSE                   Not all fields in partition function are set
+*/
+
+static bool check_part_func_bound(Field **ptr)
+{
+  bool result= TRUE;
+  DBUG_ENTER("check_part_func_bound");
+
+  for (; *ptr; ptr++)
+  {
+    if (!((*ptr)->flags & GET_FIXED_FIELDS_FLAG))
+    {
+      result= FALSE;
+      break;
+    }
+  }
+  DBUG_RETURN(result);
+}
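A hypothetical worked example (the schema is illustrative, not from the patch) of how these two helpers cooperate:

// Given INDEX (a, b, c) and a partition function over (a, b):
//
//   key prefix covering (a, b)  -> set_PF_fields_in_key() flags a and b
//                               -> check_part_func_bound() returns TRUE,
//                                  so the exact partition can be computed;
//
//   key prefix covering (a)     -> b stays unflagged
//                               -> check_part_func_bound() returns FALSE,
//                                  so a range of partitions must be scanned.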
+
+
+/*
+  Get the id of the subpartitioning part by using the key buffer of the
+  index scan.
+  SYNOPSIS
+    get_sub_part_id_from_key()
+    table         The table object
+    buf           A buffer that can be used to evaluate the partition function
+    key_info      The index object
+    key_spec      A key_range containing key and key length
+  RETURN VALUES
+    part_id       Subpartition id to use
+  DESCRIPTION
+    Use the key buffer to set up the record in buf, move the field pointers,
+    get the partition identity and restore the field pointers afterwards.
+*/
+
+static uint32 get_sub_part_id_from_key(const TABLE *table, byte *buf,
+                                       KEY *key_info,
+                                       const key_range *key_spec)
+{
+  byte *rec0= table->record[0];
+  partition_info *part_info= table->part_info;
+  uint32 part_id;
+  DBUG_ENTER("get_sub_part_id_from_key");
+
+  key_restore(buf, (byte*)key_spec->key, key_info, key_spec->length);
+  if (likely(rec0 == buf))
+    part_id= part_info->get_subpartition_id(part_info);
+  else
+  {
+    Field **part_field_array= part_info->subpart_field_array;
+    set_field_ptr(part_field_array, buf, rec0);
+    part_id= part_info->get_subpartition_id(part_info);
+    set_field_ptr(part_field_array, rec0, buf);
+  }
+  DBUG_RETURN(part_id);
+}
+
+/*
+  Get the id of the partitioning part by using the key buffer of the
+  index scan.
+  SYNOPSIS
+    get_part_id_from_key()
+    table         The table object
+    buf           A buffer that can be used to evaluate the partition function
+    key_info      The index object
+    key_spec      A key_range containing key and key length
+    part_id       Partition to use
+  RETURN VALUES
+    TRUE          Partition to use not found
+    FALSE         Ok, part_id indicates partition to use
+  DESCRIPTION
+    Use the key buffer to set up the record in buf, move the field pointers,
+    get the partition identity and restore the field pointers afterwards.
+*/
+bool get_part_id_from_key(const TABLE *table, byte *buf, KEY *key_info,
+                          const key_range *key_spec, uint32 *part_id)
+{
+  bool result;
+  byte *rec0= table->record[0];
+  partition_info *part_info= table->part_info;
+  DBUG_ENTER("get_part_id_from_key");
+
+  key_restore(buf, (byte*)key_spec->key, key_info, key_spec->length);
+  if (likely(rec0 == buf))
+    result= part_info->get_part_partition_id(part_info, part_id);
+  else
+  {
+    Field **part_field_array= part_info->part_field_array;
+    set_field_ptr(part_field_array, buf, rec0);
+    result= part_info->get_part_partition_id(part_info, part_id);
+    set_field_ptr(part_field_array, rec0, buf);
+  }
+  DBUG_RETURN(result);
+}
+
+/*
+  Get the partitioning id of the full PF by using the key buffer of the
+  index scan.
+  SYNOPSIS
+    get_full_part_id_from_key()
+    table         The table object
+    buf           A buffer that is used to evaluate the partition function
+    key_info      The index object
+    key_spec      A key_range containing key and key length
+    part_spec     A partition id containing start part and end part
+  RETURN VALUES
+    part_spec
+    No partitions to scan is indicated by start_part > end_part when returning
+  DESCRIPTION
+    Use the key buffer to set up the record in buf, move the field pointers
+    if needed, get the partition identity and restore the field pointers
+    afterwards.
+*/
+
+void get_full_part_id_from_key(const TABLE *table, byte *buf,
+                               KEY *key_info,
+                               const key_range *key_spec,
+                               part_id_range *part_spec)
+{
+  bool result;
+  partition_info *part_info= table->part_info;
+  byte *rec0= table->record[0];
+  DBUG_ENTER("get_full_part_id_from_key");
+
+  key_restore(buf, (byte*)key_spec->key, key_info, key_spec->length);
+  if (likely(rec0 == buf))
+    result= part_info->get_partition_id(part_info, &part_spec->start_part);
+  else
+  {
+    Field **part_field_array= part_info->full_part_field_array;
+    set_field_ptr(part_field_array, buf, rec0);
+    result= part_info->get_partition_id(part_info, &part_spec->start_part);
+    set_field_ptr(part_field_array, rec0, buf);
+  }
+  part_spec->end_part= part_spec->start_part;
+  if (unlikely(result))
+    part_spec->start_part++;
+  DBUG_VOID_RETURN;
+}
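All three *_from_key() functions above lean on the same pointer-shift idiom; set_field_ptr() itself is defined later in this file. A minimal self-contained sketch, with hypothetical types, of what the shift does:

#include <cstddef>

struct ExField
{
  unsigned char *ptr;                         // where the field reads its value
  void move_field_offset(std::ptrdiff_t d) { ptr+= d; }
};

// Shift every field pointer from old_buf into new_buf; called a second time
// with the arguments swapped to restore the pointers to table->record[0].
static void shift_field_ptrs(ExField **fields,
                             const unsigned char *new_buf,
                             const unsigned char *old_buf)
{
  std::ptrdiff_t diff= new_buf - old_buf;     // same offset for every field
  do
  {
    (*fields)->move_field_offset(diff);
  } while (*(++fields));                      // the array is NULL-terminated
}

The evaluation code always reads fields relative to record[0], so shifting by the distance between the two buffers lets the partition function be evaluated against an arbitrary key-derived record without copying it.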
+
+/*
+  Get the set of partitions to use in query.
+  SYNOPSIS
+    get_partition_set()
+    table         The table object
+    buf           A buffer that can be used to evaluate the partition function
+    index         The index of the key used, if MAX_KEY no index used
+    key_spec      A key_range containing key and key length
+    part_spec     Contains start part, end part and indicator if bitmap is
+                  used for which partitions to scan
+  DESCRIPTION
+    This function is called to discover which partitions to use in an index
+    scan or a full table scan.
+    It returns a range of partitions to scan. If there are holes in this
+    range (partitions that do not need to be scanned), a bit array is used
+    to signal which partitions to use and which not to use.
+    If start_part > end_part at return it means no partition needs to be
+    scanned. If start_part == end_part it always means that a single
+    partition needs to be scanned.
+  RETURN VALUE
+    part_spec
+*/
+void get_partition_set(const TABLE *table, byte *buf, const uint index,
+                       const key_range *key_spec, part_id_range *part_spec)
+{
+  partition_info *part_info= table->part_info;
+  uint no_parts= get_tot_partitions(part_info), i, part_id;
+  uint sub_part= no_parts;
+  uint32 part_part= no_parts;
+  KEY *key_info= NULL;
+  bool found_part_field= FALSE;
+  DBUG_ENTER("get_partition_set");
+
+  part_spec->use_bit_array= FALSE;
+  part_spec->start_part= 0;
+  part_spec->end_part= no_parts - 1;
+  if ((index < MAX_KEY) &&
+      key_spec->flag == (uint)HA_READ_KEY_EXACT &&
+      part_info->some_fields_in_PF.is_set(index))
+  {
+    key_info= table->key_info+index;
+    /*
+      The index can potentially provide at least one PF-field (field in the
+      partition function). Thus it is interesting to continue our probe.
+    */
+    if (key_spec->length == key_info->key_length)
+    {
+      /*
+        The entire key is set so we can check whether we can immediately
+        derive either the complete PF or if we can derive either
+        the top PF or the subpartitioning PF. This can be established by
+        checking precalculated bits on each index.
+      */
+      if (part_info->all_fields_in_PF.is_set(index))
+      {
+        /*
+          We can derive the exact partition to use, no more than this one
+          is needed.
+        */
+        get_full_part_id_from_key(table,buf,key_info,key_spec,part_spec);
+        DBUG_VOID_RETURN;
+      }
+      else if (is_sub_partitioned(part_info))
+      {
+        if (part_info->all_fields_in_SPF.is_set(index))
+          sub_part= get_sub_part_id_from_key(table, buf, key_info, key_spec);
+        else if (part_info->all_fields_in_PPF.is_set(index))
+        {
+          if (get_part_id_from_key(table,buf,key_info,key_spec,(uint32*)&part_part))
+          {
+            /*
+              The value of the RANGE or LIST partitioning was outside of
+              allowed values. Thus it is certain that the result of this
+              scan will be empty.
+            */
+            part_spec->start_part= no_parts;
+            DBUG_VOID_RETURN;
+          }
+        }
+      }
+    }
+    else
+    {
+      /*
+        Set an indicator on all partition fields that are bound.
+        If at least one PF-field was bound it pays off to check whether
+        the PF or PPF or SPF has been bound.
+        (PF = Partition Function, SPF = Subpartition Function and
+         PPF = Partition Function part of subpartitioning)
+      */
+      if ((found_part_field= set_PF_fields_in_key(key_info,
+                                                  key_spec->length)))
+      {
+        if (check_part_func_bound(part_info->full_part_field_array))
+        {
+          /*
+            We were able to bind all fields in the partition function even
+            by using only a part of the key. Calculate the partition to use.
+          */
+          get_full_part_id_from_key(table,buf,key_info,key_spec,part_spec);
+          clear_indicator_in_key_fields(key_info);
+          DBUG_VOID_RETURN;
+        }
+        else if (check_part_func_bound(part_info->part_field_array))
+          sub_part= get_sub_part_id_from_key(table, buf, key_info, key_spec);
+        else if (check_part_func_bound(part_info->subpart_field_array))
+        {
+          if (get_part_id_from_key(table,buf,key_info,key_spec,(uint32*)&part_part))
+          {
+            part_spec->start_part= no_parts;
+            clear_indicator_in_key_fields(key_info);
+            DBUG_VOID_RETURN;
+          }
+        }
+      }
+    }
+  }
+  {
+    /*
+      The next step is to analyse the table condition to see whether any
+      information about which partitions to scan can be derived from there.
+      Currently not implemented.
+    */
+  }
+  /*
+    If we get here, we have either discovered nothing or a range of
+    partitions with possible holes in it. We need a bitvector to continue
+    the work from here.
+  */
+  if (!(part_part == no_parts && sub_part == no_parts))
+  {
+    /*
+      We can only arrive here if we are using subpartitioning.
+    */
+    if (part_part != no_parts)
+    {
+      /*
+        We know the top partition and need to scan all underlying
+        subpartitions. This is a range without holes.
+      */
+      DBUG_ASSERT(sub_part == no_parts);
+      part_spec->start_part= part_part * part_info->no_subparts;
+      part_spec->end_part= part_spec->start_part+part_info->no_subparts - 1;
+    }
+    else
+    {
+      DBUG_ASSERT(sub_part != no_parts);
+      part_spec->use_bit_array= TRUE;
+      part_spec->start_part= sub_part;
+      part_spec->end_part=sub_part+
+                          (part_info->no_subparts*(part_info->no_parts-1));
+      for (i= 0, part_id= sub_part; i < part_info->no_parts;
+           i++, part_id+= part_info->no_subparts)
+        ; //Set bit part_id in bit array
+    }
+  }
+  if (found_part_field)
+    clear_indicator_in_key_fields(key_info);
+  DBUG_VOID_RETURN;
+}
+
+
+/*
+  If the table is partitioned we will read the partition info from the
+  .frm file here.
+  -------------------------------
+  | Fileinfo     64 bytes       |
+  -------------------------------
+  | Formnames    7 bytes        |
+  -------------------------------
+  | Not used    4021 bytes      |
+  -------------------------------
+  | Keyinfo + record            |
+  -------------------------------
+  | Padded to next multiple     |
+  | of IO_SIZE                  |
+  -------------------------------
+  | Forminfo     288 bytes      |
+  -------------------------------
+  | Screen buffer, to make      |
+  | field names readable        |
+  -------------------------------
+  | Packed field info           |
+  | 17 + 1 + strlen(field_name) |
+  | + 1 end of file character   |
+  -------------------------------
+  | Partition info              |
+  -------------------------------
+  We store the length of the partition info in Fileinfo[55-58].
+
+  Read the partition syntax from the frm file and parse it to get the
+  data structures of the partitioning.
+  SYNOPSIS
+    mysql_unpack_partition()
+    thd                           Thread object
+    part_buf                      Buffer holding the partition syntax
+    part_info_len                 Length of partition syntax
+    table                         Table object of partitioned table
+    default_db_type               Default engine type of the table
+  RETURN VALUE
+    TRUE                          Error
+    FALSE                         Success
+  DESCRIPTION
+    Read the partition syntax from the current position in the frm file.
+    Initiate a LEX object, save the list of item tree objects to free after
+    the query is done. Set up the partition info object such that the parser
+    knows it is called internally. Call the parser to create the data
+    structures (the best possible recreation of the item trees and so forth,
+    since there is no serialisation of these objects other than in parseable
+    text format).
+    We need to save the text of the partition functions since it is not
+    possible to retrace this given an item tree.
+*/
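For illustration, a hedged sketch (not part of the patch) of how a reader could pick up the partition-info length from the Fileinfo header described above; offset 55 comes from the comment, while the little-endian byte order is an assumption matching the rest of the frm header conventions:

// Hypothetical helper: read the 4-byte partition-info length at Fileinfo[55].
static unsigned long partition_info_length(const unsigned char *fileinfo)
{
  return  (unsigned long) fileinfo[55]
       | ((unsigned long) fileinfo[56] << 8)
       | ((unsigned long) fileinfo[57] << 16)
       | ((unsigned long) fileinfo[58] << 24);
}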
+
+bool mysql_unpack_partition(THD *thd, const uchar *part_buf,
+                            uint part_info_len, TABLE* table,
+                            handlerton *default_db_type)
+{
+  Item *thd_free_list= thd->free_list;
+  bool result= TRUE;
+  partition_info *part_info;
+  LEX *old_lex= thd->lex, lex;
+  DBUG_ENTER("mysql_unpack_partition");
+
+  thd->lex= &lex;
+  lex_start(thd, part_buf, part_info_len);
+  /*
+    We need to use the current SELECT_LEX since we need to keep the
+    Name_resolution_context object which is referenced from the
+    Item_field objects.
+    This is not a nice solution since if the parser uses current_select
+    for anything else it will corrupt the current LEX object.
+  */
+  thd->lex->current_select= old_lex->current_select;
+  /*
+    All Items created are put into a free list on the THD object. This list
+    is used to free all Item objects after completing a query. We don't
+    want that to happen with the Item tree created as part of the partition
+    info. This should be attached to the table object and remain so until
+    the table object is released.
+    Thus we move away the current list temporarily and start a new list that
+    we then save in the partition info structure.
+  */
+  thd->free_list= NULL;
+  lex.part_info= (partition_info*)1; // Flag to yyparse: internal partition parse
+  if (yyparse((void*)thd) || thd->is_fatal_error)
+  {
+    free_items(thd->free_list);
+    goto end;
+  }
+  part_info= lex.part_info;
+  table->part_info= part_info;
+  table->file->set_part_info(part_info);
+  if (part_info->default_engine_type == NULL)
+    part_info->default_engine_type= default_db_type;
+  else
+  {
+    DBUG_ASSERT(part_info->default_engine_type == default_db_type);
+  }
+  part_info->item_free_list= thd->free_list;
+
+  {
+    /*
+      This code allocates memory for the serialised item information for
+      the partition functions. In most cases this is not needed, but if the
+      table is used for SHOW CREATE TABLE or an ALTER TABLE that modifies
+      the partition information it is, and the info is lost unless we save
+      it here. This is a consequence of the fact that item trees are not
+      serialisable.
+    */
+    uint part_func_len= part_info->part_func_len;
+    uint subpart_func_len= part_info->subpart_func_len;
+    uint bitmap_bits= part_info->no_subparts?
+                      (part_info->no_subparts* part_info->no_parts):
+                      part_info->no_parts;
+    uint bitmap_bytes= bitmap_buffer_size(bitmap_bits);
+    uint32 *bitmap_buf;
+    char *part_func_string, *subpart_func_string= NULL;
+    if (!((part_func_string= thd->alloc(part_func_len))) ||
+        (subpart_func_len &&
+         !((subpart_func_string= thd->alloc(subpart_func_len)))) ||
+        !((bitmap_buf= (uint32*)thd->alloc(bitmap_bytes))))
+    {
+      my_error(ER_OUTOFMEMORY, MYF(0), part_func_len);
+      free_items(thd->free_list);
+      part_info->item_free_list= 0;
+      goto end;
+    }
+    memcpy(part_func_string, part_info->part_func_string, part_func_len);
+    if (subpart_func_len)
+      memcpy(subpart_func_string, part_info->subpart_func_string,
+             subpart_func_len);
+    part_info->part_func_string= part_func_string;
+    part_info->subpart_func_string= subpart_func_string;
+
+    bitmap_init(&part_info->used_partitions, bitmap_buf, bitmap_bytes*8,
+                FALSE);
+  }
+
+  result= FALSE;
+end:
+  thd->free_list= thd_free_list;
+  thd->lex= old_lex;
+  DBUG_RETURN(result);
+}
+#endif
+
+/*
+  Prepare for calling val_int on partition function by setting fields to
+  point to the record where the values of the PF-fields are stored.
+  SYNOPSIS
+    set_field_ptr()
+    ptr                 Array of fields to change ptr
+    new_buf             New record pointer
+    old_buf             Old record pointer
+  DESCRIPTION
+    Set ptr in field objects of field array to refer to new_buf record
+    instead of previously old_buf. Used before calling val_int, and
+    afterwards to restore the pointers to table->record[0].
+    This routine is placed outside the partition code since it can also be
+    useful for other parts of the server.
+*/
+
+void set_field_ptr(Field **ptr, const byte *new_buf,
+                   const byte *old_buf)
+{
+  my_ptrdiff_t diff= (new_buf - old_buf);
+  DBUG_ENTER("set_field_ptr");
+
+  do
+  {
+    (*ptr)->move_field_offset(diff);
+  } while (*(++ptr));
+  DBUG_VOID_RETURN;
+}
+
+
+/*
+  Prepare for calling val_int on partition function by setting fields to
+  point to the record where the values of the PF-fields are stored.
+  This variant works on a key_part reference.
+  It is not required that all fields are NOT NULL fields.
+  SYNOPSIS
+    set_key_field_ptr()
+    key_part            key part with a set of fields to change ptr
+    new_buf             New record pointer
+    old_buf             Old record pointer
+  DESCRIPTION
+    Set ptr in field objects of field array to refer to new_buf record
+    instead of previously old_buf. Used before calling val_int, and
+    afterwards to restore the pointers to table->record[0].
+    This routine is placed outside the partition code since it can also be
+    useful for other parts of the server.
+*/
+
+void set_key_field_ptr(KEY *key_info, const byte *new_buf,
+                       const byte *old_buf)
+{
+  KEY_PART_INFO *key_part= key_info->key_part;
+  uint key_parts= key_info->key_parts, i= 0;
+  my_ptrdiff_t diff= (new_buf - old_buf);
+  DBUG_ENTER("set_key_field_ptr");
+
+  do
+  {
+    key_part->field->move_field_offset(diff);
+    key_part++;
+  } while (++i < key_parts);
+  DBUG_VOID_RETURN;
+}
+
+
+/*
+  Fill a string with a comma-separated list of the names of the used
+  partitions
+  SYNOPSIS
+    make_used_partitions_str()
+    part_info  IN  Partitioning info
+    parts_str  OUT The string to fill
+*/
+
+void make_used_partitions_str(partition_info *part_info, String *parts_str)
+{
+  parts_str->length(0);
+  partition_element *pe;
+  uint partition_id= 0;
+  List_iterator<partition_element> it(part_info->partitions);
+
+  if (part_info->subpart_type != NOT_A_PARTITION)
+  {
+    partition_element *head_pe;
+    while ((head_pe= it++))
+    {
+      List_iterator<partition_element> it2(head_pe->subpartitions);
+      while ((pe= it2++))
+      {
+        if (bitmap_is_set(&part_info->used_partitions, partition_id))
+        {
+          if (parts_str->length())
+            parts_str->append(',');
+          parts_str->append(head_pe->partition_name,
+                            strlen(head_pe->partition_name),
+                            system_charset_info);
+          parts_str->append('_');
+          parts_str->append(pe->partition_name,
+                            strlen(pe->partition_name),
+                            system_charset_info);
+        }
+        partition_id++;
+      }
+    }
+  }
+  else
+  {
+    while ((pe= it++))
+    {
+      if (bitmap_is_set(&part_info->used_partitions, partition_id))
+      {
+        if (parts_str->length())
+          parts_str->append(',');
+        parts_str->append(pe->partition_name, strlen(pe->partition_name),
+                          system_charset_info);
+      }
+      partition_id++;
+    }
+  }
+}
+
diff --git a/sql/sql_plugin.cc b/sql/sql_plugin.cc
new file mode 100644
index 00000000000..3063f665564
--- /dev/null
+++ b/sql/sql_plugin.cc
@@ -0,0 +1,820 @@
+/* Copyright (C) 2005 MySQL AB
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
+
+#include "mysql_priv.h"
+#include <my_pthread.h>
+#define REPORT_TO_LOG  1
+#define REPORT_TO_USER 2
+
+char *opt_plugin_dir_ptr;
+char opt_plugin_dir[FN_REFLEN];
+LEX_STRING plugin_type_names[]=
+{
+  { (char *)STRING_WITH_LEN("UDF") },
+  { (char *)STRING_WITH_LEN("STORAGE ENGINE") },
+  { (char *)STRING_WITH_LEN("FTPARSER") }
+};
+static const char *plugin_interface_version_sym=
+                   "_mysql_plugin_interface_version_";
+static const char *plugin_declarations_sym= "_mysql_plugin_declarations_";
+static int min_plugin_interface_version= 0x0000;
+/*
+  Note that 'int version' must be the first field of every plugin
+  sub-structure (plugin->info).
+*/ +static int min_plugin_info_interface_version[MYSQL_MAX_PLUGIN_TYPE_NUM]= +{ + 0x0000, + 0x0000, + 0x0000 +}; +static int cur_plugin_info_interface_version[MYSQL_MAX_PLUGIN_TYPE_NUM]= +{ + 0x0000, /* UDF: not implemented */ + MYSQL_HANDLERTON_INTERFACE_VERSION, + MYSQL_FTPARSER_INTERFACE_VERSION +}; +static DYNAMIC_ARRAY plugin_dl_array; +static DYNAMIC_ARRAY plugin_array; +static HASH plugin_hash[MYSQL_MAX_PLUGIN_TYPE_NUM]; +static rw_lock_t THR_LOCK_plugin; +static bool initialized= 0; + +static struct st_plugin_dl *plugin_dl_find(LEX_STRING *dl) +{ + uint i; + DBUG_ENTER("plugin_dl_find"); + for (i= 0; i < plugin_dl_array.elements; i++) + { + struct st_plugin_dl *tmp= dynamic_element(&plugin_dl_array, i, + struct st_plugin_dl *); + if (tmp->ref_count && + ! my_strnncoll(files_charset_info, + (const uchar *)dl->str, dl->length, + (const uchar *)tmp->dl.str, tmp->dl.length)) + DBUG_RETURN(tmp); + } + DBUG_RETURN(0); +} + + +static st_plugin_dl *plugin_dl_insert_or_reuse(struct st_plugin_dl *plugin_dl) +{ + uint i; + DBUG_ENTER("plugin_dl_insert_or_reuse"); + for (i= 0; i < plugin_dl_array.elements; i++) + { + struct st_plugin_dl *tmp= dynamic_element(&plugin_dl_array, i, + struct st_plugin_dl *); + if (! tmp->ref_count) + { + memcpy(tmp, plugin_dl, sizeof(struct st_plugin_dl)); + DBUG_RETURN(tmp); + } + } + if (insert_dynamic(&plugin_dl_array, (gptr)plugin_dl)) + DBUG_RETURN(0); + DBUG_RETURN(dynamic_element(&plugin_dl_array, plugin_dl_array.elements - 1, + struct st_plugin_dl *)); +} + + +static st_plugin_dl *plugin_dl_add(LEX_STRING *dl, int report) +{ +#ifdef HAVE_DLOPEN + char dlpath[FN_REFLEN]; + uint plugin_dir_len, dummy_errors; + struct st_plugin_dl *tmp, plugin_dl; + void *sym; + DBUG_ENTER("plugin_dl_add"); + plugin_dir_len= strlen(opt_plugin_dir); + /* + Ensure that the dll doesn't have a path. + This is done to ensure that only approved libraries from the + plugin directory are used (to make this even remotely secure). + */ + if (my_strchr(files_charset_info, dl->str, dl->str + dl->length, FN_LIBCHAR) || + dl->length > NAME_LEN || + plugin_dir_len + dl->length + 1 >= FN_REFLEN) + { + if (report & REPORT_TO_USER) + my_error(ER_UDF_NO_PATHS, MYF(0)); + if (report & REPORT_TO_LOG) + sql_print_error(ER(ER_UDF_NO_PATHS)); + DBUG_RETURN(0); + } + /* If this dll is already loaded just increase ref_count. 
*/ + if ((tmp= plugin_dl_find(dl))) + { + tmp->ref_count++; + DBUG_RETURN(tmp); + } + /* Compile dll path */ + strxnmov(dlpath, sizeof(dlpath) - 1, opt_plugin_dir, "/", dl->str, NullS); + plugin_dl.ref_count= 1; + /* Open new dll handle */ + if (!(plugin_dl.handle= dlopen(dlpath, RTLD_NOW))) + { + if (report & REPORT_TO_USER) + my_error(ER_CANT_OPEN_LIBRARY, MYF(0), dlpath, errno, dlerror()); + if (report & REPORT_TO_LOG) + sql_print_error(ER(ER_CANT_OPEN_LIBRARY), dlpath, errno, dlerror()); + DBUG_RETURN(0); + } + /* Determine interface version */ + if (!(sym= dlsym(plugin_dl.handle, plugin_interface_version_sym))) + { + dlclose(plugin_dl.handle); + if (report & REPORT_TO_USER) + my_error(ER_CANT_FIND_DL_ENTRY, MYF(0), plugin_interface_version_sym); + if (report & REPORT_TO_LOG) + sql_print_error(ER(ER_CANT_FIND_DL_ENTRY), plugin_interface_version_sym); + DBUG_RETURN(0); + } + plugin_dl.version= *(int *)sym; + /* Versioning */ + if (plugin_dl.version < min_plugin_interface_version || + (plugin_dl.version >> 8) > (MYSQL_PLUGIN_INTERFACE_VERSION >> 8)) + { + dlclose(plugin_dl.handle); + if (report & REPORT_TO_USER) + my_error(ER_CANT_OPEN_LIBRARY, MYF(0), dlpath, 0, + "plugin interface version mismatch"); + if (report & REPORT_TO_LOG) + sql_print_error(ER(ER_CANT_OPEN_LIBRARY), dlpath, 0, + "plugin interface version mismatch"); + DBUG_RETURN(0); + } + /* Find plugin declarations */ + if (!(sym= dlsym(plugin_dl.handle, plugin_declarations_sym))) + { + dlclose(plugin_dl.handle); + if (report & REPORT_TO_USER) + my_error(ER_CANT_FIND_DL_ENTRY, MYF(0), plugin_declarations_sym); + if (report & REPORT_TO_LOG) + sql_print_error(ER(ER_CANT_FIND_DL_ENTRY), plugin_declarations_sym); + DBUG_RETURN(0); + } + plugin_dl.plugins= (struct st_mysql_plugin *)sym; + /* Duplicate and convert dll name */ + plugin_dl.dl.length= dl->length * files_charset_info->mbmaxlen + 1; + if (! (plugin_dl.dl.str= my_malloc(plugin_dl.dl.length, MYF(0)))) + { + dlclose(plugin_dl.handle); + if (report & REPORT_TO_USER) + my_error(ER_OUTOFMEMORY, MYF(0), plugin_dl.dl.length); + if (report & REPORT_TO_LOG) + sql_print_error(ER(ER_OUTOFMEMORY), plugin_dl.dl.length); + DBUG_RETURN(0); + } + plugin_dl.dl.length= copy_and_convert(plugin_dl.dl.str, plugin_dl.dl.length, + files_charset_info, dl->str, dl->length, system_charset_info, + &dummy_errors); + plugin_dl.dl.str[plugin_dl.dl.length]= 0; + /* Add this dll to array */ + if (! (tmp= plugin_dl_insert_or_reuse(&plugin_dl))) + { + dlclose(plugin_dl.handle); + my_free(plugin_dl.dl.str, MYF(0)); + if (report & REPORT_TO_USER) + my_error(ER_OUTOFMEMORY, MYF(0), sizeof(struct st_plugin_dl)); + if (report & REPORT_TO_LOG) + sql_print_error(ER(ER_OUTOFMEMORY), sizeof(struct st_plugin_dl)); + DBUG_RETURN(0); + } + DBUG_RETURN(tmp); +#else + DBUG_ENTER("plugin_dl_add"); + if (report & REPORT_TO_USER) + my_error(ER_FEATURE_DISABLED, MYF(0), "plugin", "HAVE_DLOPEN"); + if (report & REPORT_TO_LOG) + sql_print_error(ER(ER_FEATURE_DISABLED), "plugin", "HAVE_DLOPEN"); + DBUG_RETURN(0); +#endif +} + + +static void plugin_dl_del(LEX_STRING *dl) +{ +#ifdef HAVE_DLOPEN + uint i; + DBUG_ENTER("plugin_dl_del"); + for (i= 0; i < plugin_dl_array.elements; i++) + { + struct st_plugin_dl *tmp= dynamic_element(&plugin_dl_array, i, + struct st_plugin_dl *); + if (tmp->ref_count && + ! my_strnncoll(files_charset_info, + (const uchar *)dl->str, dl->length, + (const uchar *)tmp->dl.str, tmp->dl.length)) + { + /* Do not remove this element, unless no other plugin uses this dll. */ + if (! 
--tmp->ref_count) + { + dlclose(tmp->handle); + my_free(tmp->dl.str, MYF(0)); + bzero(tmp, sizeof(struct st_plugin_dl)); + } + break; + } + } + DBUG_VOID_RETURN; +#endif +} + + +static struct st_plugin_int *plugin_find_internal(LEX_STRING *name, int type) +{ + uint i; + DBUG_ENTER("plugin_find_internal"); + if (! initialized) + DBUG_RETURN(0); + if (type == MYSQL_ANY_PLUGIN) + { + for (i= 0; i < MYSQL_MAX_PLUGIN_TYPE_NUM; i++) + { + struct st_plugin_int *plugin= (st_plugin_int *) + hash_search(&plugin_hash[i], (const byte *)name->str, name->length); + if (plugin) + DBUG_RETURN(plugin); + } + } + else + DBUG_RETURN((st_plugin_int *) + hash_search(&plugin_hash[type], (const byte *)name->str, name->length)); + DBUG_RETURN(0); +} + + +my_bool plugin_is_ready(LEX_STRING *name, int type) +{ + my_bool rc= FALSE; + struct st_plugin_int *plugin; + DBUG_ENTER("plugin_is_ready"); + rw_rdlock(&THR_LOCK_plugin); + if ((plugin= plugin_find_internal(name, type)) && + plugin->state == PLUGIN_IS_READY) + rc= TRUE; + rw_unlock(&THR_LOCK_plugin); + DBUG_RETURN(rc); +} + + +struct st_plugin_int *plugin_lock(LEX_STRING *name, int type) +{ + struct st_plugin_int *rc; + DBUG_ENTER("plugin_lock"); + rw_wrlock(&THR_LOCK_plugin); + if ((rc= plugin_find_internal(name, type))) + { + if (rc->state == PLUGIN_IS_READY) + rc->ref_count++; + else + rc= 0; + } + rw_unlock(&THR_LOCK_plugin); + DBUG_RETURN(rc); +} + + +static st_plugin_int *plugin_insert_or_reuse(struct st_plugin_int *plugin) +{ + uint i; + DBUG_ENTER("plugin_insert_or_reuse"); + for (i= 0; i < plugin_array.elements; i++) + { + struct st_plugin_int *tmp= dynamic_element(&plugin_array, i, + struct st_plugin_int *); + if (tmp->state == PLUGIN_IS_FREED) + { + memcpy(tmp, plugin, sizeof(struct st_plugin_int)); + DBUG_RETURN(tmp); + } + } + if (insert_dynamic(&plugin_array, (gptr)plugin)) + DBUG_RETURN(0); + DBUG_RETURN(dynamic_element(&plugin_array, plugin_array.elements - 1, + struct st_plugin_int *)); +} + + +static my_bool plugin_add(LEX_STRING *name, LEX_STRING *dl, int report) +{ + struct st_plugin_int tmp; + struct st_mysql_plugin *plugin; + DBUG_ENTER("plugin_add"); + if (plugin_find_internal(name, MYSQL_ANY_PLUGIN)) + { + if (report & REPORT_TO_USER) + my_error(ER_UDF_EXISTS, MYF(0), name->str); + if (report & REPORT_TO_LOG) + sql_print_error(ER(ER_UDF_EXISTS), name->str); + DBUG_RETURN(TRUE); + } + if (! (tmp.plugin_dl= plugin_dl_add(dl, report))) + DBUG_RETURN(TRUE); + /* Find plugin by name */ + for (plugin= tmp.plugin_dl->plugins; plugin->info; plugin++) + { + uint name_len= strlen(plugin->name); + if (plugin->type >= 0 && plugin->type < MYSQL_MAX_PLUGIN_TYPE_NUM && + ! my_strnncoll(system_charset_info, + (const uchar *)name->str, name->length, + (const uchar *)plugin->name, + name_len)) + { + struct st_plugin_int *tmp_plugin_ptr; + if (*(int*)plugin->info < + min_plugin_info_interface_version[plugin->type] || + ((*(int*)plugin->info) >> 8) > + (cur_plugin_info_interface_version[plugin->type] >> 8)) + { + char buf[256]; + strxnmov(buf, sizeof(buf) - 1, "API version for ", + plugin_type_names[plugin->type].str, + " plugin is too different", NullS); + if (report & REPORT_TO_USER) + my_error(ER_CANT_OPEN_LIBRARY, MYF(0), dl->str, 0, buf); + if (report & REPORT_TO_LOG) + sql_print_error(ER(ER_CANT_OPEN_LIBRARY), dl->str, 0, buf); + goto err; + } + tmp.plugin= plugin; + tmp.name.str= (char *)plugin->name; + tmp.name.length= name_len; + tmp.ref_count= 0; + tmp.state= PLUGIN_IS_UNINITIALIZED; + if (! 
(tmp_plugin_ptr= plugin_insert_or_reuse(&tmp))) + { + if (report & REPORT_TO_USER) + my_error(ER_OUTOFMEMORY, MYF(0), sizeof(struct st_plugin_int)); + if (report & REPORT_TO_LOG) + sql_print_error(ER(ER_OUTOFMEMORY), sizeof(struct st_plugin_int)); + goto err; + } + if (my_hash_insert(&plugin_hash[plugin->type], (byte*)tmp_plugin_ptr)) + { + tmp_plugin_ptr->state= PLUGIN_IS_FREED; + if (report & REPORT_TO_USER) + my_error(ER_OUTOFMEMORY, MYF(0), sizeof(struct st_plugin_int)); + if (report & REPORT_TO_LOG) + sql_print_error(ER(ER_OUTOFMEMORY), sizeof(struct st_plugin_int)); + goto err; + } + DBUG_RETURN(FALSE); + } + } + if (report & REPORT_TO_USER) + my_error(ER_CANT_FIND_DL_ENTRY, MYF(0), name->str); + if (report & REPORT_TO_LOG) + sql_print_error(ER(ER_CANT_FIND_DL_ENTRY), name->str); +err: + plugin_dl_del(dl); + DBUG_RETURN(TRUE); +} + + +static void plugin_del(LEX_STRING *name) +{ + uint i; + struct st_plugin_int *plugin; + DBUG_ENTER("plugin_del"); + if ((plugin= plugin_find_internal(name, MYSQL_ANY_PLUGIN))) + { + hash_delete(&plugin_hash[plugin->plugin->type], (byte*)plugin); + plugin_dl_del(&plugin->plugin_dl->dl); + plugin->state= PLUGIN_IS_FREED; + } + DBUG_VOID_RETURN; +} + + +void plugin_unlock(struct st_plugin_int *plugin) +{ + DBUG_ENTER("plugin_unlock"); + rw_wrlock(&THR_LOCK_plugin); + DBUG_ASSERT(plugin && plugin->ref_count); + plugin->ref_count--; + if (plugin->state == PLUGIN_IS_DELETED && ! plugin->ref_count) + { + if (plugin->plugin->deinit) + plugin->plugin->deinit(); + plugin_del(&plugin->name); + } + rw_unlock(&THR_LOCK_plugin); + DBUG_VOID_RETURN; +} + + +static int plugin_initialize(struct st_plugin_int *plugin) +{ + DBUG_ENTER("plugin_initialize"); + + if (plugin->plugin->init) + { + if (plugin->plugin->init()) + { + sql_print_error("Plugin '%s' init function returned error.", + plugin->name.str); + DBUG_PRINT("warning", ("Plugin '%s' init function returned error.", + plugin->name.str)) + goto err; + } + } + + switch (plugin->plugin->type) + { + case MYSQL_STORAGE_ENGINE_PLUGIN: + if (ha_initialize_handlerton((handlerton*) plugin->plugin->info)) + { + sql_print_error("Plugin '%s' handlerton init returned error.", + plugin->name.str); + DBUG_PRINT("warning", ("Plugin '%s' handlerton init returned error.", + plugin->name.str)) + goto err; + } + break; + default: + break; + } + + DBUG_RETURN(0); +err: + DBUG_RETURN(1); +} + +static void plugin_call_initializer(void) +{ + uint i; + DBUG_ENTER("plugin_call_initializer"); + for (i= 0; i < plugin_array.elements; i++) + { + struct st_plugin_int *tmp= dynamic_element(&plugin_array, i, + struct st_plugin_int *); + if (tmp->state == PLUGIN_IS_UNINITIALIZED) + { + if (plugin_initialize(tmp)) + plugin_del(&tmp->name); + else + tmp->state= PLUGIN_IS_READY; + } + } + DBUG_VOID_RETURN; +} + + +static void plugin_call_deinitializer(void) +{ + uint i; + DBUG_ENTER("plugin_call_deinitializer"); + for (i= 0; i < plugin_array.elements; i++) + { + struct st_plugin_int *tmp= dynamic_element(&plugin_array, i, + struct st_plugin_int *); + if (tmp->state == PLUGIN_IS_READY) + { + if (tmp->plugin->deinit) + { + DBUG_PRINT("info", ("Deinitializing plugin: '%s'", tmp->name.str)); + if (tmp->plugin->deinit()) + { + DBUG_PRINT("warning", ("Plugin '%s' deinit function returned error.", + tmp->name.str)) + } + } + tmp->state= PLUGIN_IS_UNINITIALIZED; + } + } + DBUG_VOID_RETURN; +} + + +static byte *get_hash_key(const byte *buff, uint *length, + my_bool not_used __attribute__((unused))) +{ + struct st_plugin_int *plugin= (st_plugin_int *)buff; + 
*length= (uint)plugin->name.length; + return((byte *)plugin->name.str); +} + + +int plugin_init(void) +{ + int i; + DBUG_ENTER("plugin_init"); + + if (initialized) + DBUG_RETURN(0); + + my_rwlock_init(&THR_LOCK_plugin, NULL); + + if (my_init_dynamic_array(&plugin_dl_array, + sizeof(struct st_plugin_dl),16,16) || + my_init_dynamic_array(&plugin_array, + sizeof(struct st_plugin_int),16,16)) + goto err; + + for (i= 0; i < MYSQL_MAX_PLUGIN_TYPE_NUM; i++) + { + if (hash_init(&plugin_hash[i], system_charset_info, 16, 0, 0, + get_hash_key, NULL, 0)) + goto err; + } + + initialized= 1; + + DBUG_RETURN(0); + +err: + DBUG_RETURN(1); +} + + +my_bool plugin_register_builtin(struct st_mysql_plugin *plugin) +{ + struct st_plugin_int tmp; + DBUG_ENTER("plugin_register_builtin"); + + tmp.plugin= plugin; + tmp.name.str= (char *)plugin->name; + tmp.name.length= strlen(plugin->name); + tmp.state= PLUGIN_IS_UNINITIALIZED; + + /* Cannot be unloaded */ + tmp.ref_count= 1; + tmp.plugin_dl= 0; + + if (insert_dynamic(&plugin_array, (gptr)&tmp)) + DBUG_RETURN(1); + + if (my_hash_insert(&plugin_hash[plugin->type], + (byte*)dynamic_element(&plugin_array, + plugin_array.elements - 1, + struct st_plugin_int *))) + DBUG_RETURN(1); + + DBUG_RETURN(0); +} + + +void plugin_load(void) +{ + TABLE_LIST tables; + TABLE *table; + READ_RECORD read_record_info; + int error, i; + MEM_ROOT mem; + THD *new_thd; + DBUG_ENTER("plugin_load"); + + DBUG_ASSERT(initialized); + + if (!(new_thd= new THD)) + { + sql_print_error("Can't allocate memory for plugin structures"); + delete new_thd; + DBUG_VOID_RETURN; + } + init_sql_alloc(&mem, 1024, 0); + new_thd->thread_stack= (char*) &tables; + new_thd->store_globals(); + new_thd->db= my_strdup("mysql", MYF(0)); + new_thd->db_length= 5; + bzero((gptr)&tables, sizeof(tables)); + tables.alias= tables.table_name= (char*)"plugin"; + tables.lock_type= TL_READ; + tables.db= new_thd->db; + if (simple_open_n_lock_tables(new_thd, &tables)) + { + DBUG_PRINT("error",("Can't open plugin table")); + sql_print_error("Can't open the mysql.plugin table. 
Please run the mysql_install_db script to create it."); + goto end; + } + table= tables.table; + init_read_record(&read_record_info, new_thd, table, NULL, 1, 0); + while (!(error= read_record_info.read_record(&read_record_info))) + { + DBUG_PRINT("info", ("init plugin record")); + LEX_STRING name, dl; + name.str= get_field(&mem, table->field[0]); + name.length= strlen(name.str); + dl.str= get_field(&mem, table->field[1]); + dl.length= strlen(dl.str); + if (plugin_add(&name, &dl, REPORT_TO_LOG)) + DBUG_PRINT("warning", ("Couldn't load plugin named '%s' with soname '%s'.", + name.str, dl.str)); + } + plugin_call_initializer(); + if (error > 0) + sql_print_error(ER(ER_GET_ERRNO), my_errno); + end_read_record(&read_record_info); + new_thd->version--; // Force close to free memory +end: + free_root(&mem, MYF(0)); + close_thread_tables(new_thd); + delete new_thd; + /* Remember that we don't have a THD */ + my_pthread_setspecific_ptr(THR_THD, 0); + DBUG_VOID_RETURN; +} + + +void plugin_free(void) +{ + uint i; + DBUG_ENTER("plugin_free"); + plugin_call_deinitializer(); + for (i= 0; i < MYSQL_MAX_PLUGIN_TYPE_NUM; i++) + hash_free(&plugin_hash[i]); + delete_dynamic(&plugin_array); + for (i= 0; i < plugin_dl_array.elements; i++) + { + struct st_plugin_dl *tmp= dynamic_element(&plugin_dl_array, i, + struct st_plugin_dl *); +#ifdef HAVE_DLOPEN + if (tmp->handle) + { + dlclose(tmp->handle); + my_free(tmp->dl.str, MYF(0)); + } +#endif + } + delete_dynamic(&plugin_dl_array); + if (initialized) + { + initialized= 0; + rwlock_destroy(&THR_LOCK_plugin); + } + DBUG_VOID_RETURN; +} + + +my_bool mysql_install_plugin(THD *thd, LEX_STRING *name, LEX_STRING *dl) +{ + TABLE_LIST tables; + TABLE *table; + int error; + struct st_plugin_int *tmp; + DBUG_ENTER("mysql_install_plugin"); + + bzero(&tables, sizeof(tables)); + tables.db= (char *)"mysql"; + tables.table_name= tables.alias= (char *)"plugin"; + if (check_table_access(thd, INSERT_ACL, &tables, 0)) + DBUG_RETURN(TRUE); + + /* need to open before acquiring THR_LOCK_plugin or it will deadlock */ + if (! (table = open_ltable(thd, &tables, TL_WRITE))) + DBUG_RETURN(TRUE); + + rw_wrlock(&THR_LOCK_plugin); + if (plugin_add(name, dl, REPORT_TO_USER)) + goto err; + tmp= plugin_find_internal(name, MYSQL_ANY_PLUGIN); + + if (plugin_initialize(tmp)) + { + my_error(ER_CANT_INITIALIZE_UDF, MYF(0), name->str, + "Plugin initialization function failed."); + goto err; + } + + tmp->state= PLUGIN_IS_READY; + + restore_record(table, s->default_values); + table->field[0]->store(name->str, name->length, system_charset_info); + table->field[1]->store(dl->str, dl->length, files_charset_info); + error= table->file->ha_write_row(table->record[0]); + if (error) + { + table->file->print_error(error, MYF(0)); + goto deinit; + } + + rw_unlock(&THR_LOCK_plugin); + DBUG_RETURN(FALSE); +deinit: + if (tmp->plugin->deinit) + tmp->plugin->deinit(); +err: + plugin_del(name); + rw_unlock(&THR_LOCK_plugin); + DBUG_RETURN(TRUE); +} + + +my_bool mysql_uninstall_plugin(THD *thd, LEX_STRING *name) +{ + TABLE *table; + TABLE_LIST tables; + struct st_plugin_int *plugin; + DBUG_ENTER("mysql_uninstall_plugin"); + + bzero(&tables, sizeof(tables)); + tables.db= (char *)"mysql"; + tables.table_name= tables.alias= (char *)"plugin"; + + /* need to open before acquiring THR_LOCK_plugin or it will deadlock */ + if (! 
(table= open_ltable(thd, &tables, TL_WRITE)))
+    DBUG_RETURN(TRUE);
+
+  rw_wrlock(&THR_LOCK_plugin);
+  if (!(plugin= plugin_find_internal(name, MYSQL_ANY_PLUGIN)))
+  {
+    my_error(ER_SP_DOES_NOT_EXIST, MYF(0), "PLUGIN", name->str);
+    goto err;
+  }
+  if (!plugin->plugin_dl)
+  {
+    push_warning(thd, MYSQL_ERROR::WARN_LEVEL_WARN, 0,
+                 "Built-in plugins cannot be deleted.");
+    my_error(ER_SP_DOES_NOT_EXIST, MYF(0), "PLUGIN", name->str);
+    goto err;
+  }
+
+  if (plugin->ref_count)
+  {
+    plugin->state= PLUGIN_IS_DELETED;
+    push_warning(thd, MYSQL_ERROR::WARN_LEVEL_WARN, 0,
+                 "Plugin is not deleted, waiting on tables.");
+  }
+  else
+  {
+    if (plugin->plugin->deinit)
+      plugin->plugin->deinit();
+    plugin_del(name);
+  }
+
+  table->field[0]->store(name->str, name->length, system_charset_info);
+  table->file->extra(HA_EXTRA_RETRIEVE_ALL_COLS);
+  if (! table->file->index_read_idx(table->record[0], 0,
+                                    (byte *)table->field[0]->ptr,
+                                    table->key_info[0].key_length,
+                                    HA_READ_KEY_EXACT))
+  {
+    int error;
+    if ((error= table->file->ha_delete_row(table->record[0])))
+    {
+      table->file->print_error(error, MYF(0));
+      goto err;
+    }
+  }
+  rw_unlock(&THR_LOCK_plugin);
+  DBUG_RETURN(FALSE);
+err:
+  rw_unlock(&THR_LOCK_plugin);
+  DBUG_RETURN(TRUE);
+}
+
+
+my_bool plugin_foreach(THD *thd, plugin_foreach_func *func,
+                       int type, void *arg)
+{
+  uint idx;
+  struct st_plugin_int *plugin;
+  DBUG_ENTER("plugin_foreach");
+  rw_rdlock(&THR_LOCK_plugin);
+
+  if (type == MYSQL_ANY_PLUGIN)
+  {
+    for (idx= 0; idx < plugin_array.elements; idx++)
+    {
+      plugin= dynamic_element(&plugin_array, idx, struct st_plugin_int *);
+
+      /* FREED records may have garbage pointers */
+      if ((plugin->state != PLUGIN_IS_FREED) &&
+          func(thd, plugin, arg))
+        goto err;
+    }
+  }
+  else
+  {
+    HASH *hash= &plugin_hash[type];
+    for (idx= 0; idx < hash->records; idx++)
+    {
+      plugin= (struct st_plugin_int *) hash_element(hash, idx);
+      if ((plugin->state != PLUGIN_IS_FREED) &&
+          (plugin->state != PLUGIN_IS_DELETED) &&
+          func(thd, plugin, arg))
+        goto err;
+    }
+  }
+
+  rw_unlock(&THR_LOCK_plugin);
+  DBUG_RETURN(FALSE);
+err:
+  rw_unlock(&THR_LOCK_plugin);
+  DBUG_RETURN(TRUE);
+}
diff --git a/sql/sql_plugin.h b/sql/sql_plugin.h
new file mode 100644
index 00000000000..f0b70ca9d26
--- /dev/null
+++ b/sql/sql_plugin.h
@@ -0,0 +1,73 @@
+/* Copyright (C) 2005 MySQL AB
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
+
+#ifndef _sql_plugin_h
+#define _sql_plugin_h
+
+#include <plugin.h>
+
+#define MYSQL_ANY_PLUGIN         -1
+
+enum enum_plugin_state
+{
+  PLUGIN_IS_FREED= 0,
+  PLUGIN_IS_DELETED,
+  PLUGIN_IS_UNINITIALIZED,
+  PLUGIN_IS_READY
+};
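For orientation, a sketch (not part of the patch) that collects in one place the state transitions sql_plugin.cc performs on these values; the helper is hypothetical, the transitions are the ones coded above:

static enum enum_plugin_state
next_state(enum enum_plugin_state s, bool init_ok, bool in_use)
{
  switch (s)
  {
  case PLUGIN_IS_UNINITIALIZED:   /* plugin_initialize() outcome */
    return init_ok ? PLUGIN_IS_READY : PLUGIN_IS_FREED;
  case PLUGIN_IS_READY:           /* UNINSTALL PLUGIN */
    return in_use ? PLUGIN_IS_DELETED : PLUGIN_IS_FREED;
  case PLUGIN_IS_DELETED:         /* last plugin_unlock() runs deinit() */
    return in_use ? PLUGIN_IS_DELETED : PLUGIN_IS_FREED;
  default:
    return s;                     /* FREED slots are recycled by
                                     plugin_insert_or_reuse() */
  }
}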
+
+/* A handle for the dynamic library containing a plugin or plugins */
+
+struct st_plugin_dl
+{
+  LEX_STRING dl;
+  void *handle;
+  struct st_mysql_plugin *plugins;
+  int version;
+  uint ref_count;            /* number of plugins loaded from the library */
+};
+
+/* A handle for a single plugin */
+
+struct st_plugin_int
+{
+  LEX_STRING name;
+  struct st_mysql_plugin *plugin;
+  struct st_plugin_dl *plugin_dl;
+  enum enum_plugin_state state;
+  uint ref_count;            /* number of threads using the plugin */
+};
+
+extern char *opt_plugin_dir_ptr;
+extern char opt_plugin_dir[FN_REFLEN];
+extern LEX_STRING plugin_type_names[];
+extern int plugin_init(void);
+extern void plugin_load(void);
+extern void plugin_free(void);
+extern my_bool plugin_is_ready(LEX_STRING *name, int type);
+extern st_plugin_int *plugin_lock(LEX_STRING *name, int type);
+extern void plugin_unlock(struct st_plugin_int *plugin);
+extern my_bool mysql_install_plugin(THD *thd, LEX_STRING *name,
+                                    LEX_STRING *dl);
+extern my_bool mysql_uninstall_plugin(THD *thd, LEX_STRING *name);
+
+extern my_bool plugin_register_builtin(struct st_mysql_plugin *plugin);
+
+typedef my_bool (plugin_foreach_func)(THD *thd,
+                                      st_plugin_int *plugin,
+                                      void *arg);
+extern my_bool plugin_foreach(THD *thd, plugin_foreach_func *func,
+                              int type, void *arg);
+#endif
diff --git a/sql/sql_prepare.cc b/sql/sql_prepare.cc
index ffbbf0c6476..7b35f057217 100644
--- a/sql/sql_prepare.cc
+++ b/sql/sql_prepare.cc
@@ -1726,7 +1726,9 @@ static bool check_prepared_statement(Prepared_statement *stmt,
   case SQLCOM_SHOW_COLUMN_TYPES:
   case SQLCOM_SHOW_STATUS:
   case SQLCOM_SHOW_VARIABLES:
-  case SQLCOM_SHOW_LOGS:
+  case SQLCOM_SHOW_ENGINE_LOGS:
+  case SQLCOM_SHOW_ENGINE_STATUS:
+  case SQLCOM_SHOW_ENGINE_MUTEX:
   case SQLCOM_SHOW_TABLES:
   case SQLCOM_SHOW_OPEN_TABLES:
   case SQLCOM_SHOW_CHARSETS:
@@ -2740,11 +2742,11 @@ bool Prepared_statement::prepare(const char *packet, uint packet_len)
   old_stmt_arena= thd->stmt_arena;
   thd->stmt_arena= this;
   lex_start(thd, (uchar*) thd->query, thd->query_length);
-  lex->safe_to_cache_query= FALSE;
   lex->stmt_prepare_mode= TRUE;
 
   error= yyparse((void *)thd) || thd->is_fatal_error ||
          thd->net.report_error || init_param_array(this);
+  lex->safe_to_cache_query= FALSE;
   /*
     While doing context analysis of the query (in check_prepared_statement)
     we allocate a lot of additional memory: for open tables, JOINs, derived
diff --git a/sql/sql_rename.cc b/sql/sql_rename.cc
index 80fcb973028..150c1dba1c9 100644
--- a/sql/sql_rename.cc
+++ b/sql/sql_rename.cc
@@ -84,8 +84,8 @@ bool mysql_rename_tables(THD *thd, TABLE_LIST *table_list)
     if (mysql_bin_log.is_open())
     {
       thd->clear_error();
-      Query_log_event qinfo(thd, thd->query, thd->query_length, 0, FALSE);
-      mysql_bin_log.write(&qinfo);
+      thd->binlog_query(THD::STMT_QUERY_TYPE,
+                        thd->query, thd->query_length, FALSE, FALSE);
     }
     send_ok(thd);
   }
@@ -134,7 +134,7 @@ rename_tables(THD *thd, TABLE_LIST *table_list, bool skip_error)
 {
   TABLE_LIST *ren_table,*new_table;
   frm_type_enum frm_type;
-  db_type table_type;
+  enum legacy_db_type table_type;
 
   DBUG_ENTER("rename_tables");
 
@@ -155,18 +155,15 @@ rename_tables(THD *thd, TABLE_LIST *table_list, bool skip_error)
       old_alias= ren_table->table_name;
       new_alias= new_table->table_name;
     }
-    sprintf(name,"%s/%s/%s%s",mysql_data_home,
-            new_table->db, new_alias, reg_ext);
-    unpack_filename(name, name);
+    build_table_filename(name, sizeof(name),
+                         new_table->db, new_alias, reg_ext);
     if (!access(name,F_OK))
    {
      my_error(ER_TABLE_EXISTS_ERROR, MYF(0), new_alias);
      DBUG_RETURN(ren_table);              // This can't be skipped
    }
-    sprintf(name,"%s/%s/%s%s",mysql_data_home,
-            ren_table->db,
old_alias, - reg_ext); - unpack_filename(name, name); + build_table_filename(name, sizeof(name), + ren_table->db, old_alias, reg_ext); frm_type= mysql_frm_type(thd, name, &table_type); switch (frm_type) @@ -176,7 +173,8 @@ rename_tables(THD *thd, TABLE_LIST *table_list, bool skip_error) if (table_type == DB_TYPE_UNKNOWN) my_error(ER_FILE_NOT_FOUND, MYF(0), name, my_errno); else - rc= mysql_rename_table(table_type, ren_table->db, old_alias, + rc= mysql_rename_table(ha_resolve_by_legacy_type(thd, table_type), + ren_table->db, old_alias, new_table->db, new_alias); break; } diff --git a/sql/sql_repl.cc b/sql/sql_repl.cc index cd293fc21c7..dd70f90b3da 100644 --- a/sql/sql_repl.cc +++ b/sql/sql_repl.cc @@ -19,6 +19,7 @@ #include "sql_repl.h" #include "log_event.h" +#include "rpl_filter.h" #include <my_dir.h> int max_binlog_dump_events = 0; // unlimited @@ -1457,8 +1458,8 @@ bool show_binlog_info(THD* thd) int dir_len = dirname_length(li.log_file_name); protocol->store(li.log_file_name + dir_len, &my_charset_bin); protocol->store((ulonglong) li.pos); - protocol->store(&binlog_do_db); - protocol->store(&binlog_ignore_db); + protocol->store(binlog_filter->get_do_db()); + protocol->store(binlog_filter->get_ignore_db()); if (protocol->write()) DBUG_RETURN(TRUE); } diff --git a/sql/sql_repl.h b/sql/sql_repl.h index 9eb6456ee20..789de64da85 100644 --- a/sql/sql_repl.h +++ b/sql/sql_repl.h @@ -14,6 +14,8 @@ along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#include "rpl_filter.h" + #ifdef HAVE_REPLICATION #include "slave.h" @@ -31,7 +33,6 @@ typedef struct st_slave_info extern my_bool opt_show_slave_auth_info; extern char *master_host, *master_info_file; extern bool server_id_supplied; -extern I_List<i_string> binlog_do_db, binlog_ignore_db; extern int max_binlog_dump_events; extern my_bool opt_sporadic_binlog_dump_fail; diff --git a/sql/sql_select.cc b/sql/sql_select.cc index ad6375290cd..8c954297f42 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -633,6 +633,21 @@ JOIN::optimize() DBUG_RETURN(0); } +#ifdef WITH_PARTITION_STORAGE_ENGINE + { + TABLE_LIST *tbl; + for (tbl= select_lex->leaf_tables; tbl; tbl= tbl->next_leaf) + { + if (!tbl->embedding) + { + Item *prune_cond= tbl->on_expr? tbl->on_expr : conds; + tbl->table->no_partitions_used= prune_partitions(thd, tbl->table, + prune_cond); + } + } + } +#endif + /* Optimize count(*), min() and max() */ if (tables_list && tmp_table_param.sum_func_count && ! group_list) { @@ -961,23 +976,19 @@ JOIN::optimize() } /* - Need to tell Innobase that to play it safe, it should fetch all - columns of the tables: this is because MySQL may build row - pointers for the rows, and for all columns of the primary key the - field->query_id has not necessarily been set to thd->query_id by - MySQL. + Need to tell handlers that to play it safe, it should fetch all + columns of the primary key of the tables: this is because MySQL may + build row pointers for the rows, and for all columns of the primary key + the read set has not necessarily been set by the server code. 
*/ - -#ifdef HAVE_INNOBASE_DB if (need_tmp || select_distinct || group_list || order) { for (uint i_h = const_tables; i_h < tables; i_h++) { TABLE* table_h = join_tab[i_h].table; - table_h->file->extra(HA_EXTRA_RETRIEVE_PRIMARY_KEY); + table_h->file->ha_retrieve_all_pk(); } } -#endif DBUG_EXECUTE("info",TEST_join(this);); @@ -1339,6 +1350,9 @@ JOIN::exec() /* Copy data to the temporary table */ thd->proc_info= "Copying to tmp table"; DBUG_PRINT("info", ("%s", thd->proc_info)); + if (!curr_join->sort_and_group && + curr_join->const_tables != curr_join->tables) + curr_join->join_tab[curr_join->const_tables].sorted= 0; if ((tmp_error= do_select(curr_join, (List<Item> *) 0, curr_tmp_table, 0))) { error= tmp_error; @@ -1494,6 +1508,9 @@ JOIN::exec() 1, TRUE)) DBUG_VOID_RETURN; curr_join->group_list= 0; + if (!curr_join->sort_and_group && + curr_join->const_tables != curr_join->tables) + curr_join->join_tab[curr_join->const_tables].sorted= 0; if (setup_sum_funcs(curr_join->thd, curr_join->sum_funcs) || (tmp_error= do_select(curr_join, (List<Item> *) 0, curr_tmp_table, 0))) @@ -1680,6 +1697,16 @@ JOIN::exec() (select_options & OPTION_FOUND_ROWS ? HA_POS_ERROR : unit->select_limit_cnt))) DBUG_VOID_RETURN; + if (curr_join->const_tables != curr_join->tables && + !curr_join->join_tab[curr_join->const_tables].table->sort.io_cache) + { + /* + If no IO cache exists for the first table then we are using an + INDEX SCAN and no filesort. Thus we should not remove the sorted + attribute on the INDEX SCAN. + */ + skip_sort_order= 1; + } } } /* XXX: When can we have here thd->net.report_error not zero? */ @@ -2006,7 +2033,11 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables, COND *conds, if (*s->on_expr_ref) { /* s is the only inner table of an outer join */ +#ifdef WITH_PARTITION_STORAGE_ENGINE + if ((!table->file->records || table->no_partitions_used) && !embedding) +#else if (!table->file->records && !embedding) +#endif { // Empty table s->dependent= 0; // Ignore LEFT JOIN depend. set_position(join,const_count++,s,(KEYUSE*) 0); @@ -2033,8 +2064,14 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables, COND *conds, while (embedding); continue; } - - if ((table->s->system || table->file->records <= 1) && ! 
s->dependent && +#ifdef WITH_PARTITION_STORAGE_ENGINE + bool no_partitions_used= table->no_partitions_used; +#else + const bool no_partitions_used= FALSE; +#endif + if ((table->s->system || table->file->records <= 1 || + no_partitions_used) && + !s->dependent && !(table->file->table_flags() & HA_NOT_EXACT_COUNT) && !table->fulltext_searched) { @@ -5086,7 +5123,7 @@ static void add_not_null_conds(JOIN *join) SYNOPSIS add_found_match_trig_cond() tab the first inner table for most nested outer join - cond the predicate to be guarded + cond the predicate to be guarded (must be set) root_tab the first inner table to stop DESCRIPTION @@ -5104,12 +5141,11 @@ static COND* add_found_match_trig_cond(JOIN_TAB *tab, COND *cond, JOIN_TAB *root_tab) { COND *tmp; - if (tab == root_tab || !cond) + DBUG_ASSERT(cond != 0); + if (tab == root_tab) return cond; if ((tmp= add_found_match_trig_cond(tab->first_upper, cond, root_tab))) - { tmp= new Item_func_trig_cond(tmp, &tab->found); - } if (tmp) { tmp->quick_fix_field(); @@ -5270,6 +5306,10 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) for (uint i=join->const_tables ; i < join->tables ; i++) { JOIN_TAB *tab=join->join_tab+i; + /* + first_inner is the X in queries like: + SELECT * FROM t1 LEFT OUTER JOIN (t2 JOIN t3) ON X + */ JOIN_TAB *first_inner_tab= tab->first_inner; table_map current_map= tab->table->map; bool use_quick_range=0; @@ -5320,15 +5360,15 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) */ DBUG_PRINT("info", ("Item_int")); tmp= new Item_int((longlong) 1,1); // Always true - DBUG_PRINT("info", ("Item_int 0x%lx", (ulong)tmp)); } } if (tmp || !cond) { DBUG_EXECUTE("where",print_where(tmp,tab->table->alias);); - SQL_SELECT *sel=tab->select=(SQL_SELECT*) - thd->memdup((gptr) select, sizeof(SQL_SELECT)); + SQL_SELECT *sel= tab->select= ((SQL_SELECT*) + thd->memdup((gptr) select, + sizeof(*select))); if (!sel) DBUG_RETURN(1); // End of memory /* @@ -5555,6 +5595,7 @@ make_join_readinfo(JOIN *join, uint options) uint i; bool statistics= test(!(join->select_options & SELECT_DESCRIBE)); + bool sorted= 1; DBUG_ENTER("make_join_readinfo"); for (i=join->const_tables ; i < join->tables ; i++) @@ -5564,6 +5605,8 @@ make_join_readinfo(JOIN *join, uint options) tab->read_record.table= table; tab->read_record.file=table->file; tab->next_select=sub_select; /* normal select */ + tab->sorted= sorted; + sorted= 0; // only first must be sorted switch (tab->type) { case JT_SYSTEM: // Only happens with left join table->status=STATUS_NO_RECORD; @@ -8016,7 +8059,7 @@ const_expression_in_where(COND *cond, Item *comp_item, Item **const_item) new_created field */ -Field* create_tmp_field_from_field(THD *thd, Field* org_field, +Field *create_tmp_field_from_field(THD *thd, Field *org_field, const char *name, TABLE *table, Item_field *item, uint convert_blob_length) { @@ -8025,12 +8068,14 @@ Field* create_tmp_field_from_field(THD *thd, Field* org_field, if (convert_blob_length && (org_field->flags & BLOB_FLAG)) new_field= new Field_varstring(convert_blob_length, org_field->maybe_null(), - org_field->field_name, table, + org_field->field_name, table->s, org_field->charset()); else new_field= org_field->new_field(thd->mem_root, table); if (new_field) { + new_field->init(table); + new_field->orig_table= org_field->orig_table; if (item) item->result_field= new_field; else @@ -8073,18 +8118,18 @@ static Field *create_tmp_field_from_item(THD *thd, Item *item, TABLE *table, Item ***copy_func, bool modify_item, uint convert_blob_length) { - bool 
maybe_null=item->maybe_null; + bool maybe_null= item->maybe_null; Field *new_field; LINT_INIT(new_field); switch (item->result_type()) { case REAL_RESULT: - new_field=new Field_double(item->max_length, maybe_null, - item->name, table, item->decimals); + new_field= new Field_double(item->max_length, maybe_null, + item->name, item->decimals); break; case INT_RESULT: - new_field=new Field_longlong(item->max_length, maybe_null, - item->name, table, item->unsigned_flag); + new_field= new Field_longlong(item->max_length, maybe_null, + item->name, item->unsigned_flag); break; case STRING_RESULT: DBUG_ASSERT(item->collation.collation); @@ -8096,26 +8141,29 @@ static Field *create_tmp_field_from_item(THD *thd, Item *item, TABLE *table, */ if ((type= item->field_type()) == MYSQL_TYPE_DATETIME || type == MYSQL_TYPE_TIME || type == MYSQL_TYPE_DATE) - new_field= item->tmp_table_field_from_field_type(table); + new_field= item->tmp_table_field_from_field_type(table, 1); else if (item->max_length/item->collation.collation->mbmaxlen > 255 && convert_blob_length) new_field= new Field_varstring(convert_blob_length, maybe_null, - item->name, table, + item->name, table->s, item->collation.collation); else new_field= item->make_string_field(table); break; case DECIMAL_RESULT: new_field= new Field_new_decimal(item->max_length, maybe_null, item->name, - table, item->decimals, item->unsigned_flag); + item->decimals, item->unsigned_flag); break; case ROW_RESULT: default: // This case should never be choosen DBUG_ASSERT(0); - new_field= 0; // to satisfy compiler (uninitialized variable) + new_field= 0; break; } + if (new_field) + new_field->init(table); + if (copy_func && item->is_result_field()) *((*copy_func)++) = item; // Save for copy_funcs if (modify_item) @@ -8142,14 +8190,20 @@ Field *create_tmp_field_for_schema(THD *thd, Item *item, TABLE *table) { if (item->field_type() == MYSQL_TYPE_VARCHAR) { + Field *field; if (item->max_length > MAX_FIELD_VARCHARLENGTH / item->collation.collation->mbmaxlen) - return new Field_blob(item->max_length, item->maybe_null, - item->name, table, item->collation.collation); - return new Field_varstring(item->max_length, item->maybe_null, item->name, - table, item->collation.collation); + field= new Field_blob(item->max_length, item->maybe_null, + item->name, item->collation.collation); + else + field= new Field_varstring(item->max_length, item->maybe_null, + item->name, + table->s, item->collation.collation); + if (field) + field->init(table); + return field; } - return item->tmp_table_field_from_field_type(table); + return item->tmp_table_field_from_field_type(table, 0); } @@ -8200,11 +8254,13 @@ Field *create_tmp_field(THD *thd, TABLE *table,Item *item, Item::Type type, item= item->real_item(); type= Item::FIELD_ITEM; } + switch (type) { case Item::SUM_FUNC_ITEM: { Item_sum *item_sum=(Item_sum*) item; - Field *result= item_sum->create_tmp_field(group, table, convert_blob_length); + Field *result= item_sum->create_tmp_field(group, table, + convert_blob_length); if (!result) thd->fatal_error(); return result; @@ -8317,12 +8373,13 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields, { MEM_ROOT *mem_root_save, own_root; TABLE *table; + TABLE_SHARE *share; uint i,field_count,null_count,null_pack_length; uint copy_func_count= param->func_count; uint hidden_null_count, hidden_null_pack_length, hidden_field_count; uint blob_count,group_null_items, string_count; uint temp_pool_slot=MY_BIT_NONE; - ulong reclength, string_total_length; + ulong reclength, 
string_total_length, fieldnr= 0; bool using_unique_constraint= 0; bool use_packed_rows= 0; bool not_all_columns= !(select_options & TMP_TABLE_ALL_COLUMNS); @@ -8345,7 +8402,7 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields, statistic_increment(thd->status_var.created_tmp_tables, &LOCK_status); if (use_temp_pool && !(test_flags & TEST_KEEP_TMP_TABLES)) - temp_pool_slot = bitmap_set_next(&temp_pool); + temp_pool_slot = bitmap_lock_set_next(&temp_pool); if (temp_pool_slot != MY_BIT_NONE) // we got a slot sprintf(path, "%s_%lx_%i", tmp_file_prefix, @@ -8396,6 +8453,7 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields, if (!multi_alloc_root(&own_root, &table, sizeof(*table), + &share, sizeof(*share), ®_field, sizeof(Field*) * (field_count+1), &blob_field, sizeof(uint)*(field_count+1), &from_field, sizeof(Field*)*field_count, @@ -8410,13 +8468,13 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields, param->group_length : 0, NullS)) { - bitmap_clear_bit(&temp_pool, temp_pool_slot); + bitmap_lock_clear_bit(&temp_pool, temp_pool_slot); DBUG_RETURN(NULL); /* purecov: inspected */ } /* Copy_field belongs to TMP_TABLE_PARAM, allocate it in THD mem_root */ if (!(param->copy_field= copy= new (thd->mem_root) Copy_field[field_count])) { - bitmap_clear_bit(&temp_pool, temp_pool_slot); + bitmap_lock_clear_bit(&temp_pool, temp_pool_slot); free_root(&own_root, MYF(0)); /* purecov: inspected */ DBUG_RETURN(NULL); /* purecov: inspected */ } @@ -8444,19 +8502,17 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields, table->used_keys.init(); table->keys_in_use_for_query.init(); - table->s= &table->share_not_to_be_used; - table->s->blob_field= blob_field; - table->s->table_name= table->s->path= tmpname; - table->s->db= ""; - table->s->blob_ptr_size= mi_portable_sizeof_char_ptr; - table->s->tmp_table= TMP_TABLE; - table->s->db_low_byte_first=1; // True for HEAP and MyISAM - table->s->table_charset= param->table_charset; - table->s->keys_for_keyread.init(); - table->s->keys_in_use.init(); + table->s= share; + init_tmp_table_share(share, "", 0, tmpname, tmpname); + share->blob_field= blob_field; + share->blob_ptr_size= mi_portable_sizeof_char_ptr; + share->db_low_byte_first=1; // True for HEAP and MyISAM + share->table_charset= param->table_charset; + share->primary_key= MAX_KEY; // Indicate no primary key + share->keys_for_keyread.init(); + share->keys_in_use.init(); /* For easier error reporting */ - table->s->table_cache_key= (char*) (table->s->db= ""); - + share->table_cache_key= share->db; /* Calculate which type of fields we will store in the temporary table */ @@ -8531,6 +8587,7 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields, (*argp)->maybe_null=1; } new_field->query_id= thd->query_id; + new_field->fieldnr= ++fieldnr; } } } @@ -8582,6 +8639,7 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields, new_field->flags|= GROUP_FLAG; } new_field->query_id= thd->query_id; + new_field->fieldnr= ++fieldnr; new_field->field_index= (uint) (reg_field - table->field); *(reg_field++) =new_field; } @@ -8598,14 +8656,15 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields, DBUG_ASSERT(field_count >= (uint) (reg_field - table->field)); field_count= (uint) (reg_field - table->field); *blob_field= 0; // End marker + share->fields= field_count; /* If result table is small; use a heap */ if (blob_count || using_unique_constraint || (select_options & (OPTION_BIG_TABLES | SELECT_SMALL_RESULT)) == 
OPTION_BIG_TABLES || (select_options & TMP_TABLE_FORCE_MYISAM)) { - table->file= get_new_handler(table, &table->mem_root, - table->s->db_type= DB_TYPE_MYISAM); + table->file= get_new_handler(share, &table->mem_root, + share->db_type= &myisam_hton); if (group && (param->group_parts > table->file->max_key_parts() || param->group_length > table->file->max_key_length())) @@ -8613,14 +8672,16 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields, } else { - table->file= get_new_handler(table, &table->mem_root, - table->s->db_type= DB_TYPE_HEAP); + table->file= get_new_handler(share, &table->mem_root, + share->db_type= &heap_hton); } + if (!table->file) + goto err; if (!using_unique_constraint) reclength+= group_null_items; // null flag is stored separately - table->s->blob_fields= blob_count; + share->blob_fields= blob_count; if (blob_count == 0) { /* We need to ensure that first byte is not 0 for the delete link */ @@ -8642,16 +8703,15 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields, string_total_length / string_count >= AVG_STRING_LENGTH_TO_PACK_ROWS)) use_packed_rows= 1; - table->s->fields= field_count; - table->s->reclength= reclength; + share->reclength= reclength; { uint alloc_length=ALIGN_SIZE(reclength+MI_UNIQUE_HASH_LENGTH+1); - table->s->rec_buff_length= alloc_length; + share->rec_buff_length= alloc_length; if (!(table->record[0]= (byte*) alloc_root(&table->mem_root, alloc_length*3))) goto err; table->record[1]= table->record[0]+alloc_length; - table->s->default_values= table->record[1]+alloc_length; + share->default_values= table->record[1]+alloc_length; } copy_func[0]=0; // End marker @@ -8667,8 +8727,8 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields, bfill(null_flags,null_pack_length,255); // Set null fields table->null_flags= (uchar*) table->record[0]; - table->s->null_fields= null_count+ hidden_null_count; - table->s->null_bytes= null_pack_length; + share->null_fields= null_count+ hidden_null_count; + share->null_bytes= null_pack_length; } null_count= (blob_count == 0) ? 1 : 0; hidden_field_count=param->hidden_field_count; @@ -8741,13 +8801,13 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields, store_record(table,s->default_values); // Make empty default record if (thd->variables.tmp_table_size == ~(ulong) 0) // No limit - table->s->max_rows= ~(ha_rows) 0; + share->max_rows= ~(ha_rows) 0; else - table->s->max_rows= (((table->s->db_type == DB_TYPE_HEAP) ? + share->max_rows= (((share->db_type == &heap_hton) ? 
min(thd->variables.tmp_table_size, thd->variables.max_heap_table_size) : - thd->variables.tmp_table_size)/ table->s->reclength); - set_if_bigger(table->s->max_rows,1); // For dummy start options + thd->variables.tmp_table_size)/ share->reclength); + set_if_bigger(share->max_rows,1); // For dummy start options keyinfo= param->keyinfo; if (group) @@ -8755,8 +8815,8 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields, DBUG_PRINT("info",("Creating group key in temporary table")); table->group=group; /* Table is grouped by key */ param->group_buff=group_buff; - table->s->keys=1; - table->s->uniques= test(using_unique_constraint); + share->keys=1; + share->uniques= test(using_unique_constraint); table->key_info=keyinfo; keyinfo->key_part=key_part_info; keyinfo->flags=HA_NOSAME; @@ -8824,14 +8884,14 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields, null_pack_length-=hidden_null_pack_length; keyinfo->key_parts= ((field_count-param->hidden_field_count)+ test(null_pack_length)); - set_if_smaller(table->s->max_rows, rows_limit); + set_if_smaller(share->max_rows, rows_limit); param->end_write_records= rows_limit; table->distinct= 1; - table->s->keys= 1; + share->keys= 1; if (blob_count) { using_unique_constraint=1; - table->s->uniques= 1; + share->uniques= 1; } if (!(key_part_info= (KEY_PART_INFO*) alloc_root(&table->mem_root, @@ -8850,12 +8910,15 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields, key_part_info->null_bit=0; key_part_info->offset=hidden_null_pack_length; key_part_info->length=null_pack_length; - key_part_info->field=new Field_string((char*) table->record[0], - (uint32) key_part_info->length, - (uchar*) 0, - (uint) 0, - Field::NONE, - NullS, table, &my_charset_bin); + key_part_info->field= new Field_string((char*) table->record[0], + (uint32) key_part_info->length, + (uchar*) 0, + (uint) 0, + Field::NONE, + NullS, &my_charset_bin); + if (!key_part_info->field) + goto err; + key_part_info->field->init(table); key_part_info->key_type=FIELDFLAG_BINARY; key_part_info->type= HA_KEYTYPE_BINARY; key_part_info++; @@ -8879,8 +8942,8 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields, } if (thd->is_fatal_error) // If end of memory goto err; /* purecov: inspected */ - table->s->db_record_offset= 1; - if (table->s->db_type == DB_TYPE_MYISAM) + share->db_record_offset= 1; + if (share->db_type == &myisam_hton) { if (create_myisam_tmp_table(table,param,select_options)) goto err; @@ -8888,6 +8951,8 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields, if (open_tmp_table(table)) goto err; + table->file->ha_set_all_bits_in_read_set(); + table->file->ha_set_all_bits_in_write_set(); thd->mem_root= mem_root_save; DBUG_RETURN(table); @@ -8895,7 +8960,7 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields, err: thd->mem_root= mem_root_save; free_tmp_table(thd,table); /* purecov: inspected */ - bitmap_clear_bit(&temp_pool, temp_pool_slot); + bitmap_lock_clear_bit(&temp_pool, temp_pool_slot); DBUG_RETURN(NULL); /* purecov: inspected */ } @@ -8912,7 +8977,7 @@ err: field_list list of column definitions DESCRIPTION - The created table doesn't have a table handler assotiated with + The created table doesn't have a table handler associated with it, has no keys, no group/distinct, no copy_funcs array. The sole purpose of this TABLE object is to use the power of Field class to read/write data to/from table->record[0]. 
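As a side note on the create_tmp_table() hunks above: the engine choice (MyISAM whenever blobs, a forced unique constraint, OPTION_BIG_TABLES or TMP_TABLE_FORCE_MYISAM are involved, HEAP otherwise) and the share->max_rows arithmetic can be restated as a small standalone sketch. This is illustrative C++ only, not server code; TmpEngine, TmpTableSpec and both helper names are invented:

#include <algorithm>
#include <cstdint>

enum class TmpEngine { HEAP, MYISAM };

struct TmpTableSpec {
  unsigned blob_count;           // HEAP cannot store blob columns
  bool using_unique_constraint;  // a forced unique constraint needs MyISAM
  bool big_tables_option;        // OPTION_BIG_TABLES without SELECT_SMALL_RESULT
  bool force_myisam;             // TMP_TABLE_FORCE_MYISAM
};

TmpEngine pick_tmp_engine(const TmpTableSpec &s)
{
  if (s.blob_count || s.using_unique_constraint ||
      s.big_tables_option || s.force_myisam)
    return TmpEngine::MYISAM;
  return TmpEngine::HEAP;
}

// Mirrors the share->max_rows computation above: a HEAP table is capped by
// both tmp_table_size and max_heap_table_size, MyISAM by tmp_table_size
// only, always with a floor of one row. reclength is assumed non-zero.
uint64_t tmp_max_rows(TmpEngine engine, uint64_t tmp_table_size,
                      uint64_t max_heap_table_size, uint64_t reclength)
{
  uint64_t budget= (engine == TmpEngine::HEAP)
                   ? std::min(tmp_table_size, max_heap_table_size)
                   : tmp_table_size;
  return std::max<uint64_t>(budget / reclength, 1);
}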
Then one can store @@ -8933,67 +8998,68 @@ TABLE *create_virtual_tmp_table(THD *thd, List<create_field> &field_list) uint record_length= 0; uint null_count= 0; /* number of columns which may be null */ uint null_pack_length; /* NULL representation array length */ - TABLE_SHARE *s; + TABLE_SHARE *share; /* Create the table and list of all fields */ - TABLE *table= (TABLE*) thd->calloc(sizeof(*table)); + TABLE *table= (TABLE*) thd->calloc(sizeof(*table)+sizeof(*share)); field= (Field**) thd->alloc((field_count + 1) * sizeof(Field*)); if (!table || !field) return 0; table->field= field; - table->s= s= &table->share_not_to_be_used; - s->fields= field_count; + table->s= share= (TABLE_SHARE*) (table+1); + share->fields= field_count; - if (!(s->blob_field= (uint*)thd->alloc((field_list.elements + 1) * - sizeof(uint)))) + if (!(share->blob_field= (uint*)thd->alloc((field_list.elements + 1) * + sizeof(uint)))) return 0; - s->blob_ptr_size= mi_portable_sizeof_char_ptr; + share->blob_ptr_size= mi_portable_sizeof_char_ptr; /* Create all fields and calculate the total length of record */ List_iterator_fast<create_field> it(field_list); while ((cdef= it++)) { - *field= make_field(0, cdef->length, + *field= make_field(share, 0, cdef->length, (uchar*) (f_maybe_null(cdef->pack_flag) ? "" : 0), f_maybe_null(cdef->pack_flag) ? 1 : 0, cdef->pack_flag, cdef->sql_type, cdef->charset, cdef->geom_type, cdef->unireg_check, - cdef->interval, cdef->field_name, table); + cdef->interval, cdef->field_name); if (!*field) goto error; - record_length+= (**field).pack_length(); - if (! ((**field).flags & NOT_NULL_FLAG)) - ++null_count; + (*field)->init(table); + record_length+= (*field)->pack_length(); + if (! ((*field)->flags & NOT_NULL_FLAG)) + null_count++; if ((*field)->flags & BLOB_FLAG) - s->blob_field[blob_count++]= (uint) (field - table->field); + share->blob_field[blob_count++]= (uint) (field - table->field); - ++field; + field++; } *field= NULL; /* mark the end of the list */ - s->blob_field[blob_count]= 0; /* mark the end of the list */ - s->blob_fields= blob_count; + share->blob_field[blob_count]= 0; /* mark the end of the list */ + share->blob_fields= blob_count; null_pack_length= (null_count + 7)/8; - s->reclength= record_length + null_pack_length; - s->rec_buff_length= ALIGN_SIZE(s->reclength + 1); - table->record[0]= (byte*) thd->alloc(s->rec_buff_length); + share->reclength= record_length + null_pack_length; + share->rec_buff_length= ALIGN_SIZE(share->reclength + 1); + table->record[0]= (byte*) thd->alloc(share->rec_buff_length); if (!table->record[0]) goto error; if (null_pack_length) { table->null_flags= (uchar*) table->record[0]; - s->null_fields= null_count; - s->null_bytes= null_pack_length; + share->null_fields= null_count; + share->null_bytes= null_pack_length; } table->in_use= thd; /* field->reset() may access table->in_use */ { /* Set up field pointers */ byte *null_pos= table->record[0]; - byte *field_pos= null_pos + s->null_bytes; + byte *field_pos= null_pos + share->null_bytes; uint null_bit= 1; for (field= table->field; *field; ++field) @@ -9027,7 +9093,7 @@ error: static bool open_tmp_table(TABLE *table) { int error; - if ((error=table->file->ha_open(table->s->table_name,O_RDWR, + if ((error=table->file->ha_open(table, table->s->table_name.str,O_RDWR, HA_OPEN_TMP_TABLE))) { table->file->print_error(error,MYF(0)); /* purecov: inspected */ @@ -9046,9 +9112,10 @@ static bool create_myisam_tmp_table(TABLE *table,TMP_TABLE_PARAM *param, MI_KEYDEF keydef; MI_UNIQUEDEF uniquedef; KEY 
*keyinfo=param->keyinfo; + TABLE_SHARE *share= table->s; DBUG_ENTER("create_myisam_tmp_table"); - if (table->s->keys) + if (share->keys) { // Get keys for ni_create bool using_unique_constraint=0; HA_KEYSEG *seg= (HA_KEYSEG*) alloc_root(&table->mem_root, @@ -9059,11 +9126,11 @@ static bool create_myisam_tmp_table(TABLE *table,TMP_TABLE_PARAM *param, bzero(seg, sizeof(*seg) * keyinfo->key_parts); if (keyinfo->key_length >= table->file->max_key_length() || keyinfo->key_parts > table->file->max_key_parts() || - table->s->uniques) + share->uniques) { /* Can't create a key; Make a unique constraint instead of a key */ - table->s->keys= 0; - table->s->uniques= 1; + share->keys= 0; + share->uniques= 1; using_unique_constraint=1; bzero((char*) &uniquedef,sizeof(uniquedef)); uniquedef.keysegs=keyinfo->key_parts; @@ -9075,7 +9142,7 @@ static bool create_myisam_tmp_table(TABLE *table,TMP_TABLE_PARAM *param, param->recinfo->type= FIELD_CHECK; param->recinfo->length=MI_UNIQUE_HASH_LENGTH; param->recinfo++; - table->s->reclength+=MI_UNIQUE_HASH_LENGTH; + share->reclength+=MI_UNIQUE_HASH_LENGTH; } else { @@ -9097,7 +9164,7 @@ static bool create_myisam_tmp_table(TABLE *table,TMP_TABLE_PARAM *param, seg->type= ((keyinfo->key_part[i].key_type & FIELDFLAG_BINARY) ? HA_KEYTYPE_VARBINARY2 : HA_KEYTYPE_VARTEXT2); - seg->bit_start= (uint8)(field->pack_length() - table->s->blob_ptr_size); + seg->bit_start= (uint8)(field->pack_length() - share->blob_ptr_size); seg->flag= HA_BLOB_PART; seg->length=0; // Whole blob in unique constraint } @@ -9130,10 +9197,10 @@ static bool create_myisam_tmp_table(TABLE *table,TMP_TABLE_PARAM *param, OPTION_BIG_TABLES) create_info.data_file_length= ~(ulonglong) 0; - if ((error=mi_create(table->s->table_name,table->s->keys,&keydef, + if ((error=mi_create(share->table_name.str, share->keys, &keydef, (uint) (param->recinfo-param->start_recinfo), param->start_recinfo, - table->s->uniques, &uniquedef, + share->uniques, &uniquedef, &create_info, HA_CREATE_TMP_TABLE))) { @@ -9143,7 +9210,7 @@ static bool create_myisam_tmp_table(TABLE *table,TMP_TABLE_PARAM *param, } statistic_increment(table->in_use->status_var.created_tmp_disk_tables, &LOCK_status); - table->s->db_record_offset= 1; + share->db_record_offset= 1; DBUG_RETURN(0); err: DBUG_RETURN(1); @@ -9164,17 +9231,9 @@ free_tmp_table(THD *thd, TABLE *entry) if (entry->file) { if (entry->db_stat) - { - (void) entry->file->close(); - } - /* - We can't call ha_delete_table here as the table may created in mixed case - here and we have to ensure that delete_table gets the table name in - the original case. 
- */ - if (!(test_flags & TEST_KEEP_TMP_TABLES) || - entry->s->db_type == DB_TYPE_HEAP) - entry->file->delete_table(entry->s->table_name); + entry->file->drop_table(entry->s->table_name.str); + else + entry->file->delete_table(entry->s->table_name.str); delete entry->file; } @@ -9183,7 +9242,7 @@ free_tmp_table(THD *thd, TABLE *entry) (*ptr)->free(); free_io_cache(entry); - bitmap_clear_bit(&temp_pool, entry->temp_pool_slot); + bitmap_lock_clear_bit(&temp_pool, entry->temp_pool_slot); free_root(&own_root, MYF(0)); /* the table is allocated in its own root */ thd->proc_info=save_proc_info; @@ -9199,26 +9258,29 @@ bool create_myisam_from_heap(THD *thd, TABLE *table, TMP_TABLE_PARAM *param, int error, bool ignore_last_dupp_key_error) { TABLE new_table; + TABLE_SHARE share; const char *save_proc_info; int write_err; DBUG_ENTER("create_myisam_from_heap"); - if (table->s->db_type != DB_TYPE_HEAP || error != HA_ERR_RECORD_FILE_FULL) + if (table->s->db_type != &heap_hton || + error != HA_ERR_RECORD_FILE_FULL) { table->file->print_error(error,MYF(0)); DBUG_RETURN(1); } new_table= *table; - new_table.s= &new_table.share_not_to_be_used; - new_table.s->db_type= DB_TYPE_MYISAM; - if (!(new_table.file= get_new_handler(&new_table, &new_table.mem_root, - DB_TYPE_MYISAM))) + share= *table->s; + new_table.s= &share; + new_table.s->db_type= &myisam_hton; + if (!(new_table.file= get_new_handler(&share, &new_table.mem_root, + &myisam_hton))) DBUG_RETURN(1); // End of memory save_proc_info=thd->proc_info; thd->proc_info="converting HEAP to MyISAM"; - if (create_myisam_tmp_table(&new_table,param, + if (create_myisam_tmp_table(&new_table, param, thd->lex->select_lex.options | thd->options)) goto err2; if (open_tmp_table(&new_table)) @@ -9245,14 +9307,19 @@ bool create_myisam_from_heap(THD *thd, TABLE *table, TMP_TABLE_PARAM *param, new_table.file->extra(HA_EXTRA_WRITE_CACHE); #endif - /* copy all old rows */ + /* + copy all old rows from heap table to MyISAM table + This is the only code that uses record[1] to read/write but this + is safe as this is a temporary MyISAM table without timestamp/autoincrement + or partitioning. + */ while (!table->file->rnd_next(new_table.record[1])) { - if ((write_err=new_table.file->write_row(new_table.record[1]))) + if ((write_err=new_table.file->ha_write_row(new_table.record[1]))) goto err; } /* copy row that filled HEAP table */ - if ((write_err=new_table.file->write_row(table->record[0]))) + if ((write_err=new_table.file->ha_write_row(table->record[0]))) { if (write_err != HA_ERR_FOUND_DUPP_KEY && write_err != HA_ERR_FOUND_DUPP_UNIQUE || !ignore_last_dupp_key_error) @@ -9262,12 +9329,13 @@ bool create_myisam_from_heap(THD *thd, TABLE *table, TMP_TABLE_PARAM *param, /* remove heap table and change to use myisam table */ (void) table->file->ha_rnd_end(); (void) table->file->close(); - (void) table->file->delete_table(table->s->table_name); + (void) table->file->delete_table(table->s->table_name.str); delete table->file; table->file=0; + new_table.s= table->s; // Keep old share *table= new_table; - table->s= &table->share_not_to_be_used; - table->file->change_table_ptr(table); + *table->s= share; + table->file->change_table_ptr(table, table->s); if (save_proc_info) thd->proc_info= (!strcmp(save_proc_info,"Copying to tmp table") ? 
"Copying to tmp table on disk" : save_proc_info); @@ -9279,9 +9347,9 @@ bool create_myisam_from_heap(THD *thd, TABLE *table, TMP_TABLE_PARAM *param, (void) table->file->ha_rnd_end(); (void) new_table.file->close(); err1: - new_table.file->delete_table(new_table.s->table_name); - delete new_table.file; + new_table.file->delete_table(new_table.s->table_name.str); err2: + delete new_table.file; thd->proc_info=save_proc_info; DBUG_RETURN(1); } @@ -9386,7 +9454,7 @@ do_select(JOIN *join,List<Item> *fields,TABLE *table,Procedure *procedure) empty_record(table); if (table->group && join->tmp_table_param.sum_func_count && table->s->keys && !table->file->inited) - table->file->ha_index_init(0); + table->file->ha_index_init(0, 0); } /* Set up select_end */ join->join_tab[join->tables-1].next_select= setup_end_select_func(join); @@ -9527,7 +9595,7 @@ sub_select_cache(JOIN *join,JOIN_TAB *join_tab,bool end_of_records) given the selected plan prescribes to nest retrievals of the joined tables in the following order: t1,t2,t3. A pushed down predicate are attached to the table which it pushed to, - at the field select_cond. + at the field join_tab->select_cond. When executing a nested loop of level k the function runs through the rows of 'join_tab' and for each row checks the pushed condition attached to the table. @@ -9566,7 +9634,7 @@ sub_select_cache(JOIN *join,JOIN_TAB *join_tab,bool end_of_records) is complemented by nulls for t2 and t3. Then the pushed down predicates are checked for the composed row almost in the same way as it had been done for the first row with a match. The only difference is - the predicates from on expressions are not checked. + the predicates from on expressions are not checked. IMPLEMENTATION The function forms output rows for a current partial join of k @@ -9575,7 +9643,7 @@ sub_select_cache(JOIN *join,JOIN_TAB *join_tab,bool end_of_records) join_tab it calls sub_select that builds all possible matching tails from the result set. To be able check predicates conditionally items of the class - Item_func_trig_cond are employed. + Item_func_trig_cond are employed. An object of this class is constructed from an item of class COND and a pointer to a guarding boolean variable. 
When the value of the guard variable is true the value of the object @@ -10089,7 +10157,7 @@ join_read_const(JOIN_TAB *tab) table->status= STATUS_NOT_FOUND; mark_as_null_row(tab->table); empty_record(table); - if (error != HA_ERR_KEY_NOT_FOUND) + if (error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE) return report_error(table, error); return -1; } @@ -10112,7 +10180,9 @@ join_read_key(JOIN_TAB *tab) TABLE *table= tab->table; if (!table->file->inited) - table->file->ha_index_init(tab->ref.key); + { + table->file->ha_index_init(tab->ref.key, tab->sorted); + } if (cmp_buffer_with_ref(tab) || (table->status & (STATUS_GARBAGE | STATUS_NO_PARENT | STATUS_NULL_ROW))) { @@ -10124,7 +10194,7 @@ join_read_key(JOIN_TAB *tab) error=table->file->index_read(table->record[0], tab->ref.key_buff, tab->ref.key_length,HA_READ_KEY_EXACT); - if (error && error != HA_ERR_KEY_NOT_FOUND) + if (error && error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE) return report_error(table, error); } table->null_row=0; @@ -10144,14 +10214,16 @@ join_read_always_key(JOIN_TAB *tab) return -1; } if (!table->file->inited) - table->file->ha_index_init(tab->ref.key); + { + table->file->ha_index_init(tab->ref.key, tab->sorted); + } if (cp_buffer_from_ref(tab->join->thd, &tab->ref)) return -1; if ((error=table->file->index_read(table->record[0], tab->ref.key_buff, tab->ref.key_length,HA_READ_KEY_EXACT))) { - if (error != HA_ERR_KEY_NOT_FOUND) + if (error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE) return report_error(table, error); return -1; /* purecov: inspected */ } @@ -10171,14 +10243,14 @@ join_read_last_key(JOIN_TAB *tab) TABLE *table= tab->table; if (!table->file->inited) - table->file->ha_index_init(tab->ref.key); + table->file->ha_index_init(tab->ref.key, tab->sorted); if (cp_buffer_from_ref(tab->join->thd, &tab->ref)) return -1; if ((error=table->file->index_read_last(table->record[0], tab->ref.key_buff, tab->ref.key_length))) { - if (error != HA_ERR_KEY_NOT_FOUND) + if (error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE) return report_error(table, error); return -1; /* purecov: inspected */ } @@ -10281,7 +10353,7 @@ join_read_first(JOIN_TAB *tab) tab->read_record.index=tab->index; tab->read_record.record=table->record[0]; if (!table->file->inited) - table->file->ha_index_init(tab->index); + table->file->ha_index_init(tab->index, tab->sorted); if ((error=tab->table->file->index_first(tab->table->record[0]))) { if (error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE) @@ -10320,7 +10392,7 @@ join_read_last(JOIN_TAB *tab) tab->read_record.index=tab->index; tab->read_record.record=table->record[0]; if (!table->file->inited) - table->file->ha_index_init(tab->index); + table->file->ha_index_init(tab->index, 1); if ((error= tab->table->file->index_last(tab->table->record[0]))) return report_error(table, error); return 0; @@ -10344,7 +10416,7 @@ join_ft_read_first(JOIN_TAB *tab) TABLE *table= tab->table; if (!table->file->inited) - table->file->ha_index_init(tab->ref.key); + table->file->ha_index_init(tab->ref.key, 1); #if NOT_USED_YET if (cp_buffer_from_ref(tab->join->thd, &tab->ref)) // as ft-key doesn't use store_key's return -1; // see also FT_SELECT::init() @@ -10646,7 +10718,7 @@ end_write(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)), { int error; join->found_records++; - if ((error=table->file->write_row(table->record[0]))) + if ((error=table->file->ha_write_row(table->record[0]))) { if (error == HA_ERR_FOUND_DUPP_KEY || error == HA_ERR_FOUND_DUPP_UNIQUE) @@ 
-10708,8 +10780,8 @@ end_update(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)), { /* Update old record */ restore_record(table,record[1]); update_tmptable_sum_func(join->sum_funcs,table); - if ((error=table->file->update_row(table->record[1], - table->record[0]))) + if ((error=table->file->ha_update_row(table->record[1], + table->record[0]))) { table->file->print_error(error,MYF(0)); /* purecov: inspected */ DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */ @@ -10732,13 +10804,13 @@ end_update(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)), } init_tmptable_sum_functions(join->sum_funcs); copy_funcs(join->tmp_table_param.items_to_copy); - if ((error=table->file->write_row(table->record[0]))) + if ((error=table->file->ha_write_row(table->record[0]))) { if (create_myisam_from_heap(join->thd, table, &join->tmp_table_param, error, 0)) DBUG_RETURN(NESTED_LOOP_ERROR); // Not a table_is_full error /* Change method to update rows */ - table->file->ha_index_init(0); + table->file->ha_index_init(0, 0); join->join_tab[join->tables-1].next_select=end_unique_update; } join->send_records++; @@ -10768,7 +10840,7 @@ end_unique_update(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)), copy_fields(&join->tmp_table_param); // Groups are copied twice. copy_funcs(join->tmp_table_param.items_to_copy); - if (!(error=table->file->write_row(table->record[0]))) + if (!(error=table->file->ha_write_row(table->record[0]))) join->send_records++; // New group else { @@ -10784,8 +10856,8 @@ end_unique_update(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)), } restore_record(table,record[1]); update_tmptable_sum_func(join->sum_funcs,table); - if ((error=table->file->update_row(table->record[1], - table->record[0]))) + if ((error=table->file->ha_update_row(table->record[1], + table->record[0]))) { table->file->print_error(error,MYF(0)); /* purecov: inspected */ DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */ @@ -10828,7 +10900,7 @@ end_write_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)), join->sum_funcs_end[send_group_parts]); if (!join->having || join->having->val_int()) { - int error= table->file->write_row(table->record[0]); + int error= table->file->ha_write_row(table->record[0]); if (error && create_myisam_from_heap(join->thd, table, &join->tmp_table_param, error, 0)) @@ -11641,7 +11713,7 @@ remove_duplicates(JOIN *join, TABLE *entry,List<Item> &fields, Item *having) free_io_cache(entry); // Safety entry->file->info(HA_STATUS_VARIABLE); - if (entry->s->db_type == DB_TYPE_HEAP || + if (entry->s->db_type == &heap_hton || (!entry->s->blob_fields && ((ALIGN_SIZE(reclength) + HASH_OVERHEAD) * entry->file->records < thd->variables.sortbuff_size))) @@ -11690,7 +11762,7 @@ static int remove_dup_with_compare(THD *thd, TABLE *table, Field **first_field, } if (having && !having->val_int()) { - if ((error=file->delete_row(record))) + if ((error=file->ha_delete_row(record))) goto err; error=file->rnd_next(record); continue; @@ -11717,7 +11789,7 @@ static int remove_dup_with_compare(THD *thd, TABLE *table, Field **first_field, } if (compare_record(table, first_field) == 0) { - if ((error=file->delete_row(record))) + if ((error=file->ha_delete_row(record))) goto err; } else if (!found) @@ -11814,7 +11886,7 @@ static int remove_dup_with_hash_index(THD *thd, TABLE *table, } if (having && !having->val_int()) { - if ((error=file->delete_row(record))) + if ((error=file->ha_delete_row(record))) goto err; continue; } @@ -11831,7 +11903,7 @@ static int remove_dup_with_hash_index(THD 
*thd, TABLE *table, if (hash_search(&hash, org_key_pos, key_length)) { /* Duplicated found ; Remove the row */ - if ((error=file->delete_row(record))) + if ((error=file->ha_delete_row(record))) goto err; } else @@ -13657,7 +13729,7 @@ int JOIN::rollup_write_data(uint idx, TABLE *table) item->save_in_result_field(1); } copy_sum_funcs(sum_funcs_end[i+1], sum_funcs_end[i]); - if ((error= table->file->write_row(table->record[0]))) + if ((error= table->file->ha_write_row(table->record[0]))) { if (create_myisam_from_heap(thd, table, &tmp_table_param, error, 0)) @@ -13723,6 +13795,9 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order, strlen(join->select_lex->type), cs)); for (uint i=0 ; i < 7; i++) item_list.push_back(item_null); + if (join->thd->lex->describe & DESCRIBE_PARTITIONS) + item_list.push_back(item_null); + item_list.push_back(new Item_string(message,strlen(message),cs)); if (result->send_data(item_list)) join->error= 1; @@ -13843,7 +13918,28 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order, item_list.push_back(new Item_string(table->alias, strlen(table->alias), cs)); - /* type */ + /* "partitions" column */ + if (join->thd->lex->describe & DESCRIBE_PARTITIONS) + { +#ifdef WITH_PARTITION_STORAGE_ENGINE + partition_info *part_info; + if (!table->derived_select_number && + (part_info= table->part_info)) + { + char parts_buff[128]; + String parts_str(parts_buff,sizeof(parts_buff),cs); + make_used_partitions_str(part_info, &parts_str); + item_list.push_back(new Item_string(parts_str.ptr(), + parts_str.length(), cs)); + } + else + item_list.push_back(item_null); +#else + /* just produce empty column if partitioning is not compiled in */ + item_list.push_back(item_null); +#endif + } + /* "type" column */ item_list.push_back(new Item_string(join_type_str[tab->type], strlen(join_type_str[tab->type]), cs)); diff --git a/sql/sql_select.h b/sql/sql_select.h index 4aa238641e5..523182d96cd 100644 --- a/sql/sql_select.h +++ b/sql/sql_select.h @@ -133,6 +133,7 @@ typedef struct st_join_table { uint used_fields,used_fieldlength,used_blobs; enum join_type type; bool cached_eq_ref_table,eq_ref_table,not_used_in_distinct; + bool sorted; TABLE_REF ref; JOIN_CACHE cache; JOIN *join; @@ -444,10 +445,11 @@ class store_key :public Sql_alloc { if (field_arg->type() == FIELD_TYPE_BLOB) { - /* Key segments are always packed with a 2 byte length prefix */ - to_field=new Field_varstring(ptr, length, 2, (uchar*) null, 1, - Field::NONE, field_arg->field_name, - field_arg->table, field_arg->charset()); + /* Key segments are always packed with a 2 byte length prefix */ + to_field= new Field_varstring(ptr, length, 2, (uchar*) null, 1, + Field::NONE, field_arg->field_name, + field_arg->table->s, field_arg->charset()); + to_field->init(field_arg->table); } else to_field=field_arg->new_key_field(thd->mem_root, field_arg->table, diff --git a/sql/sql_show.cc b/sql/sql_show.cc index 089314078a6..cd0df5184cd 100644 --- a/sql/sql_show.cc +++ b/sql/sql_show.cc @@ -19,15 +19,14 @@ #include "mysql_priv.h" #include "sql_select.h" // For select_describe +#include "sql_show.h" #include "repl_failsafe.h" #include "sp.h" #include "sp_head.h" #include "sql_trigger.h" +#include "authors.h" #include <my_dir.h> -#ifdef HAVE_BERKELEY_DB -#include "ha_berkeley.h" // For berkeley_show_logs -#endif static const char *grant_names[]={ "select","insert","update","delete","create","drop","reload","shutdown", @@ -39,10 +38,6 @@ static TYPELIB grant_types = { 
sizeof(grant_names)/sizeof(char **), grant_names, NULL}; #endif -static int -store_create_info(THD *thd, TABLE_LIST *table_list, String *packet); -static int -view_store_create_info(THD *thd, TABLE_LIST *table, String *buff); static bool schema_table_store_record(THD *thd, TABLE *table); @@ -50,6 +45,32 @@ static bool schema_table_store_record(THD *thd, TABLE *table); ** List all table types supported ***************************************************************************/ +static my_bool show_handlerton(THD *thd, st_plugin_int *plugin, + void *arg) +{ + handlerton *default_type= (handlerton *) arg; + Protocol *protocol= thd->protocol; + handlerton *hton= (handlerton *) plugin->plugin->info; + + if (!(hton->flags & HTON_HIDDEN)) + { + protocol->prepare_for_resend(); + protocol->store(hton->name, system_charset_info); + const char *option_name= show_comp_option_name[(int) hton->state]; + + if (hton->state == SHOW_OPTION_YES && default_type == hton) + option_name= "DEFAULT"; + protocol->store(option_name, system_charset_info); + protocol->store(hton->comment, system_charset_info); + protocol->store(hton->commit ? "YES" : "NO", system_charset_info); + protocol->store(hton->prepare ? "YES" : "NO", system_charset_info); + protocol->store(hton->savepoint_set ? "YES" : "NO", system_charset_info); + + return protocol->write() ? 1 : 0; + } + return 0; +} + bool mysqld_show_storage_engines(THD *thd) { List<Item> field_list; @@ -59,37 +80,151 @@ bool mysqld_show_storage_engines(THD *thd) field_list.push_back(new Item_empty_string("Engine",10)); field_list.push_back(new Item_empty_string("Support",10)); field_list.push_back(new Item_empty_string("Comment",80)); + field_list.push_back(new Item_empty_string("Transactions",3)); + field_list.push_back(new Item_empty_string("XA",3)); + field_list.push_back(new Item_empty_string("Savepoints",3)); if (protocol->send_fields(&field_list, Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF)) DBUG_RETURN(TRUE); - const char *default_type_name= - ha_get_storage_engine((enum db_type)thd->variables.table_type); + if (plugin_foreach(thd, show_handlerton, + MYSQL_STORAGE_ENGINE_PLUGIN, thd->variables.table_type)) + DBUG_RETURN(TRUE); - handlerton **types; - for (types= sys_table_types; *types; types++) + send_eof(thd); + DBUG_RETURN(FALSE); +} + +static int make_version_string(char *buf, int buf_length, uint version) +{ + return my_snprintf(buf, buf_length, "%d.%d", version>>8,version&0xff); +} + +static my_bool show_plugins(THD *thd, st_plugin_int *plugin, + void *arg) +{ + TABLE *table= (TABLE*) arg; + struct st_mysql_plugin *plug= plugin->plugin; + Protocol *protocol= thd->protocol; + CHARSET_INFO *cs= system_charset_info; + char version_buf[20]; + + restore_record(table, s->default_values); + + table->field[0]->store(plugin->name.str, plugin->name.length, cs); + + table->field[1]->store(version_buf, + make_version_string(version_buf, sizeof(version_buf), plug->version), + cs); + + + switch (plugin->state) { - if (!((*types)->flags & HTON_HIDDEN)) - { - protocol->prepare_for_resend(); - protocol->store((*types)->name, system_charset_info); - const char *option_name= show_comp_option_name[(int) (*types)->state]; - - if ((*types)->state == SHOW_OPTION_YES && - !my_strcasecmp(system_charset_info, default_type_name, (*types)->name)) - option_name= "DEFAULT"; - protocol->store(option_name, system_charset_info); - protocol->store((*types)->comment, system_charset_info); - if (protocol->write()) - DBUG_RETURN(TRUE); - } + /* case PLUGIN_IS_FREED: does not happen */ + case 
PLUGIN_IS_DELETED: + table->field[2]->store(STRING_WITH_LEN("DELETED"), cs); + break; + case PLUGIN_IS_UNINITIALIZED: + table->field[2]->store(STRING_WITH_LEN("INACTIVE"), cs); + break; + case PLUGIN_IS_READY: + table->field[2]->store(STRING_WITH_LEN("ACTIVE"), cs); + break; + default: + DBUG_ASSERT(0); + } + + table->field[3]->store(plugin_type_names[plug->type].str, + plugin_type_names[plug->type].length, + cs); + table->field[4]->store(version_buf, + make_version_string(version_buf, sizeof(version_buf), + *(uint *)plug->info), cs); + + if (plugin->plugin_dl) + { + table->field[5]->store(plugin->plugin_dl->dl.str, + plugin->plugin_dl->dl.length, cs); + table->field[5]->set_notnull(); + table->field[6]->store(version_buf, + make_version_string(version_buf, sizeof(version_buf), + plugin->plugin_dl->version), + cs); + table->field[6]->set_notnull(); + } + else + { + table->field[5]->set_null(); + table->field[6]->set_null(); + } + + + if (plug->author) + { + table->field[7]->store(plug->author, strlen(plug->author), cs); + table->field[7]->set_notnull(); + } + else + table->field[7]->set_null(); + + if (plug->descr) + { + table->field[8]->store(plug->descr, strlen(plug->descr), cs); + table->field[8]->set_notnull(); + } + else + table->field[8]->set_null(); + + return schema_table_store_record(thd, table); +} + + +int fill_plugins(THD *thd, TABLE_LIST *tables, COND *cond) +{ + DBUG_ENTER("fill_plugins"); + TABLE *table= tables->table; + + if (plugin_foreach(thd, show_plugins, MYSQL_ANY_PLUGIN, table)) + DBUG_RETURN(1); + + DBUG_RETURN(0); +} + + +/*************************************************************************** +** List all Authors. +** If you can update it, you get to be in it :) +***************************************************************************/ + +bool mysqld_show_authors(THD *thd) +{ + List<Item> field_list; + Protocol *protocol= thd->protocol; + DBUG_ENTER("mysqld_show_authors"); + + field_list.push_back(new Item_empty_string("Name",40)); + field_list.push_back(new Item_empty_string("Location",40)); + field_list.push_back(new Item_empty_string("Comment",80)); + + if (protocol->send_fields(&field_list, + Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF)) + DBUG_RETURN(TRUE); + + show_table_authors_st *authors; + for (authors= show_table_authors; authors->name; authors++) + { + protocol->prepare_for_resend(); + protocol->store(authors->name, system_charset_info); + protocol->store(authors->location, system_charset_info); + protocol->store(authors->comment, system_charset_info); + if (protocol->write()) + DBUG_RETURN(TRUE); } send_eof(thd); DBUG_RETURN(FALSE); } - /*************************************************************************** List all privileges supported ***************************************************************************/ @@ -278,9 +413,14 @@ mysql_find_files(THD *thd,List<char> *files, const char *db,const char *path, for (i=0 ; i < (uint) dirp->number_off_files ; i++) { + char uname[NAME_LEN*3+1]; /* Unencoded name */ file=dirp->dir_entry+i; if (dir) { /* Return databases */ + if ((file->name[0] == '.' && + ((file->name[1] == '.' && file->name[2] == '\0') || + file->name[1] == '\0'))) + continue; /* . or .. */ #ifdef USE_SYMDIR char *ext; char buff[FN_REFLEN]; @@ -297,17 +437,22 @@ mysql_find_files(THD *thd,List<char> *files, const char *db,const char *path, continue; } #endif - if (file->name[0] == '.' 
|| !MY_S_ISDIR(file->mystat->st_mode) || - (wild && wild_compare(file->name,wild,0))) - continue; + if (!MY_S_ISDIR(file->mystat->st_mode)) + continue; + VOID(filename_to_tablename(file->name, uname, sizeof(uname))); + if (wild && wild_compare(uname, wild, 0)) + continue; + file->name= uname; } else { // Return only .frm files which aren't temp files. - if (my_strcasecmp(system_charset_info, ext=fn_ext(file->name),reg_ext) || + if (my_strcasecmp(system_charset_info, ext=fn_rext(file->name),reg_ext) || is_prefix(file->name,tmp_file_prefix)) continue; *ext=0; + VOID(filename_to_tablename(file->name, uname, sizeof(uname))); + file->name= uname; if (wild) { if (lower_case_table_names) @@ -389,7 +534,7 @@ mysqld_show_create(THD *thd, TABLE_LIST *table_list) buffer.length(0); if ((table_list->view ? view_store_create_info(thd, table_list, &buffer) : - store_create_info(thd, table_list, &buffer))) + store_create_info(thd, table_list, &buffer, NULL))) DBUG_RETURN(TRUE); List<Item> field_list; @@ -446,12 +591,6 @@ bool mysqld_show_create_db(THD *thd, char *dbname, Protocol *protocol=thd->protocol; DBUG_ENTER("mysql_show_create_db"); - if (check_db_name(dbname)) - { - my_error(ER_WRONG_DB_NAME, MYF(0), dbname); - DBUG_RETURN(TRUE); - } - #ifndef NO_EMBEDDED_ACCESS_CHECKS if (test_all_bits(sctx->master_access, DB_ACLS)) db_access=DB_ACLS; @@ -475,8 +614,7 @@ bool mysqld_show_create_db(THD *thd, char *dbname, } else { - (void) sprintf(path,"%s/%s",mysql_data_home, dbname); - length=unpack_dirname(path,path); // Convert if not unix + length= build_table_filename(path, sizeof(path), dbname, "", ""); found_libchar= 0; if (length && path[length-1] == FN_LIBCHAR) { @@ -529,29 +667,6 @@ bool mysqld_show_create_db(THD *thd, char *dbname, DBUG_RETURN(FALSE); } -bool -mysqld_show_logs(THD *thd) -{ - List<Item> field_list; - Protocol *protocol= thd->protocol; - DBUG_ENTER("mysqld_show_logs"); - - field_list.push_back(new Item_empty_string("File",FN_REFLEN)); - field_list.push_back(new Item_empty_string("Type",10)); - field_list.push_back(new Item_empty_string("Status",10)); - - if (protocol->send_fields(&field_list, - Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF)) - DBUG_RETURN(TRUE); - -#ifdef HAVE_BERKELEY_DB - if ((have_berkeley_db == SHOW_OPTION_YES) && berkeley_show_logs(protocol)) - DBUG_RETURN(TRUE); -#endif - - send_eof(thd); - DBUG_RETURN(FALSE); -} /**************************************************************************** @@ -594,10 +709,10 @@ mysqld_dump_create_info(THD *thd, TABLE_LIST *table_list, int fd) Protocol *protocol= thd->protocol; String *packet= protocol->storage_packet(); DBUG_ENTER("mysqld_dump_create_info"); - DBUG_PRINT("enter",("table: %s",table_list->table->s->table_name)); + DBUG_PRINT("enter",("table: %s",table_list->table->s->table_name.str)); protocol->prepare_for_resend(); - if (store_create_info(thd, table_list, packet)) + if (store_create_info(thd, table_list, packet, NULL)) DBUG_RETURN(-1); if (fd < 0) @@ -750,11 +865,34 @@ static void append_directory(THD *thd, String *packet, const char *dir_type, #define LIST_PROCESS_HOST_LEN 64 -static int -store_create_info(THD *thd, TABLE_LIST *table_list, String *packet) +/* + Build a CREATE TABLE statement for a table. + + SYNOPSIS + store_create_info() + thd The thread + table_list A list containing one table to write statement + for. + packet Pointer to a string where statement will be + written. + create_info_arg Pointer to create information that can be used + to tailor the format of the statement. 
Can be + NULL, in which case only SQL_MODE is considered + when building the statement. + + NOTE + Currently always return 0, but might return error code in the + future. + + RETURN + 0 OK + */ +int +store_create_info(THD *thd, TABLE_LIST *table_list, String *packet, + HA_CREATE_INFO *create_info_arg) { List<Item> field_list; - char tmp[MAX_FIELD_WIDTH], *for_str, buff[128], *end; + char tmp[MAX_FIELD_WIDTH], *for_str, buff[128], *end, uname[NAME_LEN*3+1]; const char *alias; String type(tmp, sizeof(tmp), system_charset_info); Field **ptr,*field; @@ -774,7 +912,7 @@ store_create_info(THD *thd, TABLE_LIST *table_list, String *packet) (MODE_NO_FIELD_OPTIONS | MODE_MYSQL323 | MODE_MYSQL40)) != 0; DBUG_ENTER("store_create_info"); - DBUG_PRINT("enter",("table: %s", table->s->table_name)); + DBUG_PRINT("enter",("table: %s", table->s->table_name.str)); restore_record(table, s->default_values); // Get empty record @@ -785,8 +923,14 @@ store_create_info(THD *thd, TABLE_LIST *table_list, String *packet) if (table_list->schema_table) alias= table_list->schema_table->table_name; else - alias= (lower_case_table_names == 2 ? table->alias : - share->table_name); + { + if (lower_case_table_names == 2) + alias= table->alias; + else + { + alias= share->table_name.str; + } + } append_identifier(thd, packet, alias, strlen(alias)); packet->append(STRING_WITH_LEN(" (\n")); @@ -964,6 +1108,12 @@ store_create_info(THD *thd, TABLE_LIST *table_list, String *packet) } } packet->append(')'); + if (key_info->parser) + { + packet->append(" WITH PARSER ", 13); + append_identifier(thd, packet, key_info->parser->name.str, + key_info->parser->name.length); + } } /* @@ -980,22 +1130,46 @@ store_create_info(THD *thd, TABLE_LIST *table_list, String *packet) packet->append(STRING_WITH_LEN("\n)")); if (!(thd->variables.sql_mode & MODE_NO_TABLE_OPTIONS) && !foreign_db_mode) { - if (thd->variables.sql_mode & (MODE_MYSQL323 | MODE_MYSQL40)) - packet->append(STRING_WITH_LEN(" TYPE=")); + /* + IF check_create_info + THEN add ENGINE only if it was used when creating the table + */ + if (!create_info_arg || + (create_info_arg->used_fields & HA_CREATE_USED_ENGINE)) + { + if (thd->variables.sql_mode & (MODE_MYSQL323 | MODE_MYSQL40)) + packet->append(STRING_WITH_LEN(" TYPE=")); + else + packet->append(STRING_WITH_LEN(" ENGINE=")); +#ifdef WITH_PARTITION_STORAGE_ENGINE + if (table->part_info) + packet->append(ha_resolve_storage_engine_name( + table->part_info->default_engine_type)); else - packet->append(STRING_WITH_LEN(" ENGINE=")); - packet->append(file->table_type()); + packet->append(file->table_type()); +#else + packet->append(file->table_type()); +#endif + } if (share->table_charset && !(thd->variables.sql_mode & MODE_MYSQL323) && !(thd->variables.sql_mode & MODE_MYSQL40)) { - packet->append(STRING_WITH_LEN(" DEFAULT CHARSET=")); - packet->append(share->table_charset->csname); - if (!(share->table_charset->state & MY_CS_PRIMARY)) + /* + IF check_create_info + THEN add DEFAULT CHARSET only if it was used when creating the table + */ + if (!create_info_arg || + (create_info_arg->used_fields & HA_CREATE_USED_DEFAULT_CHARSET)) { - packet->append(STRING_WITH_LEN(" COLLATE=")); - packet->append(table->s->table_charset->name); + packet->append(STRING_WITH_LEN(" DEFAULT CHARSET=")); + packet->append(share->table_charset->csname); + if (!(share->table_charset->state & MY_CS_PRIMARY)) + { + packet->append(STRING_WITH_LEN(" COLLATE=")); + packet->append(table->s->table_charset->name); + } } } @@ -1056,6 +1230,23 @@ store_create_info(THD *thd, 
TABLE_LIST *table_list, String *packet) append_directory(thd, packet, "DATA", create_info.data_file_name); append_directory(thd, packet, "INDEX", create_info.index_file_name); } +#ifdef WITH_PARTITION_STORAGE_ENGINE + { + /* + Partition syntax for CREATE TABLE is at the end of the syntax. + */ + uint part_syntax_len; + char *part_syntax; + if (table->part_info && + ((part_syntax= generate_partition_syntax(table->part_info, + &part_syntax_len, + FALSE,FALSE)))) + { + packet->append(part_syntax, part_syntax_len); + my_free(part_syntax, MYF(0)); + } + } +#endif DBUG_RETURN(0); } @@ -1083,7 +1274,6 @@ view_store_options(THD *thd, TABLE_LIST *table, String *buff) buff->append(STRING_WITH_LEN("SQL SECURITY INVOKER ")); } - /* Append DEFINER clause to the given buffer. @@ -1106,7 +1296,7 @@ void append_definer(THD *thd, String *buffer, const LEX_STRING *definer_user, } -static int +int view_store_create_info(THD *thd, TABLE_LIST *table, String *buff) { my_bool foreign_db_mode= (thd->variables.sql_mode & (MODE_POSTGRESQL | @@ -1479,8 +1669,11 @@ static bool show_status_array(THD *thd, const char *wild, break; } #endif /* HAVE_REPLICATION */ - case SHOW_OPENTABLES: - end= int10_to_str((long) cached_tables(), buff, 10); + case SHOW_OPEN_TABLES: + end= int10_to_str((long) cached_open_tables(), buff, 10); + break; + case SHOW_TABLE_DEFINITIONS: + end= int10_to_str((long) cached_table_definitions(), buff, 10); break; case SHOW_CHAR_PTR: { @@ -1768,7 +1961,7 @@ typedef struct st_index_field_values static bool schema_table_store_record(THD *thd, TABLE *table) { int error; - if ((error= table->file->write_row(table->record[0]))) + if ((error= table->file->ha_write_row(table->record[0]))) { if (create_myisam_from_heap(thd, table, table->pos_in_table_list->schema_table_param, @@ -2043,7 +2236,7 @@ int get_all_tables(THD *thd, TABLE_LIST *tables, COND *cond) Security_context *sctx= thd->security_ctx; uint derived_tables= lex->derived_tables; int error= 1; - db_type not_used; + enum legacy_db_type not_used; Open_tables_state open_tables_state_backup; bool save_view_prepare_mode= lex->view_prepare_mode; lex->view_prepare_mode= TRUE; @@ -2136,8 +2329,8 @@ int get_all_tables(THD *thd, TABLE_LIST *tables, COND *cond) } else { - strxmov(path, mysql_data_home, "/", base_name, NullS); - end= path + (len= unpack_dirname(path,path)); + len= build_table_filename(path, sizeof(path), base_name, "", ""); + end= path + len; len= FN_LEN - len; if (mysql_find_files(thd, &files, base_name, path, idx_field_vals.table_value, 0)) @@ -2285,8 +2478,7 @@ int fill_schema_shemata(THD *thd, TABLE_LIST *tables, COND *cond) (grant_option && !check_grant_db(thd, file_name))) #endif { - strxmov(path, mysql_data_home, "/", file_name, NullS); - length=unpack_dirname(path,path); // Convert if not unix + length= build_table_filename(path, sizeof(path), file_name, "", ""); found_libchar= 0; if (length && path[length-1] == FN_LIBCHAR) { @@ -2739,6 +2931,46 @@ int fill_schema_charsets(THD *thd, TABLE_LIST *tables, COND *cond) } +int fill_schema_engines(THD *thd, TABLE_LIST *tables, COND *cond) +{ + const char *wild= thd->lex->wild ? 
thd->lex->wild->ptr() : NullS; + TABLE *table= tables->table; + CHARSET_INFO *scs= system_charset_info; + handlerton **types; + + DBUG_ENTER("fill_schema_engines"); + + for (types= sys_table_types; *types; types++) + { + if ((*types)->flags & HTON_HIDDEN) + continue; + + if (!(wild && wild[0] && + wild_case_compare(scs, (*types)->name,wild))) + { + const char *tmp; + restore_record(table, s->default_values); + + table->field[0]->store((*types)->name, strlen((*types)->name), scs); + tmp= (*types)->state ? "DISABLED" : "ENABLED"; + table->field[1]->store( tmp, strlen(tmp), scs); + table->field[2]->store((*types)->comment, strlen((*types)->comment), scs); + tmp= (*types)->commit ? "YES" : "NO"; + table->field[3]->store( tmp, strlen(tmp), scs); + tmp= (*types)->prepare ? "YES" : "NO"; + table->field[4]->store( tmp, strlen(tmp), scs); + tmp= (*types)->savepoint_set ? "YES" : "NO"; + table->field[5]->store( tmp, strlen(tmp), scs); + + if (schema_table_store_record(thd, table)) + DBUG_RETURN(1); + } + } + + DBUG_RETURN(0); +} + + int fill_schema_collation(THD *thd, TABLE_LIST *tables, COND *cond) { CHARSET_INFO **cs; @@ -2905,7 +3137,7 @@ int fill_schema_proc(THD *thd, TABLE_LIST *tables, COND *cond) { DBUG_RETURN(1); } - proc_table->file->ha_index_init(0); + proc_table->file->ha_index_init(0, 1); if ((res= proc_table->file->index_first(proc_table->record[0]))) { res= (res == HA_ERR_END_OF_FILE) ? 0 : 1; @@ -3723,8 +3955,8 @@ int mysql_schema_table(THD *thd, LEX *lex, TABLE_LIST *table_list) table->alias_name_used= my_strcasecmp(table_alias_charset, table_list->schema_table_name, table_list->alias); - table_list->table_name= (char*) table->s->table_name; - table_list->table_name_length= strlen(table->s->table_name); + table_list->table_name= table->s->table_name.str; + table_list->table_name_length= table->s->table_name.length; table_list->table= table; table->next= thd->derived_tables; thd->derived_tables= table; @@ -3793,6 +4025,7 @@ int make_schema_select(THD *thd, SELECT_LEX *sel, ST_SCHEMA_TABLE *schema_table= get_schema_table(schema_table_idx); LEX_STRING db, table; DBUG_ENTER("mysql_schema_select"); + DBUG_PRINT("enter", ("mysql_schema_select: %s", schema_table->table_name)); /* We have to make non const db_name & table_name because of lower_case_table_names @@ -3946,6 +4179,18 @@ ST_FIELD_INFO collation_fields_info[]= }; +ST_FIELD_INFO engines_fields_info[]= +{ + {"ENGINE", 64, MYSQL_TYPE_STRING, 0, 0, "Engine"}, + {"SUPPORT", 8, MYSQL_TYPE_STRING, 0, 0, "Support"}, + {"COMMENT", 80, MYSQL_TYPE_STRING, 0, 0, "Comment"}, + {"TRANSACTIONS", 3, MYSQL_TYPE_STRING, 0, 0, "Transactions"}, + {"XA", 3, MYSQL_TYPE_STRING, 0, 0, "XA"}, + {"SAVEPOINTS", 3 ,MYSQL_TYPE_STRING, 0, 0, "Savepoints"}, + {0, 0, MYSQL_TYPE_STRING, 0, 0, 0} +}; + + ST_FIELD_INFO coll_charset_app_fields_info[]= { {"COLLATION_NAME", 64, MYSQL_TYPE_STRING, 0, 0, 0}, @@ -4144,8 +4389,26 @@ ST_FIELD_INFO variables_fields_info[]= }; +ST_FIELD_INFO plugin_fields_info[]= +{ + {"PLUGIN_NAME", NAME_LEN, MYSQL_TYPE_STRING, 0, 0, "Name"}, + {"PLUGIN_VERSION", 20, MYSQL_TYPE_STRING, 0, 0, 0}, + {"PLUGIN_STATUS", 10, MYSQL_TYPE_STRING, 0, 0, "Status"}, + {"PLUGIN_TYPE", 10, MYSQL_TYPE_STRING, 0, 0, "Type"}, + {"PLUGIN_TYPE_VERSION", 20, MYSQL_TYPE_STRING, 0, 0, 0}, + {"PLUGIN_LIBRARY", NAME_LEN, MYSQL_TYPE_STRING, 0, 1, "Library"}, + {"PLUGIN_LIBRARY_VERSION", 20, MYSQL_TYPE_STRING, 0, 1, 0}, + {"PLUGIN_AUTHOR", NAME_LEN, MYSQL_TYPE_STRING, 0, 1, 0}, + {"PLUGIN_DESCRIPTION", 65535, MYSQL_TYPE_STRING, 0, 1, 0}, + {0, 0, MYSQL_TYPE_STRING, 
0, 0, 0} +}; + + +/* Description of ST_FIELD_INFO in table.h + + Make sure that the order of schema_tables and enum_schema_tables is the same. + + */ ST_SCHEMA_TABLE schema_tables[]= @@ -4160,10 +4423,14 @@ ST_SCHEMA_TABLE schema_tables[]= get_all_tables, make_columns_old_format, get_schema_column_record, 1, 2, 0}, {"COLUMN_PRIVILEGES", column_privileges_fields_info, create_schema_table, fill_schema_column_privileges, 0, 0, -1, -1, 0}, + {"ENGINES", engines_fields_info, create_schema_table, + fill_schema_engines, make_old_format, 0, -1, -1, 0}, {"KEY_COLUMN_USAGE", key_column_usage_fields_info, create_schema_table, get_all_tables, 0, get_schema_key_column_usage_record, 4, 5, 0}, {"OPEN_TABLES", open_tables_fields_info, create_schema_table, fill_open_tables, make_old_format, 0, -1, -1, 1}, + {"PLUGINS", plugin_fields_info, create_schema_table, + fill_plugins, make_old_format, 0, -1, -1, 0}, {"ROUTINES", proc_fields_info, create_schema_table, fill_schema_proc, make_proc_old_format, 0, -1, -1, 0}, {"SCHEMATA", schema_fields_info, create_schema_table, diff --git a/sql/sql_show.h b/sql/sql_show.h new file mode 100644 index 00000000000..6fce5e94ca3 --- /dev/null +++ b/sql/sql_show.h @@ -0,0 +1,17 @@ + +#ifndef SQL_SHOW_H +#define SQL_SHOW_H + +/* Forward declarations */ +class String; +class THD; +struct st_ha_create_information; +struct st_table_list; +typedef st_ha_create_information HA_CREATE_INFO; +typedef st_table_list TABLE_LIST; + +int store_create_info(THD *thd, TABLE_LIST *table_list, String *packet, + HA_CREATE_INFO *create_info_arg); +int view_store_create_info(THD *thd, TABLE_LIST *table, String *buff); + +#endif /* SQL_SHOW_H */ diff --git a/sql/sql_table.cc b/sql/sql_table.cc index ba4a606537f..dd91d7ecd0f 100644 --- a/sql/sql_table.cc +++ b/sql/sql_table.cc @@ -17,14 +17,12 @@ /* drop and alter of tables */ #include "mysql_priv.h" -#ifdef HAVE_BERKELEY_DB -#include "ha_berkeley.h" -#endif #include <hash.h> #include <myisam.h> #include <my_dir.h> #include "sp_head.h" #include "sql_trigger.h" +#include "sql_show.h" #ifdef __WIN__ #include <io.h> @@ -42,32 +40,148 @@ static int copy_data_between_tables(TABLE *from,TABLE *to, ha_rows *copied,ha_rows *deleted); static bool prepare_blob_field(THD *thd, create_field *sql_field); static bool check_engine(THD *thd, const char *table_name, - enum db_type *new_engine); + handlerton **new_engine); +/* + SYNOPSIS + write_bin_log() + thd Thread object + clear_error whether to clear the error state before logging + RETURN VALUES + NONE + DESCRIPTION + Write to the binary log if it is open; helper routine used in multiple + places in this file +*/ + +static void write_bin_log(THD *thd, bool clear_error, + char const* query, ulong query_length) +{ + if (mysql_bin_log.is_open()) + { + if (clear_error) + thd->clear_error(); + thd->binlog_query(THD::STMT_QUERY_TYPE, + query, query_length, FALSE, FALSE); + } +} /* - Build the path to a file for a table (or the base path that can - then have various extensions stuck on to it). + SYNOPSIS + abort_and_upgrade_lock() + thd Thread object + table Table object + db Database name + table_name Table name + old_lock_level Old lock level + RETURN VALUES + TRUE Failure + FALSE Success + DESCRIPTION + Remember old lock level (for possible downgrade later on), abort all + waiting threads and ensure that all threads currently holding locks have + completed, so that we own the lock exclusively and no other interaction + is ongoing.
+*/ + +static bool abort_and_upgrade_lock(THD *thd, TABLE *table, const char *db, + const char *table_name, + uint *old_lock_level) +{ + uint flags= RTFC_WAIT_OTHER_THREAD_FLAG | RTFC_CHECK_KILLED_FLAG; + DBUG_ENTER("abort_and_upgrade_locks"); + + *old_lock_level= table->reginfo.lock_type; + mysql_lock_abort(thd, table); + VOID(remove_table_from_cache(thd, db, table_name, flags)); + if (thd->killed) + { + thd->no_warnings_for_error= 0; + DBUG_RETURN(TRUE); + } + DBUG_RETURN(FALSE); +} + + +#define MYSQL50_TABLE_NAME_PREFIX "#mysql50#" +#define MYSQL50_TABLE_NAME_PREFIX_LENGTH 9 + +uint filename_to_tablename(const char *from, char *to, uint to_length) +{ + uint errors, res= strconvert(&my_charset_filename, from, + system_charset_info, to, to_length, &errors); + if (errors) // Old 5.0 name + { + res= strxnmov(to, to_length, MYSQL50_TABLE_NAME_PREFIX, from, NullS) - to; + sql_print_error("Invalid (old?) table or database name '%s'", from); + /* + TODO: add a stored procedure for fixing table and database names, + and mention its name in the error log. + */ + } + return res; +} + + +uint tablename_to_filename(const char *from, char *to, uint to_length) +{ + uint errors; + if (from[0] && !strncmp(from, MYSQL50_TABLE_NAME_PREFIX, + MYSQL50_TABLE_NAME_PREFIX_LENGTH)) + return my_snprintf(to, to_length, "%s", + from + MYSQL50_TABLE_NAME_PREFIX_LENGTH); + return strconvert(system_charset_info, from, + &my_charset_filename, to, to_length, &errors); +} + + +/* + Creates path to a file: mysql_data_dir/db/table.ext SYNOPSIS - build_table_path() - buff Buffer to build the path into - bufflen sizeof(buff) - db Name of database - table Name of table - ext Filename extension + build_table_filename() + buff where to write result + bufflen buff size + db database name, in system_charset_info + table table name, in system_charset_info + ext file extension + + NOTES + + Uses database and table name, and extension to create + a file name in mysql_data_dir. Database and table + names are converted from system_charset_info into the + filesystem charset (my_charset_filename). + 'ext' is not converted.
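The encode/decode pair above can be pictured with a short standalone sketch. The charset conversion itself is stubbed out (the real code uses strconvert() between system_charset_info and my_charset_filename); only the #mysql50# fallback logic is modelled, and the _sketch names are invented:

#include <string>

static const std::string MYSQL50_PREFIX= "#mysql50#";  // same literal as above

// Decode an on-disk name back to a table name. If the conversion fails, the
// name is assumed to be a pre-5.1 one and is returned with the prefix.
std::string filename_to_tablename_sketch(const std::string &from)
{
  bool errors= false;     // would be set by the real charset conversion
  std::string to= from;   // stub for my_charset_filename -> utf8
  if (errors)
    return MYSQL50_PREFIX + from;
  return to;
}

// Encode a table name for use on disk; prefixed names pass through verbatim,
// everything else would be converted into the filesystem charset.
std::string tablename_to_filename_sketch(const std::string &from)
{
  if (from.compare(0, MYSQL50_PREFIX.size(), MYSQL50_PREFIX) == 0)
    return from.substr(MYSQL50_PREFIX.size());
  return from;            // stub for utf8 -> my_charset_filename
}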
RETURN - 0 Error - # Size of path - */ -static uint build_table_path(char *buff, size_t bufflen, const char *db, +*/ + + +uint build_table_filename(char *buff, size_t bufflen, const char *db, + const char *table, const char *ext) +{ + uint length; + char dbbuff[FN_REFLEN]; + char tbbuff[FN_REFLEN]; + VOID(tablename_to_filename(table, tbbuff, sizeof(tbbuff))); + VOID(tablename_to_filename(db, dbbuff, sizeof(dbbuff))); + strxnmov(buff, bufflen, + mysql_data_home, "/", dbbuff, "/", tbbuff, ext, NullS); + length= unpack_filename(buff, buff); + return length; +} + + +uint build_tmptable_filename(char *buff, size_t bufflen, + const char *tmpdir, const char *table, const char *ext) { - strxnmov(buff, bufflen-1, mysql_data_home, "/", db, "/", table, ext, - NullS); - return unpack_filename(buff,buff); + uint length; + char tbbuff[FN_REFLEN]; + VOID(tablename_to_filename(table, tbbuff, sizeof(tbbuff))); + strxnmov(buff, bufflen, tmpdir, "/", tbbuff, ext, NullS); + length= unpack_filename(buff, buff); + return length; } @@ -219,13 +333,37 @@ int mysql_rm_table_part2(THD *thd, TABLE_LIST *tables, bool if_exists, bool dont_log_query) { TABLE_LIST *table; - char path[FN_REFLEN], *alias; + char path[FN_REFLEN], *alias; + uint path_length; String wrong_tables; int error; + int non_temp_tables_count= 0; bool some_tables_deleted=0, tmp_table_deleted=0, foreign_key_error=0; - + String built_query; DBUG_ENTER("mysql_rm_table_part2"); + if (binlog_row_based && !dont_log_query) + { + built_query.set_charset(system_charset_info); + if (if_exists) + built_query.append("DROP TABLE IF EXISTS "); + else + built_query.append("DROP TABLE "); + } + /* + If we have the table in the definition cache, we don't have to check the + .frm file to find out if the table is a normal table (not a view) and + what engine to use. + */ + + for (table= tables; table; table= table->next_local) + { + TABLE_SHARE *share; + table->db_type= NULL; + if ((share= get_cached_table_share(table->db, table->table_name))) + table->db_type= share->db_type; + } + if (lock_table_names(thd, tables)) DBUG_RETURN(1); @@ -235,16 +373,42 @@ int mysql_rm_table_part2(THD *thd, TABLE_LIST *tables, bool if_exists, for (table= tables; table; table= table->next_local) { char *db=table->db; - db_type table_type= DB_TYPE_UNKNOWN; + handlerton *table_type; + enum legacy_db_type frm_db_type; mysql_ha_flush(thd, table, MYSQL_HA_CLOSE_FINAL, TRUE); - if (!close_temporary_table(thd, db, table->table_name)) + if (!close_temporary_table(thd, table)) { tmp_table_deleted=1; continue; // removed temporary table } + /* + If row-based replication is used and the table is not a + temporary table, we add the table name to the drop statement + being built. The string always ends in a comma and the comma + will be chopped off before being written to the binary log. + */ + if (binlog_row_based && !dont_log_query) + { + ++non_temp_tables_count; + /* + Don't write the database name if it is the current one (or if + thd->db is NULL). + */ + built_query.append("`"); + if (thd->db == NULL || strcmp(db,thd->db) != 0) + { + built_query.append(db); + built_query.append("`.`"); + } + + built_query.append(table->table_name); + built_query.append("`,"); + } + error=0; + table_type= table->db_type; if (!drop_temporary) { abort_locked_tables(thd, db, table->table_name); @@ -258,14 +422,16 @@ int mysql_rm_table_part2(THD *thd, TABLE_LIST *tables, bool if_exists, DBUG_RETURN(-1); } alias= (lower_case_table_names == 2) ?
table->alias : table->table_name; - /* remove form file and isam files */ - build_table_path(path, sizeof(path), db, alias, reg_ext); + /* remove .frm file and engine files */ + path_length= build_table_filename(path, sizeof(path), + db, alias, reg_ext); } - if (drop_temporary || - (access(path,F_OK) && - ha_create_table_from_engine(thd,db,alias)) || - (!drop_view && - mysql_frm_type(thd, path, &table_type) != FRMTYPE_TABLE)) + if (table_type == NULL && + (drop_temporary || + (access(path, F_OK) && + ha_create_table_from_engine(thd, db, alias)) || + (!drop_view && + mysql_frm_type(thd, path, &frm_db_type) != FRMTYPE_TABLE))) { // Table was not found on disk and table can't be created from engine if (if_exists) @@ -278,13 +444,17 @@ int mysql_rm_table_part2(THD *thd, TABLE_LIST *tables, bool if_exists, else { char *end; - if (table_type == DB_TYPE_UNKNOWN) - mysql_frm_type(thd, path, &table_type); - *(end=fn_ext(path))=0; // Remove extension for delete - error= ha_delete_table(thd, table_type, path, table->table_name, + if (table_type == NULL) + { + mysql_frm_type(thd, path, &frm_db_type); + table_type= ha_resolve_by_legacy_type(thd, frm_db_type); + } + // Remove extension for delete + *(end= path + path_length - reg_ext_length)= '\0'; + error= ha_delete_table(thd, table_type, path, db, table->table_name, !dont_log_query); if ((error == ENOENT || error == HA_ERR_NO_SUCH_TABLE) && - (if_exists || table_type == DB_TYPE_UNKNOWN)) + (if_exists || table_type == NULL)) error= 0; if (error == HA_ERR_ROW_IS_REFERENCED) { @@ -327,12 +497,48 @@ int mysql_rm_table_part2(THD *thd, TABLE_LIST *tables, bool if_exists, if (some_tables_deleted || tmp_table_deleted || !error) { query_cache_invalidate3(thd, tables, 0); - if (!dont_log_query && mysql_bin_log.is_open()) + if (!dont_log_query) { - if (!error) - thd->clear_error(); - Query_log_event qinfo(thd, thd->query, thd->query_length, FALSE, FALSE); - mysql_bin_log.write(&qinfo); + if (!binlog_row_based || + (non_temp_tables_count > 0 && !tmp_table_deleted)) + { + /* + In this case, we are either using statement-based + replication or using row-based replication but have only + deleted one or more non-temporary tables (and no temporary + tables). In this case, we can write the original query into + the binary log. + */ + write_bin_log(thd, !error, thd->query, thd->query_length); + } + else if (binlog_row_based && + non_temp_tables_count > 0 && + tmp_table_deleted) + { + /* + In this case we have deleted both temporary and + non-temporary tables, so: + - since we have deleted a non-temporary table we have to + binlog the statement, but + - since we have deleted a temporary table we cannot binlog + the statement (since the table has not been created on the + slave, this might cause the slave to stop). + + Instead, we write a built statement, only containing the + non-temporary tables, to the binary log + */ + built_query.chop(); // Chop off the last comma + built_query.append(" /* generated by server */"); + write_bin_log(thd, !error, built_query.ptr(), built_query.length()); + } + /* + The remaining cases are: + - no tables were deleted and + - only temporary tables were deleted and row-based + replication is used. + In both these cases, nothing should be written to the binary + log.
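
The comment block above enumerates three outcomes for logging a DROP TABLE under row-based replication. A compact sketch of that decision, using hypothetical flags in place of the surrounding state:

#include <cstdio>

enum drop_log_action { LOG_ORIGINAL, LOG_BUILT, LOG_NOTHING };

// Sketch of the decision in mysql_rm_table_part2(): what to write to
// the binary log after DROP TABLE, given what was actually dropped.
static drop_log_action decide(bool row_based, int non_temp_count,
                              bool tmp_dropped)
{
  if (!row_based || (non_temp_count > 0 && !tmp_dropped))
    return LOG_ORIGINAL;   // original statement is safe to replicate
  if (non_temp_count > 0 && tmp_dropped)
    return LOG_BUILT;      // generated statement without temp tables
  return LOG_NOTHING;      // only temp tables dropped under RBR
}

int main()
{
  printf("%d %d %d\n",
         decide(false, 2, true),   // SBR: original statement
         decide(true, 1, true),    // RBR, mixed: built statement
         decide(true, 0, true));   // RBR, temp only: nothing
  return 0;
}
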
+ */ } } @@ -342,16 +548,20 @@ int mysql_rm_table_part2(THD *thd, TABLE_LIST *tables, bool if_exists, } -int quick_rm_table(enum db_type base,const char *db, +bool quick_rm_table(handlerton *base,const char *db, const char *table_name) { char path[FN_REFLEN]; - int error=0; - build_table_path(path, sizeof(path), db, table_name, reg_ext); + bool error= 0; + DBUG_ENTER("quick_rm_table"); + + uint path_length= build_table_filename(path, sizeof(path), + db, table_name, reg_ext); if (my_delete(path,MYF(0))) - error=1; /* purecov: inspected */ - *fn_ext(path)= 0; // Remove reg_ext - return ha_delete_table(current_thd, base, path, table_name, 0) || error; + error= 1; /* purecov: inspected */ + path[path_length - reg_ext_length]= '\0'; // Remove reg_ext + DBUG_RETURN(ha_delete_table(current_thd, base, path, db, table_name, 0) || + error); } /* @@ -1063,6 +1273,8 @@ static int mysql_prepare_table(THD *thd, HA_CREATE_INFO *create_info, break; case Key::FULLTEXT: key_info->flags= HA_FULLTEXT; + if ((key_info->parser_name= key->parser_name)) + key_info->flags|= HA_USES_PARSER; break; case Key::SPATIAL: #ifdef HAVE_SPATIAL @@ -1392,6 +1604,34 @@ static int mysql_prepare_table(THD *thd, HA_CREATE_INFO *create_info, /* + Set table default charset, if not set + + SYNOPSIS + set_table_default_charset() + create_info Table create information + + DESCRIPTION + If the table character set was not given explicitly, + let's fetch the database default character set and + apply it to the table. +*/ + +static void set_table_default_charset(THD *thd, + HA_CREATE_INFO *create_info, char *db) +{ + if (!create_info->default_table_charset) + { + HA_CREATE_INFO db_info; + char path[FN_REFLEN]; + /* Abuse build_table_filename() to build the path to the db.opt file */ + build_table_filename(path, sizeof(path), db, "", MY_DB_OPT_FILE); + load_db_opt(thd, path, &db_info); + create_info->default_table_charset= db_info.default_table_charset; + } +} + + +/* Extend long VARCHAR fields to blob & prepare field if it's a blob SYNOPSIS @@ -1528,6 +1768,7 @@ bool mysql_create_table(THD *thd,const char *db, const char *table_name, uint select_field_count) { char path[FN_REFLEN]; + uint path_length; const char *alias; uint db_options, key_count; KEY *key_info_buffer; @@ -1548,7 +1789,73 @@ bool mysql_create_table(THD *thd,const char *db, const char *table_name, if (create_info->row_type == ROW_TYPE_DYNAMIC) db_options|=HA_OPTION_PACK_RECORD; alias= table_case_name(create_info, table_name); - file= get_new_handler((TABLE*) 0, thd->mem_root, create_info->db_type); + if (!(file=get_new_handler((TABLE_SHARE*) 0, thd->mem_root, + create_info->db_type))) + { + my_error(ER_OUTOFMEMORY, MYF(0), 128); // 128 bytes invented + DBUG_RETURN(TRUE); + } +#ifdef WITH_PARTITION_STORAGE_ENGINE + partition_info *part_info= thd->lex->part_info; + if (part_info) + { + /* + The table has been specified as a partitioned table. + If this is part of an ALTER TABLE the handler will be the partition + handler but we need to specify the default handler to use for + partitions also in the call to check_partition_info. We transport + this information in the default_db_type variable, it is either + DB_TYPE_DEFAULT or the engine set in the ALTER TABLE command. + */ + handlerton *part_engine_type= create_info->db_type; + char *part_syntax_buf; + uint syntax_len; + if (part_engine_type == &partition_hton) + { + /* + This only happens at ALTER TABLE. + default_engine_type was assigned from the engine set in the ALTER + TABLE command.
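
set_table_default_charset() above implements a simple fallback: an explicit table character set wins, otherwise the database default from db.opt applies. A sketch with a hypothetical loader standing in for load_db_opt():

#include <cstdio>
#include <string>

// Hypothetical stand-in for load_db_opt(): pretend every database
// defaults to latin1 unless its db.opt says otherwise.
static std::string load_db_default_charset(const std::string & /*db*/)
{
  return "latin1";
}

// Sketch of set_table_default_charset(): only fill in the charset
// when CREATE TABLE did not specify one.
static std::string effective_table_charset(const std::string &explicit_cs,
                                           const std::string &db)
{
  return explicit_cs.empty() ? load_db_default_charset(db) : explicit_cs;
}

int main()
{
  printf("%s %s\n",
         effective_table_charset("utf8", "test").c_str(),  // explicit wins
         effective_table_charset("", "test").c_str());     // db default
  return 0;
}
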
+ */ + part_engine_type= ha_checktype(thd, + ha_legacy_type(part_info->default_engine_type), 0, 0); + } + else + { + part_info->default_engine_type= create_info->db_type; + } + if (check_partition_info(part_info, part_engine_type, + file, create_info->max_rows)) + goto err; + + /* + We reverse what the partitioning parser did and generate the standard + syntax format to be stored in the frm file. + */ + if (!(part_syntax_buf= generate_partition_syntax(part_info, + &syntax_len, + TRUE,TRUE))) + goto err; + part_info->part_info_string= part_syntax_buf; + part_info->part_info_len= syntax_len; + if ((!(file->partition_flags() & HA_CAN_PARTITION)) || + create_info->db_type == &partition_hton) + { + /* + The handler assigned to the table cannot handle partitioning. + Assign the partition handler as the handler of the table. + */ + DBUG_PRINT("info", ("db_type: %d part_flag: %d", + create_info->db_type,file->partition_flags())); + delete file; + create_info->db_type= &partition_hton; + if (!(file= get_ha_partition(part_info))) + { + DBUG_RETURN(TRUE); + } + } + } +#endif #ifdef NOT_USED /* @@ -1562,47 +1869,37 @@ bool mysql_create_table(THD *thd,const char *db, const char *table_name, (file->table_flags() & HA_NO_TEMP_TABLES)) { my_error(ER_ILLEGAL_HA, MYF(0), table_name); - DBUG_RETURN(TRUE); + goto err; } #endif - /* - If the table character set was not given explicitely, - let's fetch the database default character set and - apply it to the table. - */ - if (!create_info->default_table_charset) - { - HA_CREATE_INFO db_info; - char path[FN_REFLEN]; - /* Abuse build_table_path() to build the path to the db.opt file */ - build_table_path(path, sizeof(path), db, MY_DB_OPT_FILE, ""); - load_db_opt(thd, path, &db_info); - create_info->default_table_charset= db_info.default_table_charset; - } + set_table_default_charset(thd, create_info, (char*) db); if (mysql_prepare_table(thd, create_info, &fields, &keys, internal_tmp_table, &db_options, file, &key_info_buffer, &key_count, select_field_count)) - DBUG_RETURN(TRUE); + goto err; /* Check if table exists */ if (create_info->options & HA_LEX_CREATE_TMP_TABLE) { - my_snprintf(path, sizeof(path), "%s%s%lx_%lx_%x%s", - mysql_tmpdir, tmp_file_prefix, current_pid, thd->thread_id, - thd->tmp_table++, reg_ext); + char tmp_table_name[tmp_file_prefix_length+22+22+22+3]; + my_snprintf(tmp_table_name, sizeof(tmp_table_name), "%s%lx_%lx_%x", + tmp_file_prefix, current_pid, thd->thread_id, + thd->tmp_table++); + path_length= build_tmptable_filename(path, sizeof(path), mysql_tmpdir, + tmp_table_name, reg_ext); if (lower_case_table_names) my_casedn_str(files_charset_info, path); create_info->table_options|=HA_CREATE_DELAY_KEY_WRITE; } else - build_table_path(path, sizeof(path), db, alias, reg_ext); + path_length= build_table_filename(path, sizeof(path), db, alias, reg_ext); /* Check if table already exists */ - if ((create_info->options & HA_LEX_CREATE_TMP_TABLE) - && find_temporary_table(thd,db,table_name)) + if ((create_info->options & HA_LEX_CREATE_TMP_TABLE) && + find_temporary_table(thd, db, table_name)) { if (create_info->options & HA_LEX_CREATE_IF_NOT_EXISTS) { @@ -1610,13 +1907,14 @@ bool mysql_create_table(THD *thd,const char *db, const char *table_name, push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_NOTE, ER_TABLE_EXISTS_ERROR, ER(ER_TABLE_EXISTS_ERROR), alias); - DBUG_RETURN(FALSE); + error= 0; + goto err; } my_error(ER_TABLE_EXISTS_ERROR, MYF(0), alias); - DBUG_RETURN(TRUE); + goto err; } if (wait_if_global_read_lock(thd, 0, 1)) - DBUG_RETURN(TRUE); + goto err;
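
The branch above falls back to the generic partition handler whenever the chosen engine cannot partition natively (the HA_CAN_PARTITION check). A sketch of just that selection, with an illustrative engine descriptor in place of the handlerton machinery:

#include <cstdio>

// Illustrative engine descriptor; can_partition_natively mirrors the
// handler::partition_flags() & HA_CAN_PARTITION test used above.
struct Engine { const char *name; bool can_partition_natively; };

// Sketch: pick the handler that will own a partitioned table.
static const char *engine_for_partitioned_table(const Engine &e)
{
  if (e.can_partition_natively)
    return e.name;        // engine does its own partitioning
  return "partition";     // wrap the engine in the ha_partition handler
}

int main()
{
  Engine myisam= { "MyISAM", false };
  printf("%s\n", engine_for_partitioned_table(myisam)); // -> partition
  return 0;
}
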
VOID(pthread_mutex_lock(&LOCK_open)); if (!internal_tmp_table && !(create_info->options & HA_LEX_CREATE_TMP_TABLE)) { @@ -1625,8 +1923,9 @@ bool mysql_create_table(THD *thd,const char *db, const char *table_name, if (create_info->options & HA_LEX_CREATE_IF_NOT_EXISTS) goto warn; my_error(ER_TABLE_EXISTS_ERROR,MYF(0),table_name); - goto end; + goto unlock_and_end; } + DBUG_ASSERT(get_cached_table_share(db, alias) == 0); } /* @@ -1649,7 +1948,7 @@ bool mysql_create_table(THD *thd,const char *db, const char *table_name, if (create_if_not_exists) goto warn; my_error(ER_TABLE_EXISTS_ERROR,MYF(0),table_name); - goto end; + goto unlock_and_end; } } @@ -1660,32 +1959,42 @@ bool mysql_create_table(THD *thd,const char *db, const char *table_name, create_info->data_file_name= create_info->index_file_name= 0; create_info->table_options=db_options; - if (rea_create_table(thd, path, db, table_name, - create_info, fields, key_count, - key_info_buffer)) - goto end; + path[path_length - reg_ext_length]= '\0'; // Remove .frm extension + if (rea_create_table(thd, path, db, table_name, create_info, fields, + key_count, key_info_buffer, file)) + goto unlock_and_end; + if (create_info->options & HA_LEX_CREATE_TMP_TABLE) { /* Open table and put in temporary table list */ if (!(open_temporary_table(thd, path, db, table_name, 1))) { (void) rm_temporary_table(create_info->db_type, path); - goto end; + goto unlock_and_end; } thd->tmp_table_used= 1; } - if (!internal_tmp_table && mysql_bin_log.is_open()) - { - thd->clear_error(); - Query_log_event qinfo(thd, thd->query, thd->query_length, FALSE, FALSE); - mysql_bin_log.write(&qinfo); - } - error= FALSE; -end: + /* + Don't write statement if: + - It is an internal temporary table, + - Row-based logging is used and we are creating a temporary table, or + - The binary log is not open. + Otherwise, the statement shall be binlogged.
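
The rule in the comment above can be read as a single predicate. A sketch, with hypothetical flag names standing in for the checks in the code (the binlog-open test itself lives inside write_bin_log()):

#include <cstdio>

// Sketch of the CREATE TABLE binlog decision: the statement is written
// unless it creates an internal temporary table, or row-based logging
// is active and the statement creates a user temporary table.
static bool should_binlog_create(bool internal_tmp_table,
                                 bool row_based, bool creates_tmp_table)
{
  if (internal_tmp_table)
    return false;
  if (row_based && creates_tmp_table)
    return false;
  return true;
}

int main()
{
  printf("%d %d %d\n",
         should_binlog_create(false, false, true),  // SBR: log
         should_binlog_create(false, true, true),   // RBR + tmp: skip
         should_binlog_create(true, false, false)); // internal: skip
  return 0;
}
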
+ */ + if (!internal_tmp_table && + (!binlog_row_based || + (binlog_row_based && + !(create_info->options & HA_LEX_CREATE_TMP_TABLE)))) + write_bin_log(thd, TRUE, thd->query, thd->query_length); + error= FALSE; +unlock_and_end: VOID(pthread_mutex_unlock(&LOCK_open)); start_waiting_global_read_lock(thd); + +err: thd->proc_info="After create"; + delete file; DBUG_RETURN(error); warn: @@ -1694,7 +2003,7 @@ warn: ER_TABLE_EXISTS_ERROR, ER(ER_TABLE_EXISTS_ERROR), alias); create_info->table_existed= 1; // Mark that table existed - goto end; + goto unlock_and_end; } /* @@ -1748,6 +2057,7 @@ TABLE *create_table_from_items(THD *thd, HA_CREATE_INFO *create_info, MYSQL_LOCK **lock) { TABLE tmp_table; // Used during 'create_field()' + TABLE_SHARE share; TABLE *table= 0; uint select_field_count= items->elements; /* Add selected items to field list */ @@ -1759,11 +2069,14 @@ TABLE *create_table_from_items(THD *thd, HA_CREATE_INFO *create_info, tmp_table.alias= 0; tmp_table.timestamp_field= 0; - tmp_table.s= &tmp_table.share_not_to_be_used; + tmp_table.s= &share; + init_tmp_table_share(&share, "", 0, "", ""); + tmp_table.s->db_create_options=0; tmp_table.s->blob_ptr_size= portable_sizeof_char_ptr; - tmp_table.s->db_low_byte_first= test(create_info->db_type == DB_TYPE_MYISAM || - create_info->db_type == DB_TYPE_HEAP); + tmp_table.s->db_low_byte_first= + test(create_info->db_type == &myisam_hton || + create_info->db_type == &heap_hton); tmp_table.null_row=tmp_table.maybe_null=0; while ((item=it++)) @@ -1840,7 +2153,7 @@ TABLE *create_table_from_items(THD *thd, HA_CREATE_INFO *create_info, ****************************************************************************/ bool -mysql_rename_table(enum db_type base, +mysql_rename_table(handlerton *base, const char *old_db, const char *old_name, const char *new_db, @@ -1850,13 +2163,15 @@ mysql_rename_table(enum db_type base, char from[FN_REFLEN], to[FN_REFLEN], lc_from[FN_REFLEN], lc_to[FN_REFLEN]; char *from_base= from, *to_base= to; char tmp_name[NAME_LEN+1]; - handler *file= (base == DB_TYPE_UNKNOWN ? 0 : - get_new_handler((TABLE*) 0, thd->mem_root, base)); + handler *file; int error=0; DBUG_ENTER("mysql_rename_table"); - build_table_path(from, sizeof(from), old_db, old_name, ""); - build_table_path(to, sizeof(to), new_db, new_name, ""); + file= (base == NULL ? 
0 : + get_new_handler((TABLE_SHARE*) 0, thd->mem_root, base)); + + build_table_filename(from, sizeof(from), old_db, old_name, ""); + build_table_filename(to, sizeof(to), new_db, new_name, ""); /* If lower_case_table_names == 2 (case-preserving but case-insensitive @@ -1868,12 +2183,12 @@ mysql_rename_table(enum db_type base, { strmov(tmp_name, old_name); my_casedn_str(files_charset_info, tmp_name); - build_table_path(lc_from, sizeof(lc_from), old_db, tmp_name, ""); + build_table_filename(lc_from, sizeof(lc_from), old_db, tmp_name, ""); from_base= lc_from; strmov(tmp_name, new_name); my_casedn_str(files_charset_info, tmp_name); - build_table_path(lc_to, sizeof(lc_to), new_db, tmp_name, ""); + build_table_filename(lc_to, sizeof(lc_to), new_db, tmp_name, ""); to_base= lc_to; } @@ -1915,17 +2230,19 @@ mysql_rename_table(enum db_type base, static void wait_while_table_is_used(THD *thd,TABLE *table, enum ha_extra_function function) { - DBUG_PRINT("enter",("table: %s", table->s->table_name)); DBUG_ENTER("wait_while_table_is_used"); - safe_mutex_assert_owner(&LOCK_open); + DBUG_PRINT("enter", ("table: '%s' share: 0x%lx db_stat: %u version: %u", + table->s->table_name.str, (ulong) table->s, + table->db_stat, table->s->version)); VOID(table->file->extra(function)); /* Mark all tables that are in use as 'old' */ mysql_lock_abort(thd, table); // end threads waiting on lock /* Wait until there are no other threads that have this table open */ - remove_table_from_cache(thd, table->s->db, - table->s->table_name, RTFC_WAIT_OTHER_THREAD_FLAG); + remove_table_from_cache(thd, table->s->db.str, + table->s->table_name.str, + RTFC_WAIT_OTHER_THREAD_FLAG); DBUG_VOID_RETURN; } @@ -1996,23 +2313,21 @@ static int prepare_for_restore(THD* thd, TABLE_LIST* table, else { char* backup_dir= thd->lex->backup_dir; - char src_path[FN_REFLEN], dst_path[FN_REFLEN]; + char src_path[FN_REFLEN], dst_path[FN_REFLEN], uname[FN_REFLEN]; char* table_name= table->table_name; char* db= table->db; - if (fn_format_relative_to_data_home(src_path, table_name, backup_dir, - reg_ext)) + VOID(tablename_to_filename(table->table_name, uname, sizeof(uname))); + + if (fn_format_relative_to_data_home(src_path, uname, backup_dir, reg_ext)) DBUG_RETURN(-1); // protect buffer overflow - my_snprintf(dst_path, sizeof(dst_path), "%s%s/%s", - mysql_real_data_home, db, table_name); + build_table_filename(dst_path, sizeof(dst_path), db, table_name, reg_ext); if (lock_and_wait_for_table_name(thd,table)) DBUG_RETURN(-1); - if (my_copy(src_path, - fn_format(dst_path, dst_path,"", reg_ext, 4), - MYF(MY_WME))) + if (my_copy(src_path, dst_path, MYF(MY_WME))) { pthread_mutex_lock(&LOCK_open); unlock_table_name(thd, table); @@ -2047,11 +2362,15 @@ static int prepare_for_restore(THD* thd, TABLE_LIST* table, } -static int prepare_for_repair(THD* thd, TABLE_LIST *table_list, +static int prepare_for_repair(THD *thd, TABLE_LIST *table_list, HA_CHECK_OPT *check_opt) { int error= 0; TABLE tmp_table, *table; + TABLE_SHARE *share; + char from[FN_REFLEN],tmp[FN_REFLEN+32]; + const char **ext; + MY_STAT stat_info; DBUG_ENTER("prepare_for_repair"); if (!(check_opt->sql_flags & TT_USEFRM)) @@ -2059,12 +2378,26 @@ static int prepare_for_repair(THD* thd, TABLE_LIST *table_list, if (!(table= table_list->table)) /* if open_ltable failed */ { - char name[FN_REFLEN]; - build_table_path(name, sizeof(name), table_list->db, - table_list->table_name, ""); - if (openfrm(thd, name, "", 0, 0, 0, &tmp_table)) + char key[MAX_DBKEY_LENGTH]; + uint key_length; + + key_length=
create_table_def_key(thd, key, table_list, 0); + pthread_mutex_lock(&LOCK_open); + if (!(share= (get_table_share(thd, table_list, key, key_length, 0, + &error)))) + { + pthread_mutex_unlock(&LOCK_open); DBUG_RETURN(0); // Can't open frm file + } + + if (open_table_from_share(thd, share, "", 0, 0, 0, &tmp_table)) + { + release_table_share(share, RELEASE_NORMAL); + pthread_mutex_unlock(&LOCK_open); + DBUG_RETURN(0); // Out of memory + } table= &tmp_table; + pthread_mutex_unlock(&LOCK_open); } /* @@ -2077,18 +2410,16 @@ static int prepare_for_repair(THD* thd, TABLE_LIST *table_list, - Run a normal repair using the new index file and the old data file */ - char from[FN_REFLEN],tmp[FN_REFLEN+32]; - const char **ext= table->file->bas_ext(); - MY_STAT stat_info; - /* Check if this is a table type that stores index and data separately, like ISAM or MyISAM */ + ext= table->file->bas_ext(); if (!ext[0] || !ext[1]) goto end; // No data file - strxmov(from, table->s->path, ext[1], NullS); // Name of data file + // Name of data file + strxmov(from, table->s->normalized_path.str, ext[1], NullS); if (!my_stat(from, &stat_info, MYF(0))) goto end; // Can't use USE_FRM flag @@ -2152,7 +2483,11 @@ static int prepare_for_repair(THD* thd, TABLE_LIST *table_list, end: if (table == &tmp_table) - closefrm(table); // Free allocated memory + { + pthread_mutex_lock(&LOCK_open); + closefrm(table, 1); // Free allocated memory + pthread_mutex_unlock(&LOCK_open); + } DBUG_RETURN(error); } @@ -2315,8 +2650,8 @@ static bool mysql_admin_table(THD* thd, TABLE_LIST* tables, const char *old_message=thd->enter_cond(&COND_refresh, &LOCK_open, "Waiting to get writelock"); mysql_lock_abort(thd,table->table); - remove_table_from_cache(thd, table->table->s->db, - table->table->s->table_name, + remove_table_from_cache(thd, table->table->s->db.str, + table->table->s->table_name.str, RTFC_WAIT_OTHER_THREAD_FLAG | RTFC_CHECK_KILLED_FLAG); thd->exit_cond(old_message); @@ -2484,8 +2819,8 @@ send_result_message: else if (open_for_modify) { pthread_mutex_lock(&LOCK_open); - remove_table_from_cache(thd, table->table->s->db, - table->table->s->table_name, RTFC_NO_FLAG); + remove_table_from_cache(thd, table->table->s->db.str, + table->table->s->table_name.str, RTFC_NO_FLAG); pthread_mutex_unlock(&LOCK_open); /* Something may be modified, that's why we have to invalidate cache */ query_cache_invalidate3(thd, table->table, 0); @@ -2663,15 +2998,16 @@ bool mysql_create_like_table(THD* thd, TABLE_LIST* table, HA_CREATE_INFO *create_info, Table_ident *table_ident) { - TABLE **tmp_table; + TABLE *tmp_table; char src_path[FN_REFLEN], dst_path[FN_REFLEN]; + uint dst_path_length; char *db= table->db; char *table_name= table->table_name; char *src_db; char *src_table= table_ident->table.str; int err; bool res= TRUE; - db_type not_used; + enum legacy_db_type not_used; TABLE_LIST src_tables_list; DBUG_ENTER("mysql_create_like_table"); @@ -2701,13 +3037,13 @@ bool mysql_create_like_table(THD* thd, TABLE_LIST* table, goto err; if ((tmp_table= find_temporary_table(thd, src_db, src_table))) - strxmov(src_path, (*tmp_table)->s->path, reg_ext, NullS); + strxmov(src_path, tmp_table->s->path.str, reg_ext, NullS); else { - strxmov(src_path, mysql_data_home, "/", src_db, "/", src_table, - reg_ext, NullS); + build_table_filename(src_path, sizeof(src_path), + src_db, src_table, reg_ext); /* Resolve symlinks (for windows) */ - fn_format(src_path, src_path, "", "", MYF(MY_UNPACK_FILENAME)); + unpack_filename(src_path, src_path); if (lower_case_table_names) 
my_casedn_str(files_charset_info, src_path); if (access(src_path, F_OK)) @@ -2736,18 +3072,18 @@ bool mysql_create_like_table(THD* thd, TABLE_LIST* table, { if (find_temporary_table(thd, db, table_name)) goto table_exists; - my_snprintf(dst_path, sizeof(dst_path), "%s%s%lx_%lx_%x%s", - mysql_tmpdir, tmp_file_prefix, current_pid, - thd->thread_id, thd->tmp_table++, reg_ext); + dst_path_length= my_snprintf(dst_path, sizeof(dst_path), + "%s%s%lx_%lx_%x%s", + mysql_tmpdir, tmp_file_prefix, current_pid, + thd->thread_id, thd->tmp_table++, reg_ext); if (lower_case_table_names) my_casedn_str(files_charset_info, dst_path); create_info->table_options|= HA_CREATE_DELAY_KEY_WRITE; } else { - strxmov(dst_path, mysql_data_home, "/", db, "/", table_name, - reg_ext, NullS); - fn_format(dst_path, dst_path, "", "", MYF(MY_UNPACK_FILENAME)); + dst_path_length= build_table_filename(dst_path, sizeof(dst_path), + db, table_name, reg_ext); if (!access(dst_path, F_OK)) goto table_exists; } @@ -2769,8 +3105,8 @@ bool mysql_create_like_table(THD* thd, TABLE_LIST* table, creation, instead create the table directly (for both normal and temporary tables). */ - *fn_ext(dst_path)= 0; - err= ha_create_table(dst_path, create_info, 1); + dst_path[dst_path_length - reg_ext_length]= '\0'; // Remove .frm + err= ha_create_table(thd, dst_path, db, table_name, create_info, 1); if (create_info->options & HA_LEX_CREATE_TMP_TABLE) { @@ -2788,13 +3124,63 @@ bool mysql_create_like_table(THD* thd, TABLE_LIST* table, goto err; /* purecov: inspected */ } - // Must be written before unlock - if (mysql_bin_log.is_open()) + /* + We have to write the query before we unlock the tables. + */ + if (binlog_row_based) { - thd->clear_error(); - Query_log_event qinfo(thd, thd->query, thd->query_length, FALSE, FALSE); - mysql_bin_log.write(&qinfo); + /* + Since temporary tables are not replicated under row-based + replication, CREATE TABLE ... LIKE ... needs special + treatment. We have four cases to consider, according to the + following decision table: + + ==== ========= ========= ============================== + Case Target Source Write to binary log + ==== ========= ========= ============================== + 1 normal normal Original statement + 2 normal temporary Generated statement + 3 temporary normal Nothing + 4 temporary temporary Nothing + ==== ========= ========= ============================== + + The variable 'tmp_table' below is used to see if the source + table is a temporary table: if it is set, then the source table + was a temporary table and we can take appropriate actions. + */ + if (!(create_info->options & HA_LEX_CREATE_TMP_TABLE)) + { + if (tmp_table) // Case 2 + { + char buf[2048]; + String query(buf, sizeof(buf), system_charset_info); + query.length(0); // Have to zero it since constructor doesn't + TABLE *table_ptr; + int error; + + /* + Let's open and lock the table: it will be closed (and + unlocked) by close_thread_tables() at the end of the + statement anyway.
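
The decision table above compresses into a tiny function. A sketch (names hypothetical; "source temporary" corresponds to the tmp_table variable in the code):

#include <cstdio>

enum like_log_action { LOG_ORIGINAL, LOG_GENERATED, LOG_NOTHING };

// Sketch of the four-case decision table for CREATE TABLE ... LIKE
// under row-based replication.
static like_log_action decide(bool target_is_temp, bool source_is_temp)
{
  if (target_is_temp)
    return LOG_NOTHING;                    // cases 3 and 4
  return source_is_temp ? LOG_GENERATED    // case 2
                        : LOG_ORIGINAL;    // case 1
}

int main()
{
  printf("%d %d %d %d\n", decide(false, false), decide(false, true),
         decide(true, false), decide(true, true));
  return 0;
}
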
+ */ + if (!(table_ptr= open_ltable(thd, table, TL_READ_NO_INSERT))) + goto err; + + int result= store_create_info(thd, table, &query, create_info); + + DBUG_ASSERT(result == 0); // store_create_info() always returns 0 + write_bin_log(thd, TRUE, query.ptr(), query.length()); + } + else // Case 1 + write_bin_log(thd, TRUE, thd->query, thd->query_length); + } + /* + Cases 3 and 4 do nothing under RBR + */ } + else + write_bin_log(thd, TRUE, thd->query, thd->query_length); + res= FALSE; goto err; @@ -2900,11 +3286,7 @@ mysql_discard_or_import_tablespace(THD *thd, error=1; if (error) goto err; - if (mysql_bin_log.is_open()) - { - Query_log_event qinfo(thd, thd->query, thd->query_length, FALSE, FALSE); - mysql_bin_log.write(&qinfo); - } + write_bin_log(thd, FALSE, thd->query, thd->query_length); err: close_thread_tables(thd); thd->tablespace_op=FALSE; @@ -2968,7 +3350,7 @@ int mysql_create_indexes(THD *thd, TABLE_LIST *table_list, List<Key> &keys) fields.push_back(c_fld); } bzero((char*) &create_info,sizeof(create_info)); - create_info.db_type=DB_TYPE_DEFAULT; + create_info.db_type= (handlerton*) &default_hton; create_info.default_table_charset= thd->variables.collation_database; db_options= 0; if (mysql_prepare_table(thd, &create_info, &fields, @@ -2992,7 +3374,7 @@ int mysql_create_indexes(THD *thd, TABLE_LIST *table_list, List<Key> &keys) { /* Re-initialize the create_info, which was changed by prepare table. */ bzero((char*) &create_info,sizeof(create_info)); - create_info.db_type=DB_TYPE_DEFAULT; + create_info.db_type= (handlerton*) &default_hton; create_info.default_table_charset= thd->variables.collation_database; /* Cleanup the fields list. We do not want to create existing fields. */ fields.delete_elements(); @@ -3006,10 +3388,10 @@ int mysql_create_indexes(THD *thd, TABLE_LIST *table_list, List<Key> &keys) else { if (table->file->add_index(table, key_info_buffer, key_count)|| - build_table_path(path, sizeof(path), table_list->db, - (lower_case_table_names == 2) ? - table_list->alias : table_list->table_name, - reg_ext) == 0 || + build_table_filename(path, sizeof(path), table_list->db, + (lower_case_table_names == 2) ? + table_list->alias : table_list->table_name, + reg_ext) == 0 || mysql_create_frm(thd, path, &create_info, fields, key_count, key_info_buffer, table->file)) /* don't need to free((gptr) key_info_buffer);*/ @@ -3087,7 +3469,7 @@ int mysql_drop_indexes(THD *thd, TABLE_LIST *table_list, } bzero((char*) &create_info,sizeof(create_info)); - create_info.db_type=DB_TYPE_DEFAULT; + create_info.db_type= (handlerton*) &default_hton; create_info.default_table_charset= thd->variables.collation_database; if ((drop_key)|| (drop.elements<= 0)) @@ -3107,10 +3489,10 @@ int mysql_drop_indexes(THD *thd, TABLE_LIST *table_list, &keys, /*tmp_table*/ 0, &db_options, table->file, &key_info_buffer, key_count, /*select_field_count*/ 0)|| - build_table_path(path, sizeof(path), table_list->db, - (lower_case_table_names == 2) ? - table_list->alias : table_list->table_name, - reg_ext) == 0 || + build_table_filename(path, sizeof(path), table_list->db, + (lower_case_table_names == 2) ?
+ table_list->alias : table_list->table_name, + reg_ext) == 0 || mysql_create_frm(thd, path, &create_info, fields, key_count, key_info_buffer, table->file)) /*don't need to free((gptr) key_numbers);*/ @@ -3123,6 +3505,166 @@ int mysql_drop_indexes(THD *thd, TABLE_LIST *table_list, #endif /* NOT_USED */ + +#define ALTER_TABLE_DATA_CHANGED 1 +#define ALTER_TABLE_INDEX_CHANGED 2 + +/* + SYNOPSIS + compare_tables() + table original table + create_list fields in new table + key_list keys in new table + create_info create options in new table + + DESCRIPTION + 'table' (first argument) contains information of the original + table, which includes all corresponding parts that the new + table has in arguments create_list, key_list and create_info. + + By comparing the changes between the original and new table + we can determine how much it has changed after ALTER TABLE + and whether we need to make a copy of the table, or just change + the .frm file. + + RETURN VALUES + 0 No copy needed + 1 Data changes, copy needed + 2 Index changes, copy needed +*/ + +uint compare_tables(TABLE *table, List<create_field> *create_list, + List<Key> *key_list, HA_CREATE_INFO *create_info, + ALTER_INFO *alter_info, uint order_num) +{ + Field **f_ptr, *field; + uint changes= 0, tmp; + List_iterator_fast<create_field> new_field_it(*create_list); + create_field *new_field; + + /* + Some very basic checks. If the number of fields or the handler + changes, we need to run a full ALTER TABLE. In the future + new fields may be added and old ones dropped without a copy, but + not yet. + + Also test that no engine was given during ALTER TABLE, or + we are forced to run a regular alter table (copy). + E.g. ALTER TABLE tbl_name ENGINE=MyISAM. + + For the following ones we also want to run regular alter table: + ALTER TABLE tbl_name ORDER BY .. + ALTER TABLE tbl_name CONVERT TO CHARACTER SET .. + + At the moment we can't handle altering temporary tables without a copy. + We also test if OPTIMIZE TABLE was given and was mapped to alter table. + In that case we always do full copy. + */ + if (table->s->fields != create_list->elements || + table->s->db_type != create_info->db_type || + table->s->tmp_table || + create_info->used_fields & HA_CREATE_USED_ENGINE || + create_info->used_fields & HA_CREATE_USED_CHARSET || + create_info->used_fields & HA_CREATE_USED_DEFAULT_CHARSET || + (alter_info->flags & ALTER_RECREATE) || + order_num) + return ALTER_TABLE_DATA_CHANGED; + + /* + Go through fields and check if the original ones are compatible + with new table. + */ + for (f_ptr= table->field, new_field= new_field_it++; + (field= *f_ptr); f_ptr++, new_field= new_field_it++) + { + /* Make sure we have at least the default charset in use. */ + if (!new_field->charset) + new_field->charset= create_info->default_table_charset; + + /* Check that NULL behavior is same for old and new fields */ + if ((new_field->flags & NOT_NULL_FLAG) != + (uint) (field->flags & NOT_NULL_FLAG)) + return ALTER_TABLE_DATA_CHANGED; + + /* Don't pack rows in old tables if the user has requested this.
*/ + if (create_info->row_type == ROW_TYPE_DYNAMIC || + (new_field->flags & BLOB_FLAG) || + (new_field->sql_type == MYSQL_TYPE_VARCHAR && + create_info->row_type != ROW_TYPE_FIXED)) + create_info->table_options|= HA_OPTION_PACK_RECORD; + + /* Evaluate changes bitmap and send to check_if_incompatible_data() */ + if (!(tmp= field->is_equal(new_field))) + return ALTER_TABLE_DATA_CHANGED; + + changes|= tmp; + } + /* Check if changes are compatible with current handler without a copy */ + if (table->file->check_if_incompatible_data(create_info, changes)) + return ALTER_TABLE_DATA_CHANGED; + + /* + Go through keys and check if the original ones are compatible + with new table. + */ + KEY *table_key_info= table->key_info; + List_iterator_fast<Key> key_it(*key_list); + Key *key= key_it++; + + /* Check if the number of keys has changed */ + if (table->s->keys != key_list->elements) + return ALTER_TABLE_INDEX_CHANGED; + + for (uint i= 0; i < table->s->keys; i++, table_key_info++, key= key_it++) + { + /* + Check that the key types are compatible between old and new tables. + */ + if (table_key_info->algorithm != key->algorithm || + ((key->type == Key::PRIMARY || key->type == Key::UNIQUE) && + !(table_key_info->flags & HA_NOSAME)) || + (!(key->type == Key::PRIMARY || key->type == Key::UNIQUE) && + (table_key_info->flags & HA_NOSAME)) || + ((key->type == Key::SPATIAL) && + !(table_key_info->flags & HA_SPATIAL)) || + (!(key->type == Key::SPATIAL) && + (table_key_info->flags & HA_SPATIAL)) || + ((key->type == Key::FULLTEXT) && + !(table_key_info->flags & HA_FULLTEXT)) || + (!(key->type == Key::FULLTEXT) && + (table_key_info->flags & HA_FULLTEXT))) + return ALTER_TABLE_INDEX_CHANGED; + + if (table_key_info->key_parts != key->columns.elements) + return ALTER_TABLE_INDEX_CHANGED; + + /* + Check that the key parts remain compatible between the old and + new tables.
+ */ + KEY_PART_INFO *table_key_part= table_key_info->key_part; + List_iterator_fast<key_part_spec> key_part_it(key->columns); + key_part_spec *key_part= key_part_it++; + for (uint j= 0; j < table_key_info->key_parts; j++, + table_key_part++, key_part= key_part_it++) + { + /* + Key definition has changed if we are using a different field or + if the used key length is different + (If key_part->length == 0 it means we are using the whole field) + */ + if (strcmp(key_part->field_name, table_key_part->field->field_name) || + (key_part->length && key_part->length != table_key_part->length) || + (key_part->length == 0 && table_key_part->length != + table_key_part->field->pack_length())) + return ALTER_TABLE_INDEX_CHANGED; + } + } + + return 0; // Tables are compatible +} + + /* Alter table */ @@ -3143,8 +3685,14 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name, ha_rows copied,deleted; ulonglong next_insert_id; uint db_create_options, used_fields; - enum db_type old_db_type,new_db_type; - bool need_copy_table; + handlerton *old_db_type, *new_db_type; + uint need_copy_table= 0; +#ifdef WITH_PARTITION_STORAGE_ENGINE + bool online_add_empty_partition= FALSE; + bool online_drop_partition= FALSE; + bool partition_changed= FALSE; + handlerton *default_engine_type; +#endif DBUG_ENTER("mysql_alter_table"); thd->proc_info="init"; @@ -3190,7 +3738,7 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name, } else { - if (table->s->tmp_table) + if (table->s->tmp_table != NO_TMP_TABLE) { if (find_temporary_table(thd,new_db,new_name_buff)) { @@ -3201,7 +3749,8 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name, else { char dir_buff[FN_REFLEN]; - strxnmov(dir_buff, FN_REFLEN, mysql_real_data_home, new_db, NullS); + strxnmov(dir_buff, sizeof(dir_buff)-1, + mysql_real_data_home, new_db, NullS); if (!access(fn_format(new_name_buff,new_name_buff,dir_buff,reg_ext,0), F_OK)) { @@ -3219,8 +3768,419 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name, } old_db_type= table->s->db_type; - if (create_info->db_type == DB_TYPE_DEFAULT) + if (create_info->db_type == (handlerton*) &default_hton) create_info->db_type= old_db_type; + +#ifdef WITH_PARTITION_STORAGE_ENGINE + /* + We need to handle partition management commands such as ADD PARTITION + here, as well as ALTER TABLEs that completely change the partitioning + and others that don't change anything at all. We start + by checking the partition management variants and then check the general + change patterns. + */ + if (alter_info->flags & (ALTER_ADD_PARTITION + + ALTER_DROP_PARTITION + ALTER_COALESCE_PARTITION + + ALTER_REORGANISE_PARTITION)) + { + partition_info *tab_part_info= table->part_info; + if (!tab_part_info) + { + my_error(ER_PARTITION_MGMT_ON_NONPARTITIONED, MYF(0)); + DBUG_RETURN(TRUE); + } + default_engine_type= tab_part_info->default_engine_type; + /* + We are going to manipulate the partition info on the table object + so we need to ensure that the data structure of the table object + is freed by setting version to 0. + */ + table->s->version= 0L; + if (alter_info->flags == ALTER_ADD_PARTITION) + { + /* + We start by moving the new partitions to the list of temporary + partitions. We will then check that the new partitions fit in the + partitioning scheme as currently set-up. + Partitions are always added at the end in ADD PARTITION.
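
The verdict from compare_tables() above is what mysql_alter_table() later turns into need_copy_table and create_info->frm_only. A sketch of the three-way outcome, with hypothetical predicates summarizing the groups of checks:

#include <cstdio>

enum alter_verdict { FRM_ONLY= 0, COPY_DATA= 1, REBUILD_INDEX= 2 };

// Sketch of the compare_tables() result: 0 permits an .frm-only
// ALTER TABLE; anything else forces the copy algorithm.
static alter_verdict compare_tables_sketch(bool structural_change,
                                           bool field_incompatible,
                                           bool key_changed)
{
  if (structural_change || field_incompatible)
    return COPY_DATA;       // ALTER_TABLE_DATA_CHANGED
  if (key_changed)
    return REBUILD_INDEX;   // ALTER_TABLE_INDEX_CHANGED
  return FRM_ONLY;          // tables are compatible
}

int main()
{
  printf("%d\n", compare_tables_sketch(false, false, true)); // -> 2
  return 0;
}
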
+ */ + partition_info *alt_part_info= thd->lex->part_info; + uint no_new_partitions= alt_part_info->no_parts; + uint no_orig_partitions= tab_part_info->no_parts; + uint check_total_partitions= no_new_partitions + no_orig_partitions; + uint new_total_partitions= check_total_partitions; + /* + We allow quite a lot of values to be supplied by defaults; however, we + must know the number of new partitions in this case. + */ + if (no_new_partitions == 0) + { + my_error(ER_ADD_PARTITION_NO_NEW_PARTITION, MYF(0)); + DBUG_RETURN(TRUE); + } + if (is_sub_partitioned(tab_part_info)) + { + if (alt_part_info->no_subparts == 0) + alt_part_info->no_subparts= tab_part_info->no_subparts; + else if (alt_part_info->no_subparts != tab_part_info->no_subparts) + { + my_error(ER_ADD_PARTITION_SUBPART_ERROR, MYF(0)); + DBUG_RETURN(TRUE); + } + check_total_partitions= new_total_partitions* + alt_part_info->no_subparts; + } + if (check_total_partitions > MAX_PARTITIONS) + { + my_error(ER_TOO_MANY_PARTITIONS_ERROR, MYF(0)); + DBUG_RETURN(TRUE); + } + alt_part_info->part_type= tab_part_info->part_type; + if (set_up_defaults_for_partitioning(alt_part_info, + table->file, + (ulonglong)0ULL, + tab_part_info->no_parts)) + { + DBUG_RETURN(TRUE); + } + /* + Need to concatenate the lists here to make it possible to check the + partition info for correctness using check_partition_info + */ + { + List_iterator<partition_element> alt_it(alt_part_info->partitions); + uint part_count= 0; + do + { + partition_element *part_elem= alt_it++; + tab_part_info->partitions.push_back(part_elem); + tab_part_info->temp_partitions.push_back(part_elem); + } while (++part_count < no_new_partitions); + tab_part_info->no_parts+= no_new_partitions; + } + { + List_iterator<partition_element> tab_it(tab_part_info->partitions); + partition_element *part_elem= tab_it++; + if (is_sub_partitioned(tab_part_info)) + { + List_iterator<partition_element> sub_it(part_elem->subpartitions); + part_elem= sub_it++; + } + if (check_partition_info(tab_part_info, part_elem->engine_type, + table->file, (ulonglong)0ULL)) + { + DBUG_RETURN(TRUE); + } + } + create_info->db_type= &partition_hton; + thd->lex->part_info= tab_part_info; + if (table->file->alter_table_flags() & HA_ONLINE_ADD_EMPTY_PARTITION && + (tab_part_info->part_type == RANGE_PARTITION || + tab_part_info->part_type == LIST_PARTITION)) + { + /* + For range and list partitions add partition is simply adding a new + empty partition to the table. If the handler supports this we will + use the simple method of doing this. In this case we need to break + out the new partitions from the list again and only keep them in the + temporary list. Added partitions are always added at the end. + */ + { + List_iterator<partition_element> tab_it(tab_part_info->partitions); + uint part_count= 0; + do + { + tab_it++; + } while (++part_count < no_orig_partitions); + do + { + tab_it++; + tab_it.remove(); + } while (++part_count < new_total_partitions); + } + tab_part_info->no_parts-= no_new_partitions; + online_add_empty_partition= TRUE; + } + else + { + tab_part_info->temp_partitions.empty(); + } + } + else if (alter_info->flags == ALTER_DROP_PARTITION) + { + /* + Dropping a partition from RANGE or LIST partitioning is + always safe and can be made more or less immediate. It is necessary + however to ensure that the partition to be removed is safely removed + and that REPAIR TABLE can remove the partition if for some reason the + command to drop the partition failed in the middle.
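
The ADD PARTITION validation above boils down to two counts: there must be at least one new partition, and the resulting total (multiplied by the subpartition count, if any) must stay within MAX_PARTITIONS. A sketch of that arithmetic, with an illustrative limit:

#include <cstdio>

// Sketch of the ADD PARTITION limit check: new partitions are appended
// at the end, and the combined total must stay within the maximum.
static bool add_partition_fits(unsigned orig_parts, unsigned new_parts,
                               unsigned subparts, unsigned max_partitions)
{
  unsigned total= (orig_parts + new_parts) * (subparts ? subparts : 1);
  return new_parts > 0 && total <= max_partitions;
}

int main()
{
  const unsigned MAX_PARTITIONS_SKETCH= 1024;   // illustrative constant
  printf("%d %d\n",
         add_partition_fits(10, 2, 4, MAX_PARTITIONS_SKETCH),  // ok
         add_partition_fits(10, 0, 4, MAX_PARTITIONS_SKETCH)); // rejected
  return 0;
}
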
+ */ + uint part_count= 0; + uint no_parts_dropped= alter_info->partition_names.elements; + uint no_parts_found= 0; + List_iterator<partition_element> part_it(tab_part_info->partitions); + if (!(tab_part_info->part_type == RANGE_PARTITION || + tab_part_info->part_type == LIST_PARTITION)) + { + my_error(ER_ONLY_ON_RANGE_LIST_PARTITION, MYF(0), "DROP"); + DBUG_RETURN(TRUE); + } + if (no_parts_dropped >= tab_part_info->no_parts) + { + my_error(ER_DROP_LAST_PARTITION, MYF(0)); + DBUG_RETURN(TRUE); + } + do + { + partition_element *part_elem= part_it++; + if (is_partition_in_list(part_elem->partition_name, + alter_info->partition_names)) + { + /* + Remove the partition from the list and put it instead in the + list of temporary partitions with a new state. + */ + no_parts_found++; + part_elem->part_state= PART_IS_DROPPED; + } + } while (++part_count < tab_part_info->no_parts); + if (no_parts_found != no_parts_dropped) + { + my_error(ER_DROP_PARTITION_NON_EXISTENT, MYF(0)); + DBUG_RETURN(TRUE); + } + if (!(table->file->alter_table_flags() & HA_ONLINE_DROP_PARTITION)) + { + my_error(ER_DROP_PARTITION_FAILURE, MYF(0)); + DBUG_RETURN(TRUE); + } + if (table->file->is_fk_defined_on_table_or_index(MAX_KEY)) + { + my_error(ER_DROP_PARTITION_WHEN_FK_DEFINED, MYF(0)); + DBUG_RETURN(TRUE); + } + /* + This code needs set-up of structures needed by mysql_create_table + before it is called and thus we only set a boolean variable to be + checked later down in the code when all needed data structures are + prepared. + */ + online_drop_partition= TRUE; + } + else if (alter_info->flags == ALTER_COALESCE_PARTITION) + { + /* + In this version COALESCE PARTITION is implemented by simply removing + a partition from the table and using the normal ALTER TABLE code + and ensuring that copy to a new table occurs. Later on we can optimise + this function for Linear Hash partitions. In that case we can avoid + reorganising the entire table. For normal hash partitions it will + be a complete reorganise anyway so that can only be made on-line + if it still uses a copy table. + */ + uint part_count= 0; + uint no_parts_coalesced= alter_info->no_parts; + uint no_parts_remain= tab_part_info->no_parts - no_parts_coalesced; + List_iterator<partition_element> part_it(tab_part_info->partitions); + if (tab_part_info->part_type != HASH_PARTITION) + { + my_error(ER_COALESCE_ONLY_ON_HASH_PARTITION, MYF(0)); + DBUG_RETURN(TRUE); + } + if (no_parts_coalesced == 0) + { + my_error(ER_COALESCE_PARTITION_NO_PARTITION, MYF(0)); + DBUG_RETURN(TRUE); + } + if (no_parts_coalesced >= tab_part_info->no_parts) + { + my_error(ER_DROP_LAST_PARTITION, MYF(0)); + DBUG_RETURN(TRUE); + } + do + { + part_it++; + if (++part_count > no_parts_remain) + part_it.remove(); + } while (part_count < tab_part_info->no_parts); + tab_part_info->no_parts= no_parts_remain; + } + else if (alter_info->flags == ALTER_REORGANISE_PARTITION) + { + /* + Reorganise partitions takes a number of partitions that are next + to each other (at least for RANGE PARTITIONS) and then uses those + to create a set of new partitions. So data is copied from those + partitions into the new set of partitions. The new partitions + can have more or fewer values in their LIST value specifications; + both are allowed. The ranges can be different, but since a set of + consecutive partitions is being changed, the new partitions must + cover the same range as the partitions they replace. + This command can be used on RANGE and LIST partitions.
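
For REORGANISE PARTITION the code below tracks tab_max_range and alt_max_range to enforce the rule just stated: the replacement partitions may not extend past the range of the partitions they replace. A sketch of the check:

#include <cstdio>

// Sketch of the REORGANISE PARTITION range rule: the highest range
// value of the replacement partitions must not exceed the highest
// range value of the partitions being replaced.
static bool reorg_range_ok(long long old_max_range, long long new_max_range)
{
  return new_max_range <= old_max_range;  // else ER_REORG_OUTSIDE_RANGE
}

int main()
{
  printf("%d %d\n",
         reorg_range_ok(1000, 1000),   // same upper bound: ok
         reorg_range_ok(1000, 2000));  // extends the range: rejected
  return 0;
}
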
+ */ + uint no_parts_reorged= alter_info->partition_names.elements; + uint no_parts_new= thd->lex->part_info->partitions.elements; + partition_info *alt_part_info= thd->lex->part_info; + uint check_total_partitions; + if (no_parts_reorged > tab_part_info->no_parts) + { + my_error(ER_REORG_PARTITION_NOT_EXIST, MYF(0)); + DBUG_RETURN(TRUE); + } + if (!(tab_part_info->part_type == RANGE_PARTITION || + tab_part_info->part_type == LIST_PARTITION)) + { + my_error(ER_ONLY_ON_RANGE_LIST_PARTITION, MYF(0), "REORGANISE"); + DBUG_RETURN(TRUE); + } + if (check_reorganise_list(alt_part_info, tab_part_info, + alter_info->partition_names)) + { + my_error(ER_SAME_NAME_PARTITION, MYF(0)); + DBUG_RETURN(TRUE); + } + check_total_partitions= tab_part_info->no_parts + no_parts_new; + check_total_partitions-= no_parts_reorged; + if (check_total_partitions > MAX_PARTITIONS) + { + my_error(ER_TOO_MANY_PARTITIONS_ERROR, MYF(0)); + DBUG_RETURN(TRUE); + } + { + List_iterator<partition_element> tab_it(tab_part_info->partitions); + uint part_count= 0; + bool found_first= FALSE, found_last= FALSE; + uint drop_count= 0; + longlong tab_max_range, alt_max_range; + do + { + partition_element *part_elem= tab_it++; + if (is_partition_in_list(part_elem->partition_name, + alter_info->partition_names)) + { + drop_count++; + tab_max_range= part_elem->range_value; + if (!found_first) + { + uint alt_part_count= 0; + found_first= TRUE; + List_iterator<partition_element> alt_it(alt_part_info->partitions); + do + { + partition_element *alt_part_elem= alt_it++; + alt_max_range= alt_part_elem->range_value; + if (alt_part_count == 0) + tab_it.replace(alt_part_elem); + else + tab_it.after(alt_part_elem); + } while (++alt_part_count < no_parts_new); + } + else if (found_last) + { + my_error(ER_CONSECUTIVE_REORG_PARTITIONS, MYF(0)); + DBUG_RETURN(TRUE); + } + else + tab_it.remove(); + } + else + { + if (found_first) + found_last= TRUE; + } + } while (++part_count < tab_part_info->no_parts); + if (drop_count != no_parts_reorged) + { + my_error(ER_DROP_PARTITION_NON_EXISTENT, MYF(0)); + DBUG_RETURN(TRUE); + } + if (tab_part_info->part_type == RANGE_PARTITION && + alt_max_range > tab_max_range) + { + my_error(ER_REORG_OUTSIDE_RANGE, MYF(0)); + DBUG_RETURN(TRUE); + } + } + } + partition_changed= TRUE; + tab_part_info->no_parts= tab_part_info->partitions.elements; + create_info->db_type= &partition_hton; + thd->lex->part_info= tab_part_info; + if (alter_info->flags == ALTER_ADD_PARTITION || + alter_info->flags == ALTER_REORGANISE_PARTITION) + { + if (check_partition_info(tab_part_info, default_engine_type, + table->file, (ulonglong)0ULL)) + { + DBUG_RETURN(TRUE); + } + } + } + else + { + /* + When thd->lex->part_info has a reference to a partition_info the + ALTER TABLE contained a definition of a partitioning. + + Case I: + If there was partitioning before and new partitioning is defined, + we use the new partitioning. The new partitioning is already + defined in the correct variable so no work is needed to + accomplish this. + We do however need to update partition_changed to ensure that not + only the frm file is changed in the ALTER TABLE command. + + Case IIa: + There was a partitioning before and there is no new one defined. + Also the user has not specified an explicit engine to use. + + We use the old partitioning also for the new table. We do this + by assigning the partition_info from the table loaded in + open_ltable to the partition_info struct used by mysql_create_table + later in this method.
+ + Case IIb: + There was a partitioning before and there is no new one defined. + The user has specified an explicit engine to use. + + Since the user has specified an explicit engine to use we override + the old partitioning info and create a new table using the specified + engine. This is the reason for the extra check whether the old and + new engines are equal. + In this case the partitioning is also changed. + + Case III: + There was no partitioning before altering the table, there is + partitioning defined in the altered table. Use the new partitioning. + No work needed since the partitioning info is already in the + correct variable. + Here too the partitioning has changed and thus a new table must be + created. + + Case IV: + There was no partitioning before and no partitioning defined. + Obviously no work needed. + */ + if (table->part_info) + { + if (!thd->lex->part_info && + create_info->db_type == old_db_type) + thd->lex->part_info= table->part_info; + } + if (thd->lex->part_info) + { + /* + Need to cater for engine types that can handle partition without + using the partition handler. + */ + if (thd->lex->part_info != table->part_info) + partition_changed= TRUE; + if (create_info->db_type != &partition_hton) + thd->lex->part_info->default_engine_type= create_info->db_type; + create_info->db_type= &partition_hton; + } + } +#endif if (check_engine(thd, new_name, &create_info->db_type)) DBUG_RETURN(TRUE); new_db_type= create_info->db_type; @@ -3255,6 +4215,7 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name, else { *fn_ext(new_name)=0; + table->s->version= 0; // Force removal of table def close_cached_table(thd, table); if (mysql_rename_table(old_db_type,db,table_name,new_db,new_alias)) error= -1; @@ -3293,12 +4254,7 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name, } if (!error) { - if (mysql_bin_log.is_open()) - { - thd->clear_error(); - Query_log_event qinfo(thd, thd->query, thd->query_length, FALSE, FALSE); - mysql_bin_log.write(&qinfo); - } + write_bin_log(thd, TRUE, thd->query, thd->query_length); if (do_send_ok) send_ok(thd); } @@ -3379,8 +4335,8 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name, def_it.remove(); } } - else - { // Use old field value + else // This field was not dropped and not changed, add it to the list + { // for the new table. create_list.push_back(def=new create_field(field,field)); alter_it.rewind(); // Change default if ALTER Alter_column *alter; @@ -3535,7 +4491,9 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name, key_name, key_info->algorithm, test(key_info->flags & HA_GENERATED_KEY), - key_parts)); + key_parts, + key_info->flags & HA_USES_PARSER ? + &key_info->parser->name : 0)); } { Key *key; @@ -3596,17 +4554,132 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name, if (table->s->tmp_table) create_info->options|=HA_LEX_CREATE_TMP_TABLE; + set_table_default_charset(thd, create_info, db); + +#ifdef WITH_PARTITION_STORAGE_ENGINE + if (thd->variables.old_alter_table || partition_changed) +#else + if (thd->variables.old_alter_table) +#endif + need_copy_table= 1; + else + need_copy_table= compare_tables(table, &create_list, &key_list, + create_info, alter_info, order_num); + /* better have a negative test here, instead of positive, like alter_info->flags & ALTER_ADD_COLUMN|ALTER_ADD_INDEX|...
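
The four cases above fit in one small decision function. A sketch, with hypothetical booleans for "the table was partitioned", "the ALTER defines new partitioning", and "an explicit, different engine was given":

#include <cstdio>

// Sketch of cases I-IV: does the ALTER TABLE keep, replace or drop
// the partitioning?
static const char *partitioning_outcome(bool had_partitioning,
                                        bool new_partitioning_given,
                                        bool engine_given_and_differs)
{
  if (new_partitioning_given)
    return had_partitioning ? "case I: use new partitioning"
                            : "case III: use new partitioning";
  if (had_partitioning)
    return engine_given_and_differs
             ? "case IIb: repartition for the given engine"
             : "case IIa: keep old partitioning";
  return "case IV: nothing to do";
}

int main()
{
  printf("%s\n", partitioning_outcome(true, false, false)); // case IIa
  return 0;
}
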
so that ALTER TABLE won't break when somebody adds a new flag */ - need_copy_table= (alter_info->flags & - ~(ALTER_CHANGE_COLUMN_DEFAULT|ALTER_OPTIONS) || - (create_info->used_fields & - ~(HA_CREATE_USED_COMMENT|HA_CREATE_USED_PASSWORD)) || - table->s->tmp_table); - create_info->frm_only= !need_copy_table; + + if (!need_copy_table) + create_info->frm_only= 1; + +#ifdef WITH_PARTITION_STORAGE_ENGINE + if (partition_changed) + { + if (online_drop_partition) + { + /* + Now after all checks and setting state on dropped partitions we can + start the actual dropping of the partitions. + 1) Lock table in TL_WRITE_ONLY to ensure all other accesses to the + table are completed and no new ones are started until we have changed + the frm file. + 2) Write the new frm file where state of dropped partitions is + changed to PART_IS_DROPPED + 3) Perform the actual drop of the partition using the handler of the + table. + 4) Write a new frm file of the table where the partitions are dropped + from the table. + + */ + uint old_lock_type; + partition_info *part_info= table->part_info; + char path[FN_REFLEN+1], noext_path[FN_REFLEN+1]; + uint db_options= 0, key_count, syntax_len; + KEY *key_info_buffer; + char *part_syntax_buf; + + VOID(pthread_mutex_lock(&LOCK_open)); + if (abort_and_upgrade_lock(thd, table, db, table_name, &old_lock_type)) + { + DBUG_RETURN(TRUE); + } + VOID(pthread_mutex_unlock(&LOCK_open)); + mysql_prepare_table(thd, create_info, &create_list, + &key_list, /*tmp_table*/ 0, &db_options, + table->file, &key_info_buffer, &key_count, + /*select_field_count*/ 0); + if (!(part_syntax_buf= generate_partition_syntax(part_info, + &syntax_len, + TRUE,TRUE))) + { + DBUG_RETURN(TRUE); + } + part_info->part_info_string= part_syntax_buf; + part_info->part_info_len= syntax_len; + build_table_filename(path, sizeof(path), db, table_name, reg_ext); + if (mysql_create_frm(thd, path, db, table_name, create_info, + create_list, key_count, key_info_buffer, + table->file)) + { + DBUG_RETURN(TRUE); + } + thd->lex->part_info= part_info; + build_table_filename(path, sizeof(path), db, table_name, ""); + if (table->file->drop_partitions(path)) + { + DBUG_RETURN(TRUE); + } + { + List_iterator<partition_element> part_it(part_info->partitions); + uint i= 0, remove_count= 0; + do + { + partition_element *part_elem= part_it++; + if (is_partition_in_list(part_elem->partition_name, + alter_info->partition_names)) + { + part_it.remove(); + remove_count++; + } + } while (++i < part_info->no_parts); + part_info->no_parts-= remove_count; + } + if (!(part_syntax_buf= generate_partition_syntax(part_info, + &syntax_len, + TRUE,TRUE))) + { + DBUG_RETURN(TRUE); + } + part_info->part_info_string= part_syntax_buf; + part_info->part_info_len= syntax_len; + build_table_filename(path, sizeof(path), db, table_name, reg_ext); + build_table_filename(noext_path, sizeof(noext_path), db, table_name, ""); + if (mysql_create_frm(thd, path, db, table_name, create_info, + create_list, key_count, key_info_buffer, + table->file) || + table->file->create_handler_files(noext_path)) + { + DBUG_RETURN(TRUE); + } + thd->proc_info="end"; + query_cache_invalidate3(thd, table_list, 0); + error= ha_commit_stmt(thd); + if (ha_commit(thd)) + error= 1; + if (!error) + { + close_thread_tables(thd); + write_bin_log(thd, FALSE, thd->query, thd->query_length); + send_ok(thd); + DBUG_RETURN(FALSE); + } + DBUG_RETURN(error); + } + } +#endif /* Handling of symlinked tables: @@ -3672,15 +4745,15 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name,
bzero((void*) &tbl, sizeof(tbl)); tbl.db= new_db; tbl.table_name= tbl.alias= tmp_name; + /* Table is in thd->temporary_tables */ new_table= open_table(thd, &tbl, thd->mem_root, (bool*) 0, MYSQL_LOCK_IGNORE_FLUSH); } else { char path[FN_REFLEN]; - my_snprintf(path, sizeof(path), "%s/%s/%s", mysql_data_home, - new_db, tmp_name); - fn_format(path,path,"","",4); + /* table is a normal table: Create temporary table in same directory */ + build_table_filename(path, sizeof(path), new_db, tmp_name, ""); new_table=open_temporary_table(thd, path, new_db, tmp_name,0); } if (!new_table) @@ -3696,7 +4769,7 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name, thd->proc_info="copy to tmp table"; next_insert_id=thd->next_insert_id; // Remember for logging copied=deleted=0; - if (new_table && !new_table->s->is_view) + if (new_table && !(new_table->file->table_flags() & HA_NO_COPY_ON_ALTER)) { new_table->timestamp_field_type= TIMESTAMP_NO_AUTO_SET; new_table->next_number_field=new_table->found_next_number_field; @@ -3707,7 +4780,7 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name, thd->last_insert_id=next_insert_id; // Needed for correct log thd->count_cuted_fields= CHECK_FIELD_IGNORE; - if (table->s->tmp_table) + if (table->s->tmp_table != NO_TMP_TABLE) { /* We changed a temporary table */ if (error) @@ -3716,7 +4789,7 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name, The following function call will free the new_table pointer, in close_temporary_table(), so we can safely directly jump to err */ - close_temporary_table(thd,new_db,tmp_name); + close_temporary_table(thd, new_table, 1, 1); goto err; } /* Close lock if this is a transactional table */ @@ -3726,26 +4799,24 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name, thd->lock=0; } /* Remove link to old table and rename the new one */ - close_temporary_table(thd, table->s->db, table_name); + close_temporary_table(thd, table, 1, 1); /* Should pass the 'new_name' as we store table name in the cache */ if (rename_temporary_table(thd, new_table, new_db, new_name)) { // Fatal error - close_temporary_table(thd,new_db,tmp_name); + close_temporary_table(thd, new_table, 1, 1); my_free((gptr) new_table,MYF(0)); goto err; } - if (mysql_bin_log.is_open()) - { - thd->clear_error(); - Query_log_event qinfo(thd, thd->query, thd->query_length, FALSE, FALSE); - mysql_bin_log.write(&qinfo); - } + /* We don't replicate alter table statement on temporary tables */ + if (!binlog_row_based) + write_bin_log(thd, TRUE, thd->query, thd->query_length); goto end_temporary; } if (new_table) { - intern_close_table(new_table); /* close temporary table */ + /* close temporary table that will be the new table */ + intern_close_table(new_table); my_free((gptr) new_table,MYF(0)); } VOID(pthread_mutex_lock(&LOCK_open)); @@ -3788,6 +4859,7 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name, close the original table at before doing the rename */ table_name=thd->strdup(table_name); // must be saved + table->s->version= 0; // Force removal of table def close_cached_table(thd, table); table=0; // Marker that table is closed } @@ -3799,7 +4871,7 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name, error=0; if (!need_copy_table) - new_db_type=old_db_type=DB_TYPE_UNKNOWN; // this type cannot happen in regular ALTER + new_db_type=old_db_type= NULL; // this type cannot happen in regular ALTER if (mysql_rename_table(old_db_type,db,table_name,db,old_name)) { error=1; @@ -3820,18 +4892,24 @@ bool mysql_alter_table(THD 
*thd,char *new_db, char *new_name,
      closing the locked table.
    */
    if (table)
+    {
+      table->s->version= 0;                     // Force removal of table def
      close_cached_table(thd,table);
+    }
    VOID(pthread_mutex_unlock(&LOCK_open));
    goto err;
  }
  if (thd->lock || new_name != table_name)      // True if WIN32
  {
    /*
-      Not table locking or alter table with rename - free locks and remove old table
+      Not table locking or alter table with rename.
+      Free locks and remove old table
    */
    if (table)
+    {
+      table->s->version= 0;                     // Force removal of table def
      close_cached_table(thd,table);
+    }
    VOID(quick_rm_table(old_db_type,db,old_name));
  }
  else
@@ -3854,7 +4932,10 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name,
      reopen_tables(thd,1,0))
    {                                           // This shouldn't happen
      if (table)
+      {
+        table->s->version= 0;                   // Force removal of table def
        close_cached_table(thd,table);          // Remove lock for table
+      }
      VOID(pthread_mutex_unlock(&LOCK_open));
      goto err;
    }
@@ -3870,16 +4951,17 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name,
    goto err;
  }
  thd->proc_info="end";
-  if (mysql_bin_log.is_open())
-  {
-    thd->clear_error();
-    Query_log_event qinfo(thd, thd->query, thd->query_length, FALSE, FALSE);
-    mysql_bin_log.write(&qinfo);
-  }
+
+  DBUG_ASSERT(!(mysql_bin_log.is_open() && binlog_row_based &&
+                (create_info->options & HA_LEX_CREATE_TMP_TABLE)));
+  write_bin_log(thd, TRUE, thd->query, thd->query_length);
  VOID(pthread_cond_broadcast(&COND_refresh));
  VOID(pthread_mutex_unlock(&LOCK_open));
-#ifdef HAVE_BERKELEY_DB
-  if (old_db_type == DB_TYPE_BERKELEY_DB)
+  /*
+    TODO RONM: This problem needs to be handled for Berkeley DB partitions
+    as well
+  */
+  if (ha_check_storage_engine_flag(old_db_type,HTON_FLUSH_AFTER_RENAME))
  {
    /*
      For the alter table to be properly flushed to the logs, we
@@ -3887,7 +4969,7 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name,
      shutdown.
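      A sketch for illustration, not part of the patch: the old BDB-only
      #ifdef above is replaced by a generic handlerton capability flag, so
      any engine can request this flush-after-rename treatment. Assuming
      the 5.1 handlerton carries a flags bitmask, an engine would opt in
      roughly like this in its initialization code, and
      ha_check_storage_engine_flag() simply tests that mask as done above:

          hton->flags|= HTON_FLUSH_AFTER_RENAME;  // ask for ha_flush_logs() after the rename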
*/ char path[FN_REFLEN]; - build_table_path(path, sizeof(path), new_db, table_name, ""); + build_table_filename(path, sizeof(path), new_db, table_name, ""); table=open_temporary_table(thd, path, new_db, tmp_name,0); if (table) { @@ -3895,11 +4977,10 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name, my_free((char*) table, MYF(0)); } else - sql_print_warning("Could not open BDB table %s.%s after rename\n", + sql_print_warning("Could not open table %s.%s after rename\n", new_db,table_name); - (void) berkeley_flush_logs(); + ha_flush_logs(old_db_type); } -#endif table_list->table=0; // For query cache query_cache_invalidate3(thd, table_list, 0); @@ -3915,7 +4996,7 @@ end_temporary: err: DBUG_RETURN(TRUE); } - +/* mysql_alter_table */ static int copy_data_between_tables(TABLE *from,TABLE *to, @@ -4001,8 +5082,8 @@ copy_data_between_tables(TABLE *from,TABLE *to, MYF(MY_FAE | MY_ZEROFILL)); bzero((char*) &tables,sizeof(tables)); tables.table= from; - tables.alias= tables.table_name= (char*) from->s->table_name; - tables.db= (char*) from->s->db; + tables.alias= tables.table_name= from->s->table_name.str; + tables.db= from->s->db.str; error=1; if (thd->lex->select_lex.setup_ref_array(thd, order_num) || @@ -4021,7 +5102,8 @@ copy_data_between_tables(TABLE *from,TABLE *to, this function does not set field->query_id in the columns to the current query id */ - from->file->extra(HA_EXTRA_RETRIEVE_ALL_COLS); + to->file->ha_set_all_bits_in_write_set(); + from->file->ha_retrieve_all_cols(); init_read_record(&info, thd, from, (SQL_SELECT *) 0, 1,1); if (ignore || handle_duplicates == DUP_REPLACE) @@ -4049,7 +5131,7 @@ copy_data_between_tables(TABLE *from,TABLE *to, { copy_ptr->do_copy(copy_ptr); } - if ((error=to->file->write_row((byte*) to->record[0]))) + if ((error=to->file->ha_write_row((byte*) to->record[0]))) { if ((!ignore && handle_duplicates != DUP_REPLACE) || @@ -4122,11 +5204,11 @@ bool mysql_recreate_table(THD *thd, TABLE_LIST *table_list, lex->col_list.empty(); lex->alter_info.reset(); bzero((char*) &create_info,sizeof(create_info)); - create_info.db_type=DB_TYPE_DEFAULT; + create_info.db_type= (handlerton*) &default_hton; create_info.row_type=ROW_TYPE_NOT_USED; create_info.default_table_charset=default_charset_info; /* Force alter table to recreate table */ - lex->alter_info.flags= ALTER_CHANGE_COLUMN; + lex->alter_info.flags= (ALTER_CHANGE_COLUMN | ALTER_RECREATE); DBUG_RETURN(mysql_alter_table(thd, NullS, NullS, &create_info, table_list, lex->create_list, lex->key_list, 0, (ORDER *) 0, @@ -4185,10 +5267,11 @@ bool mysql_checksum_table(THD *thd, TABLE_LIST *tables, HA_CHECK_OPT *check_opt) ha_checksum crc= 0; uchar null_mask=256 - (1 << t->s->last_null_bit_pos); - /* InnoDB must be told explicitly to retrieve all columns, because - this function does not set field->query_id in the columns to the - current query id */ - t->file->extra(HA_EXTRA_RETRIEVE_ALL_COLS); + /* + Set all bits in read set and inform InnoDB that we are reading all + fields + */ + t->file->ha_retrieve_all_cols(); if (t->file->ha_rnd_init(1)) protocol->store_null(); @@ -4253,21 +5336,21 @@ bool mysql_checksum_table(THD *thd, TABLE_LIST *tables, HA_CHECK_OPT *check_opt) } static bool check_engine(THD *thd, const char *table_name, - enum db_type *new_engine) + handlerton **new_engine) { - enum db_type req_engine= *new_engine; + handlerton *req_engine= *new_engine; bool no_substitution= test(thd->variables.sql_mode & MODE_NO_ENGINE_SUBSTITUTION); - if ((*new_engine= - ha_checktype(thd, req_engine, 
no_substitution, 1)) == DB_TYPE_UNKNOWN) + if (!(*new_engine= ha_checktype(thd, ha_legacy_type(req_engine), + no_substitution, 1))) return TRUE; - if (req_engine != *new_engine) + if (req_engine != (handlerton*) &default_hton && req_engine != *new_engine) { push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN, ER_WARN_USING_OTHER_HANDLER, ER(ER_WARN_USING_OTHER_HANDLER), - ha_get_storage_engine(*new_engine), + ha_resolve_storage_engine_name(*new_engine), table_name); } return FALSE; diff --git a/sql/sql_test.cc b/sql/sql_test.cc index 1bd298dda04..c4448ff8abe 100644 --- a/sql/sql_test.cc +++ b/sql/sql_test.cc @@ -79,7 +79,7 @@ void print_cached_tables(void) { TABLE *entry=(TABLE*) hash_element(&open_cache,idx); printf("%-14.14s %-32s%6ld%8ld%10ld%6d %s\n", - entry->s->db, entry->s->table_name, entry->s->version, + entry->s->db.str, entry->s->table_name.str, entry->s->version, entry->in_use ? entry->in_use->thread_id : 0L, entry->in_use ? entry->in_use->dbug_thread_id : 0L, entry->db_stat ? 1 : 0, entry->in_use ? lock_descriptions[(int)entry->reginfo.lock_type] : "Not in use"); @@ -261,7 +261,7 @@ print_plan(JOIN* join, double read_time, double record_count, pos = join->positions[i]; table= pos.table->table; if (table) - fputs(table->s->table_name, DBUG_FILE); + fputs(table->s->table_name.str, DBUG_FILE); fputc(' ', DBUG_FILE); } fputc('\n', DBUG_FILE); @@ -278,7 +278,7 @@ print_plan(JOIN* join, double read_time, double record_count, pos= join->best_positions[i]; table= pos.table->table; if (table) - fputs(table->s->table_name, DBUG_FILE); + fputs(table->s->table_name.str, DBUG_FILE); fputc(' ', DBUG_FILE); } } @@ -289,7 +289,7 @@ print_plan(JOIN* join, double read_time, double record_count, for (plan_nodes= join->best_ref ; *plan_nodes ; plan_nodes++) { join_table= (*plan_nodes); - fputs(join_table->table->s->table_name, DBUG_FILE); + fputs(join_table->table->s->table_name.str, DBUG_FILE); fprintf(DBUG_FILE, "(%lu,%lu,%lu)", (ulong) join_table->found_records, (ulong) join_table->records, @@ -336,8 +336,8 @@ static void push_locks_into_array(DYNAMIC_ARRAY *ar, THR_LOCK_DATA *data, { TABLE_LOCK_INFO table_lock_info; table_lock_info.thread_id= table->in_use->thread_id; - memcpy(table_lock_info.table_name, table->s->table_cache_key, - table->s->key_length); + memcpy(table_lock_info.table_name, table->s->table_cache_key.str, + table->s->table_cache_key.length); table_lock_info.table_name[strlen(table_lock_info.table_name)]='.'; table_lock_info.waiting=wait; table_lock_info.lock_text=text; @@ -484,7 +484,7 @@ Open tables: %10lu\n\ Open files: %10lu\n\ Open streams: %10lu\n", tmp.opened_tables, - (ulong) cached_tables(), + (ulong) cached_open_tables(), (ulong) my_file_opened, (ulong) my_stream_opened); diff --git a/sql/sql_trigger.cc b/sql/sql_trigger.cc index bbc32950c2d..e4b22cffca0 100644 --- a/sql/sql_trigger.cc +++ b/sql/sql_trigger.cc @@ -202,7 +202,7 @@ bool mysql_create_or_drop_trigger(THD *thd, TABLE_LIST *tables, bool create) } /* We do not allow creation of triggers on temporary tables. 
*/ - if (create && find_temporary_table(thd, tables->db, tables->table_name)) + if (create && find_temporary_table(thd, tables)) { my_error(ER_TRG_ON_VIEW_OR_TEMP_TABLE, MYF(0), tables->alias); DBUG_RETURN(TRUE); @@ -312,9 +312,9 @@ bool Table_triggers_list::create_trigger(THD *thd, TABLE_LIST *tables, { LEX *lex= thd->lex; TABLE *table= tables->table; - char dir_buff[FN_REFLEN], file_buff[FN_REFLEN], trigname_buff[FN_REFLEN], + char file_buff[FN_REFLEN], trigname_buff[FN_REFLEN], trigname_path[FN_REFLEN]; - LEX_STRING dir, file, trigname_file; + LEX_STRING file, trigname_file; LEX_STRING *trg_def, *name; ulonglong *trg_sql_mode; char trg_definer_holder[HOSTNAME_LENGTH + USERNAME_LENGTH + 2]; @@ -324,7 +324,8 @@ bool Table_triggers_list::create_trigger(THD *thd, TABLE_LIST *tables, /* Trigger must be in the same schema as target table. */ - if (my_strcasecmp(table_alias_charset, table->s->db, lex->spname->m_db.str)) + if (my_strcasecmp(table_alias_charset, table->s->db.str, + lex->spname->m_db.str)) { my_error(ER_TRG_IN_WRONG_SCHEMA, MYF(0)); return 1; @@ -392,20 +393,18 @@ bool Table_triggers_list::create_trigger(THD *thd, TABLE_LIST *tables, sql_create_definition_file() files handles renaming and backup of older versions */ - strxnmov(dir_buff, FN_REFLEN, mysql_data_home, "/", tables->db, "/", NullS); - dir.length= unpack_filename(dir_buff, dir_buff); - dir.str= dir_buff; - file.length= strxnmov(file_buff, FN_REFLEN, tables->table_name, - triggers_file_ext, NullS) - file_buff; + file.length= build_table_filename(file_buff, FN_REFLEN-1, + tables->db, tables->table_name, + triggers_file_ext); file.str= file_buff; - trigname_file.length= strxnmov(trigname_buff, FN_REFLEN, - lex->spname->m_name.str, - trigname_file_ext, NullS) - trigname_buff; + trigname_file.length= build_table_filename(trigname_buff, FN_REFLEN-1, + tables->db, + lex->spname->m_name.str, + trigname_file_ext); trigname_file.str= trigname_buff; - strxnmov(trigname_path, FN_REFLEN, dir_buff, trigname_buff, NullS); /* Use the filesystem to enforce trigger namespace constraints. 
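      A sketch for illustration only, not part of the patch: with
      build_table_filename() in place, the uniqueness probe this comment
      describes reduces to roughly the following, where schema "test" and
      trigger name "t1_bi" are invented examples and trigname_file_ext is
      the trigger-name file suffix defined in this file:

          char trigname_buff[FN_REFLEN];
          (void) build_table_filename(trigname_buff, FN_REFLEN - 1,
                                      "test", "t1_bi", trigname_file_ext);
          if (!access(trigname_buff, F_OK))
            my_error(ER_TRG_ALREADY_EXISTS, MYF(0));  // name file already exists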
*/ - if (!access(trigname_path, F_OK)) + if (!access(trigname_buff, F_OK)) { my_error(ER_TRG_ALREADY_EXISTS, MYF(0)); return 1; @@ -414,7 +413,7 @@ bool Table_triggers_list::create_trigger(THD *thd, TABLE_LIST *tables, trigname.trigger_table.str= tables->table_name; trigname.trigger_table.length= tables->table_name_length; - if (sql_create_definition_file(&dir, &trigname_file, &trigname_file_type, + if (sql_create_definition_file(NULL, &trigname_file, &trigname_file_type, (gptr)&trigname, trigname_file_parameters, 0)) return 1; @@ -461,7 +460,7 @@ bool Table_triggers_list::create_trigger(THD *thd, TABLE_LIST *tables, trg_definer->length= strxmov(trg_definer->str, definer_user->str, "@", definer_host->str, NullS) - trg_definer->str; - if (!sql_create_definition_file(&dir, &file, &triggers_file_type, + if (!sql_create_definition_file(NULL, &file, &triggers_file_type, (gptr)this, triggers_file_parameters, TRG_MAX_VERSIONS)) return 0; @@ -489,9 +488,7 @@ err_with_cleanup: static bool rm_trigger_file(char *path, char *db, char *table_name) { - strxnmov(path, FN_REFLEN, mysql_data_home, "/", db, "/", table_name, - triggers_file_ext, NullS); - unpack_filename(path, path); + build_table_filename(path, FN_REFLEN-1, db, table_name, triggers_file_ext); return my_delete(path, MYF(MY_WME)); } @@ -513,9 +510,7 @@ static bool rm_trigger_file(char *path, char *db, char *table_name) static bool rm_trigname_file(char *path, char *db, char *trigger_name) { - strxnmov(path, FN_REFLEN, mysql_data_home, "/", db, "/", trigger_name, - trigname_file_ext, NullS); - unpack_filename(path, path); + build_table_filename(path, FN_REFLEN-1, db, trigger_name, trigname_file_ext); return my_delete(path, MYF(MY_WME)); } @@ -573,18 +568,14 @@ bool Table_triggers_list::drop_trigger(THD *thd, TABLE_LIST *tables) } else { - char dir_buff[FN_REFLEN], file_buff[FN_REFLEN]; - LEX_STRING dir, file; - - strxnmov(dir_buff, FN_REFLEN, mysql_data_home, "/", tables->db, - "/", NullS); - dir.length= unpack_filename(dir_buff, dir_buff); - dir.str= dir_buff; - file.length= strxnmov(file_buff, FN_REFLEN, tables->table_name, - triggers_file_ext, NullS) - file_buff; - file.str= file_buff; + char file_buff[FN_REFLEN]; + LEX_STRING file; - if (sql_create_definition_file(&dir, &file, &triggers_file_type, + file.length= build_table_filename(file_buff, FN_REFLEN-1, + tables->db, tables->table_name, + triggers_file_ext); + file.str= file_buff; + if (sql_create_definition_file(NULL, &file, &triggers_file_type, (gptr)this, triggers_file_parameters, TRG_MAX_VERSIONS)) return 1; @@ -643,7 +634,7 @@ bool Table_triggers_list::prepare_record1_accessors(TABLE *table) */ if (!(*old_fld= (*fld)->new_field(&table->mem_root, table))) return 1; - (*old_fld)->move_field((my_ptrdiff_t)(table->record[1] - + (*old_fld)->move_field_offset((my_ptrdiff_t)(table->record[1] - table->record[0])); } *old_fld= 0; @@ -698,9 +689,8 @@ bool Table_triggers_list::check_n_load(THD *thd, const char *db, DBUG_ENTER("Table_triggers_list::check_n_load"); - strxnmov(path_buff, FN_REFLEN, mysql_data_home, "/", db, "/", table_name, - triggers_file_ext, NullS); - path.length= unpack_filename(path_buff, path_buff); + path.length= build_table_filename(path_buff, FN_REFLEN-1, + db, table_name, triggers_file_ext); path.str= path_buff; // QQ: should we analyze errno somehow ? @@ -881,7 +871,7 @@ bool Table_triggers_list::check_n_load(THD *thd, const char *db, schema. 
*/ - lex.sphead->set_definer("", 0); + lex.sphead->set_definer((char*) "", 0); /* Triggers without definer information are executed under the @@ -1032,9 +1022,9 @@ static TABLE_LIST *add_table_for_trigger(THD *thd, sp_name *trig) struct st_trigname trigname; DBUG_ENTER("add_table_for_trigger"); - strxnmov(path_buff, FN_REFLEN, mysql_data_home, "/", trig->m_db.str, "/", - trig->m_name.str, trigname_file_ext, NullS); - path.length= unpack_filename(path_buff, path_buff); + path.length= build_table_filename(path_buff, FN_REFLEN-1, + trig->m_db.str, trig->m_name.str, + trigname_file_ext); path.str= path_buff; if (access(path_buff, F_OK)) @@ -1180,10 +1170,10 @@ bool Table_triggers_list::process_triggers(THD *thd, trg_event_type event, { TABLE_LIST table_list, **save_query_tables_own_last; bzero((char *) &table_list, sizeof (table_list)); - table_list.db= (char *) table->s->db; - table_list.db_length= strlen(table_list.db); - table_list.table_name= (char *) table->s->table_name; - table_list.table_name_length= strlen(table_list.table_name); + table_list.db= (char *) table->s->db.str; + table_list.db_length= table->s->db.length; + table_list.table_name= table->s->table_name.str; + table_list.table_name_length= table->s->table_name.length; table_list.alias= (char *) table->alias; table_list.table= table; save_query_tables_own_last= thd->lex->query_tables_own_last; diff --git a/sql/sql_udf.cc b/sql/sql_udf.cc index 40e5a9a00cf..77bfba5ba28 100644 --- a/sql/sql_udf.cc +++ b/sql/sql_udf.cc @@ -38,36 +38,10 @@ #ifdef HAVE_DLOPEN extern "C" { -#if defined(__WIN__) - void* dlsym(void* lib,const char* name) - { - return GetProcAddress((HMODULE)lib,name); - } - void* dlopen(const char* libname,int unused) - { - return LoadLibraryEx(libname,NULL,0); - } - void dlclose(void* lib) - { - FreeLibrary((HMODULE)lib); - } - -#elif !defined(OS2) -#include <dlfcn.h> -#endif - #include <stdarg.h> #include <hash.h> } -#ifndef RTLD_NOW -#define RTLD_NOW 1 // For FreeBSD 2.2.2 -#endif - -#ifndef HAVE_DLERROR -#define dlerror() "" -#endif - static bool initialized = 0; static MEM_ROOT mem; static HASH udf_hash; @@ -195,9 +169,8 @@ void udf_init() This is done to ensure that only approved dll from the system directories are used (to make this even remotely secure). */ - if (strchr(dl_name, '/') || - IF_WIN(strchr(dl_name, '\\'),0) || - strlen(name.str) > NAME_LEN) + if (my_strchr(files_charset_info, dl_name, dl_name + strlen(dl_name), FN_LIBCHAR) || + strlen(name.str) > NAME_LEN) { sql_print_error("Invalid row in mysql.func table for function '%.64s'", name.str); @@ -215,10 +188,13 @@ void udf_init() void *dl = find_udf_dl(tmp->dl); if (dl == NULL) { - if (!(dl = dlopen(tmp->dl, RTLD_NOW))) + char dlpath[FN_REFLEN]; + strxnmov(dlpath, sizeof(dlpath) - 1, opt_plugin_dir, "/", tmp->dl, + NullS); + if (!(dl= dlopen(dlpath, RTLD_NOW))) { /* Print warning to log */ - sql_print_error(ER(ER_CANT_OPEN_LIBRARY), tmp->dl,errno,dlerror()); + sql_print_error(ER(ER_CANT_OPEN_LIBRARY), dlpath, errno, dlerror()); /* Keep the udf in the hash so that we can remove it later */ continue; } @@ -413,7 +389,7 @@ int mysql_create_function(THD *thd,udf_func *udf) This is done to ensure that only approved dll from the system directories are used (to make this even remotely secure). 
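    For illustration, using only calls that appear in this patch:
    udf_init() and mysql_create_function() now compose the load path under
    opt_plugin_dir before calling dlopen(), so rejecting any FN_LIBCHAR in
    udf->dl is what confines loading to the plugin directory:

        char dlpath[FN_REFLEN];
        strxnmov(dlpath, sizeof(dlpath) - 1, opt_plugin_dir, "/", udf->dl, NullS);
        void *dl= dlopen(dlpath, RTLD_NOW);   // never dlopen(udf->dl) directly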
*/ - if (strchr(udf->dl, '/') || IF_WIN(strchr(udf->dl, '\\'),0)) + if (my_strchr(files_charset_info, udf->dl, udf->dl + strlen(udf->dl), FN_LIBCHAR)) { my_message(ER_UDF_NO_PATHS, ER(ER_UDF_NO_PATHS), MYF(0)); DBUG_RETURN(1); @@ -432,12 +408,14 @@ int mysql_create_function(THD *thd,udf_func *udf) } if (!(dl = find_udf_dl(udf->dl))) { - if (!(dl = dlopen(udf->dl, RTLD_NOW))) + char dlpath[FN_REFLEN]; + strxnmov(dlpath, sizeof(dlpath) - 1, opt_plugin_dir, "/", udf->dl, NullS); + if (!(dl = dlopen(dlpath, RTLD_NOW))) { DBUG_PRINT("error",("dlopen of %s failed, error: %d (%s)", - udf->dl,errno,dlerror())); + dlpath, errno, dlerror())); my_error(ER_CANT_OPEN_LIBRARY, MYF(0), - udf->dl, errno, dlerror()); + dlpath, errno, dlerror()); goto err; } new_dl=1; @@ -477,7 +455,7 @@ int mysql_create_function(THD *thd,udf_func *udf) table->field[2]->store(u_d->dl,(uint) strlen(u_d->dl), system_charset_info); if (table->s->fields >= 4) // If not old func format table->field[3]->store((longlong) u_d->type, TRUE); - error = table->file->write_row(table->record[0]); + error = table->file->ha_write_row(table->record[0]); close_thread_tables(thd); if (error) @@ -529,14 +507,14 @@ int mysql_drop_function(THD *thd,const LEX_STRING *udf_name) if (!(table = open_ltable(thd,&tables,TL_WRITE))) goto err; table->field[0]->store(udf_name->str, udf_name->length, system_charset_info); - table->file->extra(HA_EXTRA_RETRIEVE_ALL_COLS); + table->file->ha_retrieve_all_cols(); if (!table->file->index_read_idx(table->record[0], 0, (byte*) table->field[0]->ptr, table->key_info[0].key_length, HA_READ_KEY_EXACT)) { int error; - if ((error = table->file->delete_row(table->record[0]))) + if ((error = table->file->ha_delete_row(table->record[0]))) table->file->print_error(error, MYF(0)); } close_thread_tables(thd); diff --git a/sql/sql_union.cc b/sql/sql_union.cc index dee88af7d83..e80aaecfd64 100644 --- a/sql/sql_union.cc +++ b/sql/sql_union.cc @@ -62,7 +62,7 @@ bool select_union::send_data(List<Item> &values) if (thd->net.report_error) return 1; - if ((error= table->file->write_row(table->record[0]))) + if ((error= table->file->ha_write_row(table->record[0]))) { /* create_myisam_from_heap will generate error if needed */ if (error != HA_ERR_FOUND_DUPP_KEY && error != HA_ERR_FOUND_DUPP_UNIQUE && diff --git a/sql/sql_update.cc b/sql/sql_update.cc index 2ff8a4bc244..65d1beeaf3b 100644 --- a/sql/sql_update.cc +++ b/sql/sql_update.cc @@ -119,10 +119,10 @@ int mysql_update(THD *thd, { bool using_limit= limit != HA_POS_ERROR; bool safe_update= thd->options & OPTION_SAFE_UPDATES; - bool used_key_is_modified, transactional_table; + bool used_key_is_modified, transactional_table, will_batch; int res; - int error; - uint used_index= MAX_KEY; + int error, loc_error; + uint used_index= MAX_KEY, dup_key_found; bool need_sort= TRUE; #ifndef NO_EMBEDDED_ACCESS_CHECKS uint want_privilege; @@ -197,7 +197,11 @@ int mysql_update(THD *thd, table_list->grant.want_privilege= table->grant.want_privilege= want_privilege; table_list->register_want_access(want_privilege); #endif - if (setup_fields_with_no_wrap(thd, 0, fields, 1, 0, 0)) + /* + Indicate that the set of fields is to be updated by passing 2 for + set_query_id. 
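    A hedged reading, not stated by the patch itself: passing 2 rather
    than 1 appears to mark the resolved fields as written rather than
    merely read, so that they enter the handler's write set; the effect
    is comparable to the explicit per-field call this patch adds below
    for the auto-set timestamp field:

        table->file->ha_set_bit_in_write_set(table->timestamp_field->fieldnr);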
+ */ + if (setup_fields_with_no_wrap(thd, 0, fields, 2, 0, 0)) DBUG_RETURN(1); /* purecov: inspected */ if (table_list->view && check_fields(thd, fields)) { @@ -214,7 +218,10 @@ int mysql_update(THD *thd, if (table->timestamp_field->query_id == thd->query_id) table->timestamp_field_type= TIMESTAMP_NO_AUTO_SET; else + { table->timestamp_field->query_id=timestamp_query_id; + table->file->ha_set_bit_in_write_set(table->timestamp_field->fieldnr); + } } #ifndef NO_EMBEDDED_ACCESS_CHECKS @@ -284,13 +291,18 @@ int mysql_update(THD *thd, used_key_is_modified= check_if_key_used(table, used_index, fields); } +#ifdef WITH_PARTITION_STORAGE_ENGINE + if (used_key_is_modified || order || + partition_key_modified(table, fields)) +#else if (used_key_is_modified || order) +#endif { /* We can't update table directly; We must first search after all matching rows before updating the table! */ - table->file->extra(HA_EXTRA_RETRIEVE_ALL_COLS); + table->file->ha_retrieve_all_cols(); if (used_index < MAX_KEY && old_used_keys.is_set(used_index)) { table->key_read=1; @@ -343,6 +355,9 @@ int mysql_update(THD *thd, /* If quick select is used, initialize it before retrieving rows. */ if (select && select->quick && select->quick->reset()) goto err; + + table->file->try_semi_consistent_read(1); + if (used_index == MAX_KEY || (select && select->quick)) init_read_record(&info,thd,table,select,0,1); else @@ -355,6 +370,9 @@ int mysql_update(THD *thd, { if (!(select && select->skip_record())) { + if (table->file->was_semi_consistent_read()) + continue; /* repeat the read of the same row if it still exists */ + table->file->position(table->record[0]); if (my_b_write(&tempfile,table->file->ref, table->file->ref_length)) @@ -374,6 +392,7 @@ int mysql_update(THD *thd, if (thd->killed && !error) error= 1; // Aborted limit= tmp_limit; + table->file->try_semi_consistent_read(0); end_read_record(&info); /* Change select to use tempfile */ @@ -408,6 +427,7 @@ int mysql_update(THD *thd, if (select && select->quick && select->quick->reset()) goto err; + table->file->try_semi_consistent_read(1); init_read_record(&info,thd,table,select,0,1); updated= found= 0; @@ -422,11 +442,15 @@ int mysql_update(THD *thd, (thd->variables.sql_mode & (MODE_STRICT_TRANS_TABLES | MODE_STRICT_ALL_TABLES))); + will_batch= !table->file->start_bulk_update(); while (!(error=info.read_record(&info)) && !thd->killed) { if (!(select && select->skip_record())) { + if (table->file->was_semi_consistent_read()) + continue; /* repeat the read of the same row if it still exists */ + store_record(table,record[1]); if (fill_record_n_invoke_before_triggers(thd, fields, values, 0, table->triggers, @@ -449,8 +473,47 @@ int mysql_update(THD *thd, break; } } - if (!(error=table->file->update_row((byte*) table->record[1], - (byte*) table->record[0]))) + if (will_batch) + { + /* + Typically a batched handler can execute the batched jobs when: + 1) When specifically told to do so + 2) When it is not a good idea to batch anymore + 3) When it is necessary to send batch for other reasons + (One such reason is when READ's must be performed) + + 1) is covered by exec_bulk_update calls. + 2) and 3) is handled by the bulk_update_row method. + + bulk_update_row can execute the updates including the one + defined in the bulk_update_row or not including the row + in the call. This is up to the handler implementation and can + vary from call to call. + + The dup_key_found reports the number of duplicate keys found + in those updates actually executed. 
It only reports those if
+          the extra call with HA_EXTRA_IGNORE_DUP_KEY has been issued.
+          If it hasn't been issued, an error code is returned and this
+          number can be ignored. Thus any handler that implements
+          batching for UPDATE IGNORE must also handle this extra call
+          properly.
+
+          If a duplicate key is found on the record included in this
+          call then it should be included in the count of dup_key_found
+          and error should be set to 0 (only if these errors are
+          ignored).
+        */
+        error= table->file->bulk_update_row(table->record[1],
+                                            table->record[0],
+                                            &dup_key_found);
+        limit+= dup_key_found;
+        updated-= dup_key_found;
+      }
+      else
+      {
+        /* Non-batched update */
+        error= table->file->ha_update_row((byte*) table->record[1],
+                                          (byte*) table->record[0]);
+      }
+      if (!error)
      {
        updated++;
        thd->no_trans_update= !transactional_table;
@@ -479,20 +542,75 @@ int mysql_update(THD *thd,

      if (!--limit && using_limit)
      {
-        error= -1;                              // Simulate end of file
-        break;
+        /*
+          We have reached end-of-file in the most common situations:
+          when no batching has occurred, when batching was supposed to
+          occur but no updates were made, and when the batch execution
+          was performed without error and without finding any duplicate
+          keys.
+          If the batched updates were performed with errors we need to
+          check them, and if there was no error but duplicate keys were
+          found we need to continue, since those do not count against
+          the limit.
+        */
+        if (will_batch &&
+            ((error= table->file->exec_bulk_update(&dup_key_found)) ||
+             !dup_key_found))
+        {
+          if (error)
+          {
+            /*
+              The handler should not report duplicate key errors if
+              they are ignored. This is a requirement on batching
+              handlers.
+            */
+            table->file->print_error(error,MYF(0));
+            error= 1;
+            break;
+          }
+          /*
+            Either an error was found and we are ignoring errors or
+            there were duplicate keys found. In both cases we need to
+            correct the counters and continue the loop.
+          */
+          limit= dup_key_found; //limit is 0 when we get here so need to +
+          updated-= dup_key_found;
+        }
+        else
+        {
+          error= -1;                            // Simulate end of file
+          break;
+        }
      }
    }
    else
      table->file->unlock_row();
    thd->row_count++;
  }
+  dup_key_found= 0;
  if (thd->killed && !error)
    error= 1;                                   // Aborted
+  else if (will_batch &&
+           (loc_error= table->file->exec_bulk_update(&dup_key_found)))
+    /*
+      An error has occurred when a batched update was performed and
+      returned an error indication. It cannot be an allowed duplicate
+      key error, since we require the batching handler to treat this as
+      normal behavior.
+
+      Otherwise we simply remove the number of duplicate key records
+      found in the batched update.
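      An illustration only, nothing like it is part of the patch: a
      handler honouring the contract described above might implement
      exec_bulk_update() roughly as follows, where ha_example, m_buffered,
      m_buffer, m_ignore_dup_key and apply_one() are all hypothetical
      names:

          int ha_example::exec_bulk_update(uint *dup_key_found)
          {
            *dup_key_found= 0;
            for (uint i= 0; i < m_buffered; i++)
            {
              int err= apply_one(m_buffer[i]);  // apply one buffered update
              if (err == HA_ERR_FOUND_DUPP_KEY && m_ignore_dup_key)
                (*dup_key_found)++;             // counted, not reported as an error
              else if (err)
                return err;                     // a real error aborts the batch
            }
            m_buffered= 0;                      // batch fully consumed
            return 0;
          }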
+ */ + { + thd->fatal_error(); + table->file->print_error(loc_error,MYF(0)); + error= 1; + } + else + updated-= dup_key_found; + if (will_batch) + table->file->end_bulk_update(); + table->file->try_semi_consistent_read(0); end_read_record(&info); free_io_cache(table); // If ORDER BY delete select; - thd->proc_info="end"; + thd->proc_info= "end"; VOID(table->file->extra(HA_EXTRA_NO_IGNORE_DUP_KEY)); /* @@ -519,10 +637,13 @@ int mysql_update(THD *thd, { if (error < 0) thd->clear_error(); - Query_log_event qinfo(thd, thd->query, thd->query_length, - transactional_table, FALSE); - if (mysql_bin_log.write(&qinfo) && transactional_table) + if (thd->binlog_query(THD::ROW_QUERY_TYPE, + thd->query, thd->query_length, + transactional_table, FALSE) && + transactional_table) + { error=1; // Rollback update + } } if (!transactional_table) thd->options|=OPTION_STATUS_NO_TRANS_UPDATE; @@ -702,7 +823,7 @@ reopen_tables: &lex->select_lex.leaf_tables, FALSE)) DBUG_RETURN(TRUE); - if (setup_fields_with_no_wrap(thd, 0, *fields, 1, 0, 0)) + if (setup_fields_with_no_wrap(thd, 0, *fields, 2, 0, 0)) DBUG_RETURN(TRUE); for (tl= table_list; tl ; tl= tl->next_local) @@ -1082,7 +1203,8 @@ multi_update::initialize_tables(JOIN *join) /* ok to be on stack as this is not referenced outside of this func */ Field_string offset(table->file->ref_length, 0, "offset", - table, &my_charset_bin); + &my_charset_bin); + offset.init(table); if (!(ifield= new Item_field(((Field *) &offset)))) DBUG_RETURN(1); ifield->maybe_null= 0; @@ -1257,8 +1379,8 @@ bool multi_update::send_data(List<Item> ¬_used_values) */ main_table->file->extra(HA_EXTRA_PREPARE_FOR_UPDATE); } - if ((error=table->file->update_row(table->record[1], - table->record[0]))) + if ((error=table->file->ha_update_row(table->record[1], + table->record[0]))) { updated--; if (!ignore || error != HA_ERR_FOUND_DUPP_KEY) @@ -1293,7 +1415,7 @@ bool multi_update::send_data(List<Item> ¬_used_values) memcpy((char*) tmp_table->field[0]->ptr, (char*) table->file->ref, table->file->ref_length); /* Write row, ignoring duplicated updates to a row */ - if (error= tmp_table->file->write_row(tmp_table->record[0])) + if (error= tmp_table->file->ha_write_row(tmp_table->record[0])) { if (error != HA_ERR_FOUND_DUPP_KEY && error != HA_ERR_FOUND_DUPP_UNIQUE && @@ -1410,8 +1532,8 @@ int multi_update::do_updates(bool from_send_error) if (compare_record(table, thd->query_id)) { - if ((local_error=table->file->update_row(table->record[1], - table->record[0]))) + if ((local_error=table->file->ha_update_row(table->record[1], + table->record[0]))) { if (!ignore || local_error != HA_ERR_FOUND_DUPP_KEY) goto err; @@ -1490,10 +1612,13 @@ bool multi_update::send_eof() { if (local_error == 0) thd->clear_error(); - Query_log_event qinfo(thd, thd->query, thd->query_length, - transactional_tables, FALSE); - if (mysql_bin_log.write(&qinfo) && trans_safe) + if (thd->binlog_query(THD::ROW_QUERY_TYPE, + thd->query, thd->query_length, + transactional_tables, FALSE) && + trans_safe) + { local_error= 1; // Rollback update + } } if (!transactional_tables) thd->options|=OPTION_STATUS_NO_TRANS_UPDATE; diff --git a/sql/sql_view.cc b/sql/sql_view.cc index 4f62a80cfd4..78497a2cf8b 100644 --- a/sql/sql_view.cc +++ b/sql/sql_view.cc @@ -568,8 +568,8 @@ static int mysql_register_view(THD *thd, TABLE_LIST *view, String str(buff,(uint32) sizeof(buff), system_charset_info); char md5[MD5_BUFF_LENGTH]; bool can_be_merged; - char dir_buff[FN_REFLEN], file_buff[FN_REFLEN]; - LEX_STRING dir, file; + char dir_buff[FN_REFLEN], 
file_buff[FN_REFLEN], path_buff[FN_REFLEN]; + LEX_STRING dir, file, path; DBUG_ENTER("mysql_register_view"); /* print query */ @@ -584,15 +584,17 @@ static int mysql_register_view(THD *thd, TABLE_LIST *view, DBUG_PRINT("info", ("View: %s", str.ptr())); /* print file name */ - (void) my_snprintf(dir_buff, FN_REFLEN, "%s/%s/", - mysql_data_home, view->db); - unpack_filename(dir_buff, dir_buff); + dir.length= build_table_filename(dir_buff, sizeof(dir_buff), + view->db, "", ""); dir.str= dir_buff; - dir.length= strlen(dir_buff); - file.str= file_buff; - file.length= (strxnmov(file_buff, FN_REFLEN, view->table_name, reg_ext, - NullS) - file_buff); + path.length= build_table_filename(path_buff, sizeof(path_buff), + view->db, view->table_name, reg_ext); + path.str= path_buff; + + file.str= path.str + dir.length; + file.length= path.length - dir.length; + /* init timestamp */ if (!view->timestamp.str) view->timestamp.str= view->timestamp_buffer; @@ -1175,17 +1177,17 @@ err: bool mysql_drop_view(THD *thd, TABLE_LIST *views, enum_drop_mode drop_mode) { - DBUG_ENTER("mysql_drop_view"); char path[FN_REFLEN]; TABLE_LIST *view; - bool type= 0; - db_type not_used; + enum legacy_db_type not_used; + DBUG_ENTER("mysql_drop_view"); for (view= views; view; view= view->next_local) { - strxnmov(path, FN_REFLEN, mysql_data_home, "/", view->db, "/", - view->table_name, reg_ext, NullS); - (void) unpack_filename(path, path); + TABLE_SHARE *share; + bool type= 0; + build_table_filename(path, sizeof(path), + view->db, view->table_name, reg_ext); VOID(pthread_mutex_lock(&LOCK_open)); if (access(path, F_OK) || (type= (mysql_frm_type(thd, path, ¬_used) != FRMTYPE_VIEW))) @@ -1208,6 +1210,20 @@ bool mysql_drop_view(THD *thd, TABLE_LIST *views, enum_drop_mode drop_mode) } if (my_delete(path, MYF(MY_WME))) goto err; + + /* + For a view, there is only one table_share object which should never + be used outside of LOCK_open + */ + if ((share= get_cached_table_share(view->db, view->table_name))) + { + DBUG_ASSERT(share->ref_count == 0); + pthread_mutex_lock(&share->mutex); + share->ref_count++; + share->version= 0; + pthread_mutex_unlock(&share->mutex); + release_table_share(share, RELEASE_WAIT_FOR_DROP); + } query_cache_invalidate3(thd, view, 0); sp_cache_invalidate(); VOID(pthread_mutex_unlock(&LOCK_open)); @@ -1235,7 +1251,7 @@ err: FRMTYPE_VIEW view */ -frm_type_enum mysql_frm_type(THD *thd, char *path, db_type *dbt) +frm_type_enum mysql_frm_type(THD *thd, char *path, enum legacy_db_type *dbt) { File file; uchar header[10]; //"TYPE=VIEW\n" it is 10 characters @@ -1264,7 +1280,7 @@ frm_type_enum mysql_frm_type(THD *thd, char *path, db_type *dbt) (header[2] < FRM_VER+3 || header[2] > FRM_VER+4))) DBUG_RETURN(FRMTYPE_TABLE); - *dbt= ha_checktype(thd, (enum db_type) (uint) *(header + 3), 0, 0); + *dbt= (enum legacy_db_type) (uint) *(header + 3); DBUG_RETURN(FRMTYPE_TABLE); // Is probably a .frm table } @@ -1486,7 +1502,7 @@ mysql_rename_view(THD *thd, DBUG_ENTER("mysql_rename_view"); - strxnmov(view_path, FN_REFLEN, mysql_data_home, "/", view->db, "/", + strxnmov(view_path, FN_REFLEN-1, mysql_data_home, "/", view->db, "/", view->table_name, reg_ext, NullS); (void) unpack_filename(view_path, view_path); @@ -1520,7 +1536,8 @@ mysql_rename_view(THD *thd, view_def.revision - 1, num_view_backups)) goto err; - strxnmov(dir_buff, FN_REFLEN, mysql_data_home, "/", view->db, "/", NullS); + strxnmov(dir_buff, FN_REFLEN-1, mysql_data_home, "/", view->db, "/", + NullS); (void) unpack_filename(dir_buff, dir_buff); pathstr.str= 
(char*)dir_buff; diff --git a/sql/sql_view.h b/sql/sql_view.h index cd61d7e9e71..1e3e5f4aa73 100644 --- a/sql/sql_view.h +++ b/sql/sql_view.h @@ -27,7 +27,7 @@ bool check_key_in_view(THD *thd, TABLE_LIST * view); bool insert_view_fields(THD *thd, List<Item> *list, TABLE_LIST *view); -frm_type_enum mysql_frm_type(THD *thd, char *path, db_type *dbt); +frm_type_enum mysql_frm_type(THD *thd, char *path, enum legacy_db_type *dbt); int view_checksum(THD *thd, TABLE_LIST *view); diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy index 25e10362ece..7ebe851fc85 100644 --- a/sql/sql_yacc.yy +++ b/sql/sql_yacc.yy @@ -73,6 +73,7 @@ inline Item *is_truth_value(Item *A, bool v1, bool v2) int num; ulong ulong_num; ulonglong ulonglong_number; + longlong longlong_number; LEX_STRING lex_str; LEX_STRING *lex_str_ptr; LEX_SYMBOL symbol; @@ -91,7 +92,7 @@ inline Item *is_truth_value(Item *A, bool v1, bool v2) enum enum_var_type var_type; Key::Keytype key_type; enum ha_key_alg key_alg; - enum db_type db_type; + handlerton *db_type; enum row_type row_type; enum ha_rkey_function ha_rkey_mode; enum enum_tx_isolation tx_isolation; @@ -136,6 +137,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token ASCII_SYM %token ASENSITIVE_SYM %token ATAN +%token AUTHORS_SYM %token AUTO_INC %token AVG_ROW_LENGTH %token AVG_SYM @@ -335,6 +337,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token INSENSITIVE_SYM %token INSERT %token INSERT_METHOD +%token INSTALL_SYM %token INTERVAL_SYM %token INTO %token INT_SYM @@ -358,13 +361,16 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token LEAVES %token LEAVE_SYM %token LEFT +%token LESS_SYM %token LEVEL_SYM %token LEX_HOSTNAME %token LIKE %token LIMIT +%token LINEAR_SYM %token LINEFROMTEXT %token LINES %token LINESTRING +%token LIST_SYM %token LOAD %token LOCAL_SYM %token LOCATE @@ -404,6 +410,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token MAX_SYM %token MAX_UPDATES_PER_HOUR %token MAX_USER_CONNECTIONS_SYM +%token MAX_VALUE_SYM %token MEDIUMBLOB %token MEDIUMINT %token MEDIUMTEXT @@ -438,6 +445,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token NE %token NEW_SYM %token NEXT_SYM +%token NODEGROUP_SYM %token NONE_SYM %token NOT2_SYM %token NOT_SYM @@ -465,10 +473,14 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token OUTFILE %token OUT_SYM %token PACK_KEYS_SYM +%token PARSER_SYM %token PARTIAL +%token PARTITION_SYM +%token PARTITIONS_SYM %token PASSWORD %token PARAM_MARKER %token PHASE_SYM +%token PLUGIN_SYM %token POINTFROMTEXT %token POINT_SYM %token POLYFROMTEXT @@ -492,6 +504,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token RAID_STRIPED_SYM %token RAID_TYPE %token RAND +%token RANGE_SYM %token READS_SYM %token READ_SYM %token REAL @@ -505,6 +518,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token RELEASE_SYM %token RELOAD %token RENAME +%token REORGANISE_SYM %token REPAIR %token REPEATABLE_SYM %token REPEAT_SYM @@ -551,6 +565,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token SLAVE %token SMALLINT %token SNAPSHOT_SYM +%token SONAME_SYM %token SOUNDS_SYM %token SPATIAL_SYM %token SPECIFIC_SYM @@ -577,6 +592,8 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token STRING_SYM %token SUBDATE_SYM %token SUBJECT_SYM +%token SUBPARTITION_SYM +%token SUBPARTITIONS_SYM %token SUBSTRING %token SUBSTRING_INDEX %token SUM_SYM @@ -598,6 +615,7 @@ bool 
my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token TINYBLOB %token TINYINT %token TINYTEXT +%token THAN_SYM %token TO_SYM %token TRAILING %token TRANSACTION_SYM @@ -609,24 +627,21 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token TYPES_SYM %token TYPE_SYM %token UDF_RETURNS_SYM -%token UDF_SONAME_SYM %token ULONGLONG_NUM %token UNCOMMITTED_SYM %token UNDEFINED_SYM %token UNDERSCORE_CHARSET %token UNDO_SYM %token UNICODE_SYM +%token UNINSTALL_SYM %token UNION_SYM %token UNIQUE_SYM %token UNIQUE_USERS %token UNIX_TIMESTAMP %token UNKNOWN_SYM %token UNLOCK_SYM -%token UNLOCK_SYM %token UNSIGNED %token UNTIL_SYM -%token UNTIL_SYM -%token UPDATE_SYM %token UPDATE_SYM %token USAGE %token USER @@ -687,7 +702,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); sp_opt_label BIN_NUM label_ident %type <lex_str_ptr> - opt_table_alias + opt_table_alias opt_fulltext_parser %type <table> table_ident table_ident_nodb references xid @@ -714,6 +729,9 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %type <ulonglong_number> ulonglong_num +%type <longlong_number> + part_bit_expr + %type <lock_type> replace_lock_option opt_low_priority insert_lock_option load_data_lock @@ -730,6 +748,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); sp_opt_default simple_ident_nospvar simple_ident_q field_or_var limit_option + part_func_expr %type <item_num> NUM_literal @@ -834,6 +853,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); definer view_replace_or_algorithm view_replace view_algorithm_opt view_algorithm view_or_trigger_tail view_suid view_tail view_list_opt view_list view_select view_check_option trigger_tail + install uninstall partition_entry binlog_base64_event END_OF_INPUT %type <NONE> call sp_proc_stmts sp_proc_stmts1 sp_proc_stmt @@ -877,6 +897,7 @@ statement: alter | analyze | backup + | binlog_base64_event | call | change | check @@ -894,11 +915,13 @@ statement: | handler | help | insert + | install | kill | load | lock | optimize | keycache + | partition_entry | preload | prepare | purge @@ -917,6 +940,7 @@ statement: | slave | start | truncate + | uninstall | unlock | update | use @@ -1151,7 +1175,7 @@ create: lex->change=NullS; bzero((char*) &lex->create_info,sizeof(lex->create_info)); lex->create_info.options=$2 | $4; - lex->create_info.db_type= (enum db_type) lex->thd->variables.table_type; + lex->create_info.db_type= lex->thd->variables.table_type; lex->create_info.default_table_charset= NULL; lex->name=0; } @@ -1169,11 +1193,15 @@ create: lex->col_list.empty(); lex->change=NullS; } - '(' key_list ')' + '(' key_list ')' opt_fulltext_parser { LEX *lex=Lex; - - lex->key_list.push_back(new Key($2,$4.str, $5, 0, lex->col_list)); + if ($2 != Key::FULLTEXT && $12) + { + yyerror(ER(ER_SYNTAX_ERROR)); + YYABORT; + } + lex->key_list.push_back(new Key($2,$4.str,$5,0,lex->col_list,$12)); lex->col_list.empty(); } | CREATE DATABASE opt_if_not_exists ident @@ -1298,7 +1326,7 @@ sp_name: ; create_function_tail: - RETURNS_SYM udf_type UDF_SONAME_SYM TEXT_STRING_sys + RETURNS_SYM udf_type SONAME_SYM TEXT_STRING_sys { LEX *lex=Lex; lex->sql_command = SQLCOM_CREATE_FUNCTION; @@ -2513,7 +2541,9 @@ trg_event: create2: '(' create2a {} - | opt_create_table_options create3 {} + | opt_create_table_options + opt_partitioning {} + create3 {} | LIKE table_ident { LEX *lex=Lex; @@ -2529,8 +2559,12 @@ create2: ; create2a: - field_list ')' opt_create_table_options create3 {} - | create_select ')' { Select->set_braces(1);} 
union_opt {}
+        field_list ')' opt_create_table_options
+        opt_partitioning {}
+        create3 {}
+        | opt_partitioning {}
+          create_select ')'
+          { Select->set_braces(1);} union_opt {}
        ;

create3:
@@ -2541,6 +2575,484 @@ create2:
          { Select->set_braces(1);} union_opt {}
        ;

+/*
+  This part of the parser is about handling of the partition information.
+
+  Its first version was written by Mikael Ronström, with many of the
+  questions answered by Antony Curtis.
+
+  The partition grammar can be called from three places.
+  1) CREATE TABLE ... PARTITION ..
+  2) ALTER TABLE table_name PARTITION ...
+  3) PARTITION ...
+
+  The first place is called when a new table is created from a MySQL client.
+  The second place is called when a table is altered with the ALTER TABLE
+  command from a MySQL client.
+  The third place is called when opening an frm file and finding partition
+  info in the .frm file. It is necessary to prevent PARTITION from being
+  an entry point for ordinary SQL client queries. This is arranged by
+  setting some state variables before arriving here.
+
+  To be able to handle errors, we only set an error code in this code
+  and handle the error condition in the function calling the parser. This
+  is necessary to ensure we can also handle errors when calling the parser
+  from the openfrm function.
+*/
+opt_partitioning:
+        /* empty */ {}
+        | partitioning
+        ;
+
+partitioning:
+        PARTITION_SYM
+        {
+          LEX *lex= Lex;
+          lex->part_info= new partition_info();
+          if (!lex->part_info)
+          {
+            my_error(ER_OUTOFMEMORY, MYF(0), sizeof(partition_info));
+            YYABORT;
+          }
+        }
+        partition
+        ;
+
+partition_entry:
+        PARTITION_SYM
+        {
+          LEX *lex= Lex;
+          if (lex->part_info)
+          {
+            /*
+              We enter here when opening the frm file to translate
+              partition info string into part_info data structure.
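              For concreteness, three invented statements matching the
              three entry points above (they are not test cases from the
              patch):

                  1) CREATE TABLE t1 (a INT) PARTITION BY HASH (a) PARTITIONS 4;
                  2) ALTER TABLE t1 ADD PARTITION PARTITIONS 2;
                  3) PARTITION BY HASH (a) PARTITIONS 4   (only from the .frm reader)

              Entry point 3) is accepted only when the caller has pre-set
              lex->part_info before starting the parser, which is exactly
              what the surrounding if (lex->part_info) test enforces.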
+ */ + lex->part_info= new partition_info(); + if (!lex->part_info) + { + my_error(ER_OUTOFMEMORY, MYF(0), sizeof(partition_info)); + YYABORT; + } + } + else + { + yyerror(ER(ER_PARTITION_ENTRY_ERROR)); + YYABORT; + } + } + partition {} + ; + +partition: + BY part_type_def opt_no_parts {} opt_sub_part {} part_defs + ; + +part_type_def: + opt_linear KEY_SYM '(' part_field_list ')' + { + LEX *lex= Lex; + lex->part_info->list_of_part_fields= TRUE; + lex->part_info->part_type= HASH_PARTITION; + } + | opt_linear HASH_SYM + { Lex->part_info->part_type= HASH_PARTITION; } + part_func {} + | RANGE_SYM + { Lex->part_info->part_type= RANGE_PARTITION; } + part_func {} + | LIST_SYM + { Lex->part_info->part_type= LIST_PARTITION; } + part_func {} + ; + +opt_linear: + /* empty */ {} + | LINEAR_SYM + { Lex->part_info->linear_hash_ind= TRUE;} + ; + +part_field_list: + part_field_item {} + | part_field_list ',' part_field_item {} + ; + +part_field_item: + ident + { + Lex->part_info->part_field_list.push_back($1.str); + } + ; + +part_func: + '(' remember_name part_func_expr remember_end ')' + { + LEX *lex= Lex; + uint expr_len= (uint)($4 - $2) - 1; + lex->part_info->list_of_part_fields= FALSE; + lex->part_info->part_expr= $3; + lex->part_info->part_func_string= $2+1; + lex->part_info->part_func_len= expr_len; + } + ; + +sub_part_func: + '(' remember_name part_func_expr remember_end ')' + { + LEX *lex= Lex; + uint expr_len= (uint)($4 - $2) - 1; + lex->part_info->list_of_subpart_fields= FALSE; + lex->part_info->subpart_expr= $3; + lex->part_info->subpart_func_string= $2+1; + lex->part_info->subpart_func_len= expr_len; + } + ; + + +opt_no_parts: + /* empty */ {} + | PARTITIONS_SYM ulong_num + { + uint no_parts= $2; + if (no_parts == 0) + { + my_error(ER_NO_PARTS_ERROR, MYF(0), "partitions"); + YYABORT; + } + Lex->part_info->no_parts= no_parts; + } + ; + +opt_sub_part: + /* empty */ {} + | SUBPARTITION_SYM BY opt_linear HASH_SYM sub_part_func + { Lex->part_info->subpart_type= HASH_PARTITION; } + opt_no_subparts {} + | SUBPARTITION_SYM BY opt_linear KEY_SYM + '(' sub_part_field_list ')' + { + LEX *lex= Lex; + lex->part_info->subpart_type= HASH_PARTITION; + lex->part_info->list_of_subpart_fields= TRUE; + } + opt_no_subparts {} + ; + +sub_part_field_list: + sub_part_field_item {} + | sub_part_field_list ',' sub_part_field_item {} + ; + +sub_part_field_item: + ident + { Lex->part_info->subpart_field_list.push_back($1.str); } + ; + +part_func_expr: + bit_expr + { + LEX *lex= Lex; + bool not_corr_func; + not_corr_func= !lex->safe_to_cache_query; + lex->safe_to_cache_query= 1; + if (not_corr_func) + { + yyerror(ER(ER_CONST_EXPR_IN_PARTITION_FUNC_ERROR)); + YYABORT; + } + $$=$1; + } + ; + +opt_no_subparts: + /* empty */ {} + | SUBPARTITIONS_SYM ulong_num + { + uint no_parts= $2; + if (no_parts == 0) + { + my_error(ER_NO_PARTS_ERROR, MYF(0), "subpartitions"); + YYABORT; + } + Lex->part_info->no_subparts= no_parts; + } + ; + +part_defs: + /* empty */ + {} + | '(' part_def_list ')' + { + LEX *lex= Lex; + partition_info *part_info= lex->part_info; + if (part_info->no_parts != 0) + { + if (part_info->no_parts != + part_info->count_curr_parts) + { + yyerror(ER(ER_PARTITION_WRONG_NO_PART_ERROR)); + YYABORT; + } + } + else if (part_info->count_curr_parts > 0) + { + part_info->no_parts= part_info->count_curr_parts; + } + part_info->count_curr_subparts= 0; + part_info->count_curr_parts= 0; + } + ; + +part_def_list: + part_definition {} + | part_def_list ',' part_definition {} + ; + +part_definition: + PARTITION_SYM + { + LEX *lex= 
Lex; + partition_info *part_info= lex->part_info; + partition_element *p_elem= new partition_element(); + if (!p_elem) + { + my_error(ER_OUTOFMEMORY, MYF(0), sizeof(partition_element)); + YYABORT; + } + part_info->curr_part_elem= p_elem; + part_info->current_partition= p_elem; + part_info->use_default_partitions= FALSE; + part_info->partitions.push_back(p_elem); + p_elem->engine_type= NULL; + part_info->count_curr_parts++; + } + part_name {} + opt_part_values {} + opt_part_options {} + opt_sub_partition {} + ; + +part_name: + ident_or_text + { Lex->part_info->curr_part_elem->partition_name= $1.str; } + ; + +opt_part_values: + /* empty */ + { + LEX *lex= Lex; + if (!is_partition_management(lex)) + { + if (lex->part_info->part_type == RANGE_PARTITION) + { + my_error(ER_PARTITION_REQUIRES_VALUES_ERROR, MYF(0), + "RANGE", "LESS THAN"); + YYABORT; + } + if (lex->part_info->part_type == LIST_PARTITION) + { + my_error(ER_PARTITION_REQUIRES_VALUES_ERROR, MYF(0), + "LIST", "IN"); + YYABORT; + } + } + } + | VALUES LESS_SYM THAN_SYM part_func_max + { + LEX *lex= Lex; + if (!is_partition_management(lex)) + { + if (Lex->part_info->part_type != RANGE_PARTITION) + { + my_error(ER_PARTITION_WRONG_VALUES_ERROR, MYF(0), + "RANGE", "LESS THAN"); + YYABORT; + } + } + } + | VALUES IN_SYM '(' part_list_func ')' + { + LEX *lex= Lex; + if (!is_partition_management(lex)) + { + if (Lex->part_info->part_type != LIST_PARTITION) + { + my_error(ER_PARTITION_WRONG_VALUES_ERROR, MYF(0), + "LIST", "IN"); + YYABORT; + } + } + } + ; + +part_func_max: + MAX_VALUE_SYM + { + LEX *lex= Lex; + if (lex->part_info->defined_max_value) + { + yyerror(ER(ER_PARTITION_MAXVALUE_ERROR)); + YYABORT; + } + lex->part_info->defined_max_value= TRUE; + lex->part_info->curr_part_elem->range_value= LONGLONG_MAX; + } + | part_range_func + { + if (Lex->part_info->defined_max_value) + { + yyerror(ER(ER_PARTITION_MAXVALUE_ERROR)); + YYABORT; + } + } + ; + +part_range_func: + '(' part_bit_expr ')' + { + Lex->part_info->curr_part_elem->range_value= $2; + } + ; + +part_list_func: + part_list_item {} + | part_list_func ',' part_list_item {} + ; + +part_list_item: + part_bit_expr + { + longlong *value_ptr; + if (!(value_ptr= (longlong*)sql_alloc(sizeof(longlong)))) + { + my_error(ER_OUTOFMEMORY, MYF(0), sizeof(longlong)); + YYABORT; + } + *value_ptr= $1; + Lex->part_info->curr_part_elem->list_val_list.push_back(value_ptr); + } + ; + +part_bit_expr: + bit_expr + { + Item *part_expr= $1; + bool not_corr_func; + LEX *lex= Lex; + longlong item_value; + Name_resolution_context *context= &lex->current_select->context; + TABLE_LIST *save_list= context->table_list; + + context->table_list= 0; + part_expr->fix_fields(YYTHD, (Item**)0); + context->table_list= save_list; + not_corr_func= !part_expr->const_item() || + !lex->safe_to_cache_query; + if (not_corr_func) + { + yyerror(ER(ER_NO_CONST_EXPR_IN_RANGE_OR_LIST_ERROR)); + YYABORT; + } + if (part_expr->result_type() != INT_RESULT) + { + yyerror(ER(ER_INCONSISTENT_TYPE_OF_FUNCTIONS_ERROR)); + YYABORT; + } + item_value= part_expr->val_int(); + $$= item_value; + } + ; + +opt_sub_partition: + /* empty */ {} + | '(' sub_part_list ')' + { + LEX *lex= Lex; + partition_info *part_info= lex->part_info; + if (part_info->no_subparts != 0) + { + if (part_info->no_subparts != + part_info->count_curr_subparts) + { + yyerror(ER(ER_PARTITION_WRONG_NO_SUBPART_ERROR)); + YYABORT; + } + } + else if (part_info->count_curr_subparts > 0) + { + part_info->no_subparts= part_info->count_curr_subparts; + } + 
part_info->count_curr_subparts= 0; + } + ; + +sub_part_list: + sub_part_definition {} + | sub_part_list ',' sub_part_definition {} + ; + +sub_part_definition: + SUBPARTITION_SYM + { + LEX *lex= Lex; + partition_info *part_info= lex->part_info; + partition_element *p_elem= new partition_element(); + if (!p_elem) + { + my_error(ER_OUTOFMEMORY, MYF(0), sizeof(partition_element)); + YYABORT; + } + part_info->curr_part_elem= p_elem; + part_info->current_partition->subpartitions.push_back(p_elem); + part_info->use_default_subpartitions= FALSE; + part_info->count_curr_subparts++; + p_elem->engine_type= NULL; + } + sub_name opt_part_options {} + ; + +sub_name: + ident_or_text + { Lex->part_info->curr_part_elem->partition_name= $1.str; } + ; + +opt_part_options: + /* empty */ {} + | opt_part_option_list {} + ; + +opt_part_option_list: + opt_part_option_list opt_part_option {} + | opt_part_option {} + ; + +opt_part_option: + TABLESPACE opt_equal ident_or_text + { Lex->part_info->curr_part_elem->tablespace_name= $3.str; } + | opt_storage ENGINE_SYM opt_equal storage_engines + { + LEX *lex= Lex; + lex->part_info->curr_part_elem->engine_type= $4; + lex->part_info->default_engine_type= $4; + } + | NODEGROUP_SYM opt_equal ulong_num + { Lex->part_info->curr_part_elem->nodegroup_id= $3; } + | MAX_ROWS opt_equal ulonglong_num + { Lex->part_info->curr_part_elem->part_max_rows= $3; } + | MIN_ROWS opt_equal ulonglong_num + { Lex->part_info->curr_part_elem->part_min_rows= $3; } + | DATA_SYM DIRECTORY_SYM opt_equal TEXT_STRING_sys + { Lex->part_info->curr_part_elem->data_file_name= $4.str; } + | INDEX_SYM DIRECTORY_SYM opt_equal TEXT_STRING_sys + { Lex->part_info->curr_part_elem->index_file_name= $4.str; } + | COMMENT_SYM opt_equal TEXT_STRING_sys + { Lex->part_info->curr_part_elem->part_comment= $3.str; } + ; + +/* + End of partition parser part +*/ + create_select: SELECT_SYM { @@ -2723,8 +3235,10 @@ default_collation: storage_engines: ident_or_text { - $$ = ha_resolve_by_name($1.str,$1.length); - if ($$ == DB_TYPE_UNKNOWN) { + $$ = ha_resolve_by_name(YYTHD, &$1); + if ($$ == NULL && + test(YYTHD->variables.sql_mode & MODE_NO_ENGINE_SUBSTITUTION)) + { my_error(ER_UNKNOWN_STORAGE_ENGINE, MYF(0), $1.str); YYABORT; } @@ -2781,10 +3295,15 @@ column_def: ; key_def: - key_type opt_ident key_alg '(' key_list ')' + key_type opt_ident key_alg '(' key_list ')' opt_fulltext_parser { LEX *lex=Lex; - lex->key_list.push_back(new Key($1,$2, $3, 0, lex->col_list)); + if ($1 != Key::FULLTEXT && $7) + { + yyerror(ER(ER_SYNTAX_ERROR)); + YYABORT; + } + lex->key_list.push_back(new Key($1,$2, $3, 0, lex->col_list, $7)); lex->col_list.empty(); /* Alloced by sql_alloc */ } | opt_constraint constraint_key_type opt_ident key_alg '(' key_list ')' @@ -2819,6 +3338,20 @@ key_def: } ; +opt_fulltext_parser: + /* empty */ { $$= (LEX_STRING *)0; } + | WITH PARSER_SYM IDENT_sys + { + if (plugin_is_ready(&$3, MYSQL_FTPARSER_PLUGIN)) + $$= (LEX_STRING *)sql_memdup(&$3, sizeof(LEX_STRING)); + else + { + my_error(ER_FUNCTION_NOT_DEFINED, MYF(0), $3.str); + YYABORT; + } + } + ; + opt_check_constraint: /* empty */ | check_constraint @@ -3354,13 +3887,13 @@ alter: lex->select_lex.init_order(); lex->select_lex.db=lex->name=0; bzero((char*) &lex->create_info,sizeof(lex->create_info)); - lex->create_info.db_type= DB_TYPE_DEFAULT; + lex->create_info.db_type= (handlerton*) &default_hton; lex->create_info.default_table_charset= NULL; lex->create_info.row_type= ROW_TYPE_NOT_USED; lex->alter_info.reset(); lex->alter_info.flags= 0; } - alter_list + 
alter_commands {} | ALTER DATABASE ident_or_empty { @@ -3427,11 +3960,102 @@ ident_or_empty: /* empty */ { $$= 0; } | ident { $$= $1.str; }; -alter_list: +alter_commands: | DISCARD TABLESPACE { Lex->alter_info.tablespace_op= DISCARD_TABLESPACE; } | IMPORT TABLESPACE { Lex->alter_info.tablespace_op= IMPORT_TABLESPACE; } - | alter_list_item - | alter_list ',' alter_list_item; + | alter_list + opt_partitioning + | partitioning +/* + This part was added for release 5.1 by Mikael Ronström. + From here we insert a number of commands to manage the partitions of a + partitioned table such as adding partitions, dropping partitions, + reorganising partitions in various manners. In future releases the list + will be longer and also include moving partitions to a + new table and so forth. +*/ + | add_partition_rule + | DROP PARTITION_SYM alt_part_name_list + { + Lex->alter_info.flags|= ALTER_DROP_PARTITION; + } + | COALESCE PARTITION_SYM ulong_num + { + LEX *lex= Lex; + lex->alter_info.flags|= ALTER_COALESCE_PARTITION; + lex->alter_info.no_parts= $3; + } + | reorg_partition_rule + ; + +add_partition_rule: + ADD PARTITION_SYM + { + LEX *lex= Lex; + lex->part_info= new partition_info(); + if (!lex->part_info) + { + my_error(ER_OUTOFMEMORY, MYF(0), sizeof(partition_info)); + YYABORT; + } + lex->alter_info.flags|= ALTER_ADD_PARTITION; + } + add_part_extra + {} + ; + +add_part_extra: + | '(' part_def_list ')' + { + LEX *lex= Lex; + lex->part_info->no_parts= lex->part_info->count_curr_parts; + } + | PARTITIONS_SYM ulong_num + { + LEX *lex= Lex; + lex->part_info->no_parts= $2; + } + ; + +reorg_partition_rule: + REORGANISE_SYM PARTITION_SYM + { + LEX *lex= Lex; + lex->part_info= new partition_info(); + if (!lex->part_info) + { + my_error(ER_OUTOFMEMORY, MYF(0), sizeof(partition_info)); + YYABORT; + } + lex->alter_info.flags|= ALTER_REORGANISE_PARTITION; + } + alt_part_name_list INTO '(' part_def_list ')' + { + LEX *lex= Lex; + lex->part_info->no_parts= lex->part_info->count_curr_parts; + } + ; + +alt_part_name_list: + alt_part_name_item {} + | alt_part_name_list ',' alt_part_name_item {} + ; + +alt_part_name_item: + ident + { + Lex->alter_info.partition_names.push_back($1.str); + } + ; + +/* + End of management of partition commands +*/ + +alter_list: + alter_list_item + | alter_list ',' alter_list_item + ; add_column: ADD opt_column @@ -3777,6 +4401,14 @@ analyze: {} ; +binlog_base64_event: + BINLOG_SYM TEXT_STRING_sys + { + Lex->sql_command = SQLCOM_BINLOG_BASE64_EVENT; + Lex->comment= $2; + } + ; + check: CHECK_SYM table_or_tables { @@ -4049,7 +4681,7 @@ select_options: /* empty*/ | select_option_list { - if (test_all_bits(Select->options, SELECT_ALL | SELECT_DISTINCT)) + if (Select->options & SELECT_DISTINCT && Select->options & SELECT_ALL) { my_error(ER_WRONG_USAGE, MYF(0), "ALL", "DISTINCT"); YYABORT; @@ -6411,9 +7043,20 @@ show_param: if (prepare_schema_table(YYTHD, lex, 0, SCH_OPEN_TABLES)) YYABORT; } + | PLUGIN_SYM + { + LEX *lex= Lex; + lex->sql_command= SQLCOM_SELECT; + lex->orig_sql_command= SQLCOM_SHOW_PLUGINS; + if (prepare_schema_table(YYTHD, lex, 0, SCH_PLUGINS)) + YYABORT; + } | ENGINE_SYM storage_engines { Lex->create_info.db_type= $2; } show_engine_param + | ENGINE_SYM ALL + { Lex->create_info.db_type= NULL; } + show_engine_param | opt_full COLUMNS from_or_in table_ident opt_db wild_and_where { LEX *lex= Lex; @@ -6472,6 +7115,14 @@ show_param: { LEX *lex=Lex; lex->sql_command= SQLCOM_SHOW_STORAGE_ENGINES; + lex->orig_sql_command= SQLCOM_SHOW_AUTHORS; + if (prepare_schema_table(YYTHD, lex, 
0, SCH_ENGINES)) + YYABORT; + } + | AUTHORS_SYM + { + LEX *lex=Lex; + lex->sql_command= SQLCOM_SHOW_AUTHORS; } | PRIVILEGES { @@ -6496,9 +7147,29 @@ show_param: YYABORT; } | INNOBASE_SYM STATUS_SYM - { Lex->sql_command = SQLCOM_SHOW_INNODB_STATUS; WARN_DEPRECATED("SHOW INNODB STATUS", "SHOW ENGINE INNODB STATUS"); } + { + LEX *lex= Lex; + lex->sql_command = SQLCOM_SHOW_ENGINE_STATUS; + if (!(lex->create_info.db_type= + ha_resolve_by_legacy_type(YYTHD, DB_TYPE_INNODB))) + { + my_error(ER_UNKNOWN_STORAGE_ENGINE, MYF(0), "InnoDB"); + YYABORT; + } + WARN_DEPRECATED("SHOW INNODB STATUS", "SHOW ENGINE INNODB STATUS"); + } | MUTEX_SYM STATUS_SYM - { Lex->sql_command = SQLCOM_SHOW_MUTEX_STATUS; } + { + LEX *lex= Lex; + lex->sql_command = SQLCOM_SHOW_ENGINE_MUTEX; + if (!(lex->create_info.db_type= + ha_resolve_by_legacy_type(YYTHD, DB_TYPE_INNODB))) + { + my_error(ER_UNKNOWN_STORAGE_ENGINE, MYF(0), "InnoDB"); + YYABORT; + } + WARN_DEPRECATED("SHOW MUTEX STATUS", "SHOW ENGINE INNODB MUTEX"); + } | opt_full PROCESSLIST_SYM { Lex->sql_command= SQLCOM_SHOW_PROCESSLIST;} | opt_var_type VARIABLES wild_and_where @@ -6527,9 +7198,29 @@ show_param: YYABORT; } | BERKELEY_DB_SYM LOGS_SYM - { Lex->sql_command= SQLCOM_SHOW_LOGS; WARN_DEPRECATED("SHOW BDB LOGS", "SHOW ENGINE BDB LOGS"); } + { + LEX *lex= Lex; + lex->sql_command= SQLCOM_SHOW_ENGINE_LOGS; + if (!(lex->create_info.db_type= + ha_resolve_by_legacy_type(YYTHD, DB_TYPE_BERKELEY_DB))) + { + my_error(ER_UNKNOWN_STORAGE_ENGINE, MYF(0), "BerkeleyDB"); + YYABORT; + } + WARN_DEPRECATED("SHOW BDB LOGS", "SHOW ENGINE BDB LOGS"); + } | LOGS_SYM - { Lex->sql_command= SQLCOM_SHOW_LOGS; WARN_DEPRECATED("SHOW LOGS", "SHOW ENGINE BDB LOGS"); } + { + LEX *lex= Lex; + lex->sql_command= SQLCOM_SHOW_ENGINE_LOGS; + if (!(lex->create_info.db_type= + ha_resolve_by_legacy_type(YYTHD, DB_TYPE_BERKELEY_DB))) + { + my_error(ER_UNKNOWN_STORAGE_ENGINE, MYF(0), "BerkeleyDB"); + YYABORT; + } + WARN_DEPRECATED("SHOW LOGS", "SHOW ENGINE BDB LOGS"); + } | GRANTS { LEX *lex=Lex; @@ -6649,30 +7340,11 @@ show_param: show_engine_param: STATUS_SYM - { - switch (Lex->create_info.db_type) { - case DB_TYPE_NDBCLUSTER: - Lex->sql_command = SQLCOM_SHOW_NDBCLUSTER_STATUS; - break; - case DB_TYPE_INNODB: - Lex->sql_command = SQLCOM_SHOW_INNODB_STATUS; - break; - default: - my_error(ER_NOT_SUPPORTED_YET, MYF(0), "STATUS"); - YYABORT; - } - } + { Lex->sql_command= SQLCOM_SHOW_ENGINE_STATUS; } + | MUTEX_SYM + { Lex->sql_command= SQLCOM_SHOW_ENGINE_MUTEX; } | LOGS_SYM - { - switch (Lex->create_info.db_type) { - case DB_TYPE_BERKELEY_DB: - Lex->sql_command = SQLCOM_SHOW_LOGS; - break; - default: - my_error(ER_NOT_SUPPORTED_YET, MYF(0), "LOGS"); - YYABORT; - } - }; + { Lex->sql_command= SQLCOM_SHOW_ENGINE_LOGS; }; master_or_binary: MASTER_SYM @@ -6748,8 +7420,10 @@ describe_command: opt_extended_describe: /* empty */ {} | EXTENDED_SYM { Lex->describe|= DESCRIBE_EXTENDED; } + | PARTITIONS_SYM { Lex->describe|= DESCRIBE_PARTITIONS; } ; + opt_describe_column: /* empty */ {} | text_string { Lex->wild= $1; } @@ -7495,6 +8169,7 @@ user: keyword: keyword_sp {} | ASCII_SYM {} + | AUTHORS_SYM {} | BACKUP_SYM {} | BEGIN_SYM {} | BYTE_SYM {} @@ -7512,9 +8187,13 @@ keyword: | FLUSH_SYM {} | HANDLER_SYM {} | HELP_SYM {} + | INSTALL_SYM {} | LANGUAGE_SYM {} | NO_SYM {} | OPEN_SYM {} + | PARSER_SYM {} + | PARTITION_SYM {} + | PLUGIN_SYM {} | PREPARE_SYM {} | REPAIR {} | RESET_SYM {} @@ -7524,10 +8203,12 @@ keyword: | SECURITY_SYM {} | SIGNED_SYM {} | SLAVE {} + | SONAME_SYM {} | START_SYM {} | STOP_SYM {} | 
TRUNCATE_SYM {} | UNICODE_SYM {} + | UNINSTALL_SYM {} | XA_SYM {} ; @@ -7559,7 +8240,8 @@ keyword_sp: | CHANGED {} | CIPHER_SYM {} | CLIENT_SYM {} - | CODE_SYM {} + | COALESCE {} + | CODE_SYM {} | COLLATION_SYM {} | COLUMNS {} | COMMITTED_SYM {} @@ -7616,8 +8298,10 @@ keyword_sp: | RELAY_THREAD {} | LAST_SYM {} | LEAVES {} + | LESS_SYM {} | LEVEL_SYM {} | LINESTRING {} + | LIST_SYM {} | LOCAL_SYM {} | LOCKS_SYM {} | LOGS_SYM {} @@ -7641,6 +8325,7 @@ keyword_sp: | MAX_QUERIES_PER_HOUR {} | MAX_UPDATES_PER_HOUR {} | MAX_USER_CONNECTIONS_SYM {} + | MAX_VALUE_SYM {} | MEDIUM_SYM {} | MERGE_SYM {} | MICROSECOND_SYM {} @@ -7661,6 +8346,7 @@ keyword_sp: | NDBCLUSTER_SYM {} | NEXT_SYM {} | NEW_SYM {} + | NODEGROUP_SYM {} | NONE_SYM {} | NVARCHAR_SYM {} | OFFSET_SYM {} @@ -7669,6 +8355,7 @@ keyword_sp: | ONE_SYM {} | PACK_KEYS_SYM {} | PARTIAL {} + | PARTITIONS_SYM {} | PASSWORD {} | PHASE_SYM {} | POINT_SYM {} @@ -7690,6 +8377,7 @@ keyword_sp: | RELAY_LOG_FILE_SYM {} | RELAY_LOG_POS_SYM {} | RELOAD {} + | REORGANISE_SYM {} | REPEATABLE_SYM {} | REPLICATION {} | RESOURCES {} @@ -7719,6 +8407,8 @@ keyword_sp: | STRING_SYM {} | SUBDATE_SYM {} | SUBJECT_SYM {} + | SUBPARTITION_SYM {} + | SUBPARTITIONS_SYM {} | SUPER_SYM {} | SUSPEND_SYM {} | TABLES {} @@ -7726,6 +8416,7 @@ keyword_sp: | TEMPORARY {} | TEMPTABLE_SYM {} | TEXT_SYM {} + | THAN_SYM {} | TRANSACTION_SYM {} | TRIGGERS_SYM {} | TIMESTAMP {} @@ -7878,7 +8569,7 @@ sys_option_value: { LEX *lex=Lex; - if ($2.var == &trg_new_row_fake_var) + if ($2.var == trg_new_row_fake_var) { /* We are in a trigger and assigning a value to a field of the new row */ Item *it; @@ -8102,7 +8793,7 @@ internal_variable_name: YYABORT; } /* This special combination will denote a field of the NEW row */ - $$.var= &trg_new_row_fake_var; + $$.var= trg_new_row_fake_var; $$.base_name= $3; } else @@ -9213,4 +9904,19 @@ opt_migrate: | FOR_SYM MIGRATE_SYM { Lex->xa_opt=XA_FOR_MIGRATE; } ; +install: + INSTALL_SYM PLUGIN_SYM ident SONAME_SYM TEXT_STRING_sys + { + LEX *lex= Lex; + lex->sql_command= SQLCOM_INSTALL_PLUGIN; + lex->comment= $3; + lex->ident= $5; + }; +uninstall: + UNINSTALL_SYM PLUGIN_SYM ident + { + LEX *lex= Lex; + lex->sql_command= SQLCOM_UNINSTALL_PLUGIN; + lex->comment= $3; + }; diff --git a/sql/strfunc.cc b/sql/strfunc.cc index c822d10af46..2525703172f 100644 --- a/sql/strfunc.cc +++ b/sql/strfunc.cc @@ -235,3 +235,80 @@ uint check_word(TYPELIB *lib, const char *val, const char *end, *end_of_word= ptr; return res; } + + +/* + Converts a string between character sets + + SYNOPSIS + strconvert() + from_cs source character set + from source, a null-terminated string + to_cs destination character set + to destination buffer + to_length destination buffer length + errors [out] number of characters that could not be converted + + NOTES + 'to' is always terminated with a '\0' character. + If there is not enough space to convert the whole string, + only a prefix is converted and terminated with '\0'. + + RETURN VALUES + result string length +*/ + + +uint strconvert(CHARSET_INFO *from_cs, const char *from, + CHARSET_INFO *to_cs, char *to, uint to_length, uint *errors) +{ + int cnvres; + my_wc_t wc; + char *to_start= to; + uchar *to_end= (uchar*) to + to_length - 1; + int (*mb_wc)(struct charset_info_st *, my_wc_t *, const uchar *, + const uchar *)= from_cs->cset->mb_wc; + int (*wc_mb)(struct charset_info_st *, my_wc_t, uchar *s, uchar *e)= + to_cs->cset->wc_mb; + uint error_count= 0; + + while (1) + { + /* + Using 'from + 10' is safe: + - it is enough to scan a single character in any character set.
+ - if the remaining string is shorter than 10, then mb_wc will return + an error because of the unexpected '\0' character. + */ + if ((cnvres= (*mb_wc)(from_cs, &wc, + (uchar*) from, (uchar*) from + 10)) > 0) + { + if (!wc) + break; + from+= cnvres; + } + else if (cnvres == MY_CS_ILSEQ) + { + error_count++; + from++; + wc= '?'; + } + else + break; // Impossible char. + +outp: + + if ((cnvres= (*wc_mb)(to_cs, wc, (uchar*) to, to_end)) > 0) + to+= cnvres; + else if (cnvres == MY_CS_ILUNI && wc != '?') + { + error_count++; + wc= '?'; + goto outp; + } + else + break; + } + *to= '\0'; + *errors= error_count; + return (uint32) (to - to_start); + +} diff --git a/sql/structs.h b/sql/structs.h index 9421ebdc2af..85af73794ae 100644 --- a/sql/structs.h +++ b/sql/structs.h @@ -87,6 +87,15 @@ typedef struct st_key { uint extra_length; uint usable_key_parts; /* Should normally be = key_parts */ enum ha_key_alg algorithm; + /* + Note that parser is used when the table is opened for use, and + parser_name is used when the table is being created. + */ + union + { + struct st_plugin_int *parser; /* Fulltext [pre]parser */ + LEX_STRING *parser_name; /* Fulltext [pre]parser name */ + }; KEY_PART_INFO *key_part; char *name; /* Name of key */ /* @@ -165,8 +174,8 @@ enum SHOW_TYPE { SHOW_UNDEF, SHOW_LONG, SHOW_LONGLONG, SHOW_INT, SHOW_CHAR, SHOW_CHAR_PTR, - SHOW_DOUBLE_STATUS, - SHOW_BOOL, SHOW_MY_BOOL, SHOW_OPENTABLES, SHOW_STARTTIME, SHOW_QUESTION, + SHOW_DOUBLE_STATUS, SHOW_BOOL, SHOW_MY_BOOL, + SHOW_OPEN_TABLES, SHOW_TABLE_DEFINITIONS, SHOW_STARTTIME, SHOW_QUESTION, SHOW_LONG_CONST, SHOW_INT_CONST, SHOW_HAVE, SHOW_SYS, SHOW_HA_ROWS, SHOW_VARS, #ifdef HAVE_OPENSSL diff --git a/sql/table.cc b/sql/table.cc index fc75568b615..f1b1c00f87c 100644 --- a/sql/table.cc +++ b/sql/table.cc @@ -24,134 +24,355 @@ /* Functions defined in this file */ -static void frm_error(int error,TABLE *form,const char *name, - int errortype, int errarg); +void open_table_error(TABLE_SHARE *share, int error, int db_errno, + myf errortype, int errarg); +static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head, + File file); static void fix_type_pointers(const char ***array, TYPELIB *point_to_type, uint types, char **names); -static uint find_field(TABLE *form,uint start,uint length); +static uint find_field(Field **fields, uint start, uint length); -static byte* get_field_name(Field **buff,uint *length, +/* Get column name from column hash */ + +static byte *get_field_name(Field **buff, uint *length, my_bool not_used __attribute__((unused))) { *length= (uint) strlen((*buff)->field_name); return (byte*) (*buff)->field_name; } + + +/* + Returns a pointer to the '.frm' extension of the file name. + + SYNOPSIS + fn_rext() + name file name + + DESCRIPTION + Checks the file name part starting with the rightmost '.' character + and returns it if it is equal to '.frm'. + + TODO + It is a good idea to get rid of this function by modifying the code + to guarantee that the functions presently calling fn_rext() always + get arguments in the same format: either with '.frm' or without '.frm'. + + RETURN VALUES + Pointer to the '.frm' extension. If there is no extension, + or the extension is not '.frm', a pointer to the end of the file name.
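  EXAMPLE
    An editor's illustration, not part of the patch (the paths are made
    up):

      char frm[]= "./test/t1.frm";
      char myd[]= "./test/t1.MYD";
      fn_rext(frm);   /* returns a pointer to the ".frm" suffix          */
      fn_rext(myd);   /* extension is not ".frm": returns a pointer to
                         the terminating '\0' of the whole name          */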
+*/ + +char *fn_rext(char *name) +{ + char *res= strrchr(name, '.'); + if (res && !strcmp(res, ".frm")) + return res; + return name + strlen(name); +} + + +/* + Allocate and set up a TABLE_SHARE structure + + SYNOPSIS + alloc_table_share() + TABLE_LIST Take database and table name from there + key Table cache key (db \0 table_name \0...) + key_length Length of key + + RETURN + 0 Error (out of memory) + # Share +*/ + +TABLE_SHARE *alloc_table_share(TABLE_LIST *table_list, char *key, + uint key_length) +{ + MEM_ROOT mem_root; + TABLE_SHARE *share; + char path[FN_REFLEN], normalized_path[FN_REFLEN]; + uint path_length, normalized_length; + + path_length= build_table_filename(path, sizeof(path) - 1, + table_list->db, + table_list->table_name, ""); + normalized_length= build_table_filename(normalized_path, + sizeof(normalized_path) - 1, + table_list->db, + table_list->table_name, ""); + + init_sql_alloc(&mem_root, TABLE_ALLOC_BLOCK_SIZE, 0); + if ((share= (TABLE_SHARE*) alloc_root(&mem_root, + sizeof(*share) + key_length + + path_length + normalized_length +2))) + { + bzero((char*) share, sizeof(*share)); + share->table_cache_key.str= (char*) (share+1); + share->table_cache_key.length= key_length; + memcpy(share->table_cache_key.str, key, key_length); + + /* Use the fact that the key is db\0table_name\0 */ + share->db.str= share->table_cache_key.str; + share->db.length= strlen(share->db.str); + share->table_name.str= share->db.str + share->db.length + 1; + share->table_name.length= strlen(share->table_name.str); + + share->path.str= share->table_cache_key.str+ key_length; + share->path.length= path_length; + strmov(share->path.str, path); + share->normalized_path.str= share->path.str+ path_length+1; + share->normalized_path.length= normalized_length; + strmov(share->normalized_path.str, normalized_path); + + share->version= refresh_version; + share->flush_version= flush_version; + + memcpy((char*) &share->mem_root, (char*) &mem_root, sizeof(mem_root)); + pthread_mutex_init(&share->mutex, MY_MUTEX_INIT_FAST); + pthread_cond_init(&share->cond, NULL); + } + return share; +} + + /* - Open a .frm file + Initialize share for temporary tables + + SYNOPSIS + init_tmp_table_share() + share Share to fill + key Table_cache_key, as generated from create_table_def_key(). + Must start with the db name. + key_length Length of key + table_name Table name + path Path to file (possibly in lower case) without .frm + + NOTES + This is different from alloc_table_share() because temporary tables + don't have to be shared between threads or put into the table def + cache, so we can do some things notably simpler and faster. + + If the table is not put in thd->temporary_tables (happens only when + one uses OPEN TEMPORARY), then one can specify 'db' as the key and + use key_length= 0, as neither table_cache_key nor key_length will be used.
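  EXAMPLE
    A hypothetical caller (editor's sketch, not from the patch; the key,
    name and path are made up). Per the notes above, key_length= 0 is
    passed when the table will not live in thd->temporary_tables:

      TABLE_SHARE share;
      init_tmp_table_share(&share, "test", 0, "tmp_result",
                           "/var/tmp/mysqld/#sql_4f2_0");
      ...
      free_table_share(&share);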
+*/ + +void init_tmp_table_share(TABLE_SHARE *share, const char *key, + uint key_length, const char *table_name, + const char *path) +{ + DBUG_ENTER("init_tmp_table_share"); + + bzero((char*) share, sizeof(*share)); + init_sql_alloc(&share->mem_root, TABLE_ALLOC_BLOCK_SIZE, 0); + share->tmp_table= INTERNAL_TMP_TABLE; + share->db.str= (char*) key; + share->db.length= strlen(key); + share->table_cache_key.str= (char*) key; + share->table_cache_key.length= key_length; + share->table_name.str= (char*) table_name; + share->table_name.length= strlen(table_name); + share->path.str= (char*) path; + share->normalized_path.str= (char*) path; + share->path.length= share->normalized_path.length= strlen(path); + share->frm_version= FRM_VER_TRUE_VARCHAR; + + DBUG_VOID_RETURN; +} + + +/* + Free table share and memory used by it + + SYNOPSIS + free_table_share() + share Table share + + NOTES + share->mutex must be locked when we come here if it's not a temp table +*/ + +void free_table_share(TABLE_SHARE *share) +{ + MEM_ROOT mem_root; + DBUG_ENTER("free_table_share"); + DBUG_PRINT("enter", ("table: %s.%s", share->db.str, share->table_name.str)); + DBUG_ASSERT(share->ref_count == 0); + + /* + If someone is waiting for this to be deleted, inform it about this. + Don't do a delete until we know that no one is referring to this anymore. + */ + if (share->tmp_table == NO_TMP_TABLE) + { + /* share->mutex is locked in release_table_share() */ + while (share->waiting_on_cond) + { + pthread_cond_broadcast(&share->cond); + pthread_cond_wait(&share->cond, &share->mutex); + } + /* No thread refers to this anymore */ + pthread_mutex_unlock(&share->mutex); + pthread_mutex_destroy(&share->mutex); + pthread_cond_destroy(&share->cond); + } + hash_free(&share->name_hash); + /* We must copy mem_root from share because share is allocated through it */ + memcpy((char*) &mem_root, (char*) &share->mem_root, sizeof(mem_root)); + free_root(&mem_root, MYF(0)); // Frees share + DBUG_VOID_RETURN; +} + + +/* + Read table definition from a binary / text based .frm file + SYNOPSIS - openfrm() + open_table_def() + thd Thread handler + share Fill this with table definition + db_flags Bit mask of the following flags: OPEN_VIEW - name path to table-file "db/name" - alias alias for table - db_stat open flags (for example HA_OPEN_KEYFILE|HA_OPEN_RNDFILE..) - can be 0 (example in ha_example_table) - prgflag READ_ALL etc.. - ha_open_flags HA_OPEN_ABORT_IF_LOCKED etc.. - outparam result table + NOTES + This function is called when the table definition is not cached in + table_def_cache. + The data is returned in 'share', which is allocated by + alloc_table_share(). The code assumes that share is initialized.
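  EXAMPLE
    Editor's sketch of the assumed calling sequence (not from the patch;
    table_list, key, alias, the open flags and the TABLE object come
    from the caller, and error handling is omitted):

      TABLE_SHARE *share;
      if ((share= alloc_table_share(table_list, key, key_length)) &&
          !open_table_def(thd, share, 0) &&
          !open_table_from_share(thd, share, alias, db_stat, prgflag,
                                 ha_open_flags, table))
      {
        /* 'table' is now ready for use; close with closefrm() */
      }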
RETURN VALUES 0 ok - 1 Error (see frm_error) - 2 Error (see frm_error) + 1 Error (see open_table_error) + 2 Error (see open_table_error) 3 Wrong data in .frm file - 4 Error (see frm_error) - 5 Error (see frm_error: charset unavailable) + 4 Error (see open_table_error) + 5 Error (see open_table_error: charset unavailable) 6 Unknown .frm version */ -int openfrm(THD *thd, const char *name, const char *alias, uint db_stat, - uint prgflag, uint ha_open_flags, TABLE *outparam) -{ - reg1 uint i; - reg2 uchar *strpos; - int j,error, errarg= 0; - uint rec_buff_length,n_length,int_length,records,key_parts,keys, - interval_count,interval_parts,read_length,db_create_options; - uint key_info_length, com_length; - ulong pos, record_offset; - char index_file[FN_REFLEN], *names, *keynames, *comment_pos; - uchar head[288],*disk_buff,new_field_pack_flag; - my_string record; - const char **int_array; - bool use_hash, null_field_first; - bool error_reported= FALSE; - File file; - Field **field_ptr,*reg_field; - KEY *keyinfo; - KEY_PART_INFO *key_part; - uchar *null_pos; - uint null_bit_pos, new_frm_ver, field_pack_length; - SQL_CRYPT *crypted=0; +int open_table_def(THD *thd, TABLE_SHARE *share, uint db_flags) +{ + int error, table_type; + bool error_given; + File file; + uchar head[288], *disk_buff; + char path[FN_REFLEN]; MEM_ROOT **root_ptr, *old_root; - TABLE_SHARE *share; - DBUG_ENTER("openfrm"); - DBUG_PRINT("enter",("name: '%s' form: 0x%lx",name,outparam)); + DBUG_ENTER("open_table_def"); + DBUG_PRINT("enter", ("name: '%s.%s'",share->db.str, share->table_name.str)); error= 1; + error_given= 0; disk_buff= NULL; - root_ptr= my_pthread_getspecific_ptr(MEM_ROOT**, THR_MALLOC); - old_root= *root_ptr; - - bzero((char*) outparam,sizeof(*outparam)); - outparam->in_use= thd; - outparam->s= share= &outparam->share_not_to_be_used; - if ((file=my_open(fn_format(index_file, name, "", reg_ext, - MY_UNPACK_FILENAME), - O_RDONLY | O_SHARE, - MYF(0))) - < 0) - goto err; + strxmov(path, share->normalized_path.str, reg_ext, NullS); + if ((file= my_open(path, O_RDONLY | O_SHARE, MYF(0))) < 0) + goto err_not_open; error= 4; - if (my_read(file,(byte*) head,64,MYF(MY_NABP))) + if (my_read(file,(byte*) head, 64, MYF(MY_NABP))) goto err; - if (memcmp(head, STRING_WITH_LEN("TYPE=")) == 0) + if (head[0] == (uchar) 254 && head[1] == 1) { - // new .frm - my_close(file,MYF(MY_WME)); - - if (db_stat & NO_ERR_ON_NEW_FRM) - DBUG_RETURN(5); - file= -1; - // caller can't process new .frm + if (head[2] == FRM_VER || head[2] == FRM_VER+1 || + (head[2] >= FRM_VER+3 && head[2] <= FRM_VER+4)) + table_type= 1; + else + { + error= 6; // Unknown .frm version + goto err; + } + } + else if (memcmp(head, STRING_WITH_LEN("TYPE=")) == 0) + { + error= 5; + if (memcmp(head+5,"VIEW",4) == 0) + { + share->is_view= 1; + if (db_flags & OPEN_VIEW) + error= 0; + } goto err; } + else + goto err; + + /* No handling of text based files yet */ + if (table_type == 1) + { + root_ptr= my_pthread_getspecific_ptr(MEM_ROOT**, THR_MALLOC); + old_root= *root_ptr; + *root_ptr= &share->mem_root; + error= open_binary_frm(thd, share, head, file); + *root_ptr= old_root; - share->blob_ptr_size= sizeof(char*); - outparam->db_stat= db_stat; - init_sql_alloc(&outparam->mem_root, TABLE_ALLOC_BLOCK_SIZE, 0); - *root_ptr= &outparam->mem_root; + /* + We can't mark all tables in the 'mysql' database as system, since we + don't allow locking such tables for writing with any other tables (even + with other system tables) and some privilege tables need this.
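      (Editor's aside, illustrating the check that follows: only
      mysql.proc is marked as a system table here; privilege tables
      such as mysql.user or mysql.db must remain lockable together
      with other tables.)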
+ */ + if (share->db.length == 5 && + !my_strcasecmp(system_charset_info, share->db.str, "mysql") && + !my_strcasecmp(system_charset_info, share->table_name.str, "proc")) + share->system_table= 1; + error_given= 1; + } - share->table_name= strdup_root(&outparam->mem_root, - name+dirname_length(name)); - share->path= strdup_root(&outparam->mem_root, name); - outparam->alias= my_strdup(alias, MYF(MY_WME)); - if (!share->table_name || !share->path || !outparam->alias) - goto err; - *fn_ext(share->table_name)='\0'; // Remove extension - *fn_ext(share->path)='\0'; // Remove extension + if (!error) + thd->status_var.opened_shares++; - if (head[0] != (uchar) 254 || head[1] != 1) - goto err; /* purecov: inspected */ - if (head[2] != FRM_VER && head[2] != FRM_VER+1 && - ! (head[2] >= FRM_VER+3 && head[2] <= FRM_VER+4)) +err: + my_close(file, MYF(MY_WME)); + +err_not_open: + if (error && !error_given) { - error= 6; - goto err; /* purecov: inspected */ + share->error= error; + open_table_error(share, error, (share->open_errno= my_errno), 0); } - new_field_pack_flag=head[27]; + DBUG_RETURN(error); +} + + +/* + Read data from a binary .frm file from MySQL 3.23 - 5.0 into TABLE_SHARE +*/ + +static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head, + File file) +{ + int error, errarg= 0; + uint new_frm_ver, field_pack_length, new_field_pack_flag; + uint interval_count, interval_parts, read_length, int_length; + uint db_create_options, keys, key_parts, n_length; + uint key_info_length, com_length, null_bit_pos; + uint extra_rec_buf_length; + uint i,j; + bool use_hash; + char *keynames, *record, *names, *comment_pos; + uchar *disk_buff, *strpos, *null_flags, *null_pos; + ulong pos, record_offset, *rec_per_key, rec_buff_length; + handler *handler_file= 0; + KEY *keyinfo; + KEY_PART_INFO *key_part; + SQL_CRYPT *crypted=0; + Field **field_ptr, *reg_field; + const char **interval_array; + enum legacy_db_type legacy_db_type; + DBUG_ENTER("open_binary_frm"); + + new_field_pack_flag= head[27]; new_frm_ver= (head[2] - FRM_VER); field_pack_length= new_frm_ver < 2 ? 
11 : 17; + disk_buff= 0; - error=3; + error= 3; if (!(pos=get_form_pos(file,head,(TYPELIB*) 0))) goto err; /* purecov: inspected */ - *fn_ext(index_file)='\0'; // Remove .frm extension share->frm_version= head[2]; /* @@ -163,11 +384,16 @@ int openfrm(THD *thd, const char *name, const char *alias, uint db_stat, if (share->frm_version == FRM_VER_TRUE_VARCHAR -1 && head[33] == 5) share->frm_version= FRM_VER_TRUE_VARCHAR; - share->db_type= ha_checktype(thd,(enum db_type) (uint) *(head+3),0,0); - share->db_create_options= db_create_options=uint2korr(head+30); +#ifdef WITH_PARTITION_STORAGE_ENGINE + share->default_part_db_type= + ha_checktype(thd, (enum legacy_db_type) (uint) *(head+61), 0, 0); +#endif + legacy_db_type= (enum legacy_db_type) (uint) *(head+3); + share->db_type= ha_checktype(thd, legacy_db_type, 0, 0); + share->db_create_options= db_create_options= uint2korr(head+30); share->db_options_in_use= share->db_create_options; share->mysql_version= uint4korr(head+51); - null_field_first= 0; + share->null_field_first= 0; if (!head[32]) // New frm file in 3.23 { share->avg_row_length= uint4korr(head+34); @@ -176,7 +402,7 @@ int openfrm(THD *thd, const char *name, const char *alias, uint db_stat, share->raid_chunks= head[42]; share->raid_chunksize= uint4korr(head+43); share->table_charset= get_charset((uint) head[38],MYF(0)); - null_field_first= 1; + share->null_field_first= 1; } if (!share->table_charset) { @@ -187,7 +413,7 @@ int openfrm(THD *thd, const char *name, const char *alias, uint db_stat, sql_print_warning("'%s' had no or invalid character set, " "and default character set is multi-byte, " "so character column sizes may have changed", - name); + share->path); } share->table_charset= default_charset_info; } @@ -195,7 +421,7 @@ int openfrm(THD *thd, const char *name, const char *alias, uint db_stat, if (db_create_options & HA_OPTION_LONG_BLOB_PTR) share->blob_ptr_size= portable_sizeof_char_ptr; /* Set temporarily a good value for db_low_byte_first */ - share->db_low_byte_first= test(share->db_type != DB_TYPE_ISAM); + share->db_low_byte_first= test(legacy_db_type != DB_TYPE_ISAM); error=4; share->max_rows= uint4korr(head+18); share->min_rows= uint4korr(head+22); @@ -217,27 +443,23 @@ int openfrm(THD *thd, const char *name, const char *alias, uint db_stat, } share->keys_for_keyread.init(0); share->keys_in_use.init(keys); - outparam->quick_keys.init(); - outparam->used_keys.init(); - outparam->keys_in_use_for_query.init(); n_length=keys*sizeof(KEY)+key_parts*sizeof(KEY_PART_INFO); - if (!(keyinfo = (KEY*) alloc_root(&outparam->mem_root, - n_length+uint2korr(disk_buff+4)))) + if (!(keyinfo = (KEY*) alloc_root(&share->mem_root, + n_length + uint2korr(disk_buff+4)))) goto err; /* purecov: inspected */ bzero((char*) keyinfo,n_length); - outparam->key_info=keyinfo; + share->key_info= keyinfo; key_part= my_reinterpret_cast(KEY_PART_INFO*) (keyinfo+keys); strpos=disk_buff+6; - ulong *rec_per_key; - if (!(rec_per_key= (ulong*) alloc_root(&outparam->mem_root, + if (!(rec_per_key= (ulong*) alloc_root(&share->mem_root, sizeof(ulong*)*key_parts))) goto err; for (i=0 ; i < keys ; i++, keyinfo++) { - keyinfo->table= outparam; + keyinfo->table= 0; // Updated in open_frm if (new_frm_ver >= 3) { keyinfo->flags= (uint) uint2korr(strpos) ^ HA_NOSAME; @@ -293,10 +515,8 @@ int openfrm(THD *thd, const char *name, const char *alias, uint db_stat, #ifdef HAVE_CRYPTED_FRM else if (*(head+26) == 2) { - *root_ptr= old_root - crypted=get_crypt_for_frm(); - *root_ptr= &outparam->mem_root; - outparam->crypted=1; + 
crypted= get_crypt_for_frm(); + share->crypted= 1; } #endif @@ -308,9 +528,9 @@ int openfrm(THD *thd, const char *name, const char *alias, uint db_stat, { /* Read extra data segment */ char *buff, *next_chunk, *buff_end; + DBUG_PRINT("info", ("extra segment size is %u bytes", n_length)); if (!(next_chunk= buff= my_malloc(n_length, MYF(MY_WME)))) goto err; - buff_end= buff + n_length; if (my_pread(file, (byte*)buff, n_length, record_offset + share->reclength, MYF(MY_NABP))) { @@ -318,78 +538,105 @@ int openfrm(THD *thd, const char *name, const char *alias, uint db_stat, goto err; } share->connect_string.length= uint2korr(buff); - if (! (share->connect_string.str= strmake_root(&outparam->mem_root, + if (! (share->connect_string.str= strmake_root(&share->mem_root, next_chunk + 2, share->connect_string.length))) { my_free(buff, MYF(0)); goto err; } next_chunk+= share->connect_string.length + 2; + buff_end= buff + n_length; if (next_chunk + 2 < buff_end) { uint str_db_type_length= uint2korr(next_chunk); - share->db_type= ha_resolve_by_name(next_chunk + 2, str_db_type_length); - DBUG_PRINT("enter", ("Setting dbtype to: %d - %d - '%.*s'\n", - share->db_type, - str_db_type_length, str_db_type_length, - next_chunk + 2)); + LEX_STRING name= { next_chunk + 2, str_db_type_length }; + handlerton *tmp_db_type= ha_resolve_by_name(thd, &name); + if (tmp_db_type != NULL) + { + share->db_type= tmp_db_type; + DBUG_PRINT("info", ("setting dbtype to '%.*s' (%d)", + str_db_type_length, next_chunk + 2, + ha_legacy_type(share->db_type))); + } +#ifdef WITH_PARTITION_STORAGE_ENGINE + else + { + if (!strncmp(next_chunk + 2, "partition", str_db_type_length)) + { + /* Use partition handler */ + share->db_type= &partition_hton; + DBUG_PRINT("info", ("setting dbtype to '%.*s' (%d)", + str_db_type_length, next_chunk + 2, + ha_legacy_type(share->db_type))); + } + } +#endif next_chunk+= str_db_type_length + 2; } + if (next_chunk + 5 < buff_end) + { + uint32 partition_info_len = uint4korr(next_chunk); +#ifdef WITH_PARTITION_STORAGE_ENGINE + if ((share->partition_info_len= partition_info_len)) + { + if (!(share->partition_info= + (uchar*) memdup_root(&share->mem_root, next_chunk + 4, + partition_info_len + 1))) + { + my_free(buff, MYF(0)); + goto err; + } + next_chunk++; + } +#else + if (partition_info_len) + { + DBUG_PRINT("info", ("WITH_PARTITION_STORAGE_ENGINE is not defined")); + my_free(buff, MYF(0)); + goto err; + } +#endif + next_chunk+= 5 + partition_info_len; + } + keyinfo= share->key_info; + for (i= 0; i < keys; i++, keyinfo++) + { + if (keyinfo->flags & HA_USES_PARSER) + { + LEX_STRING parser_name; + if (next_chunk >= buff_end) + { + DBUG_PRINT("error", + ("fulltext key uses parser that is not defined in .frm")); + my_free(buff, MYF(0)); + goto err; + } + parser_name.str= next_chunk; + parser_name.length= strlen(next_chunk); + keyinfo->parser= plugin_lock(&parser_name, MYSQL_FTPARSER_PLUGIN); + if (! 
keyinfo->parser) + { + my_free(buff, MYF(0)); + my_error(ER_PLUGIN_IS_NOT_LOADED, MYF(0), parser_name.str); + goto err; + } + } + } my_free(buff, MYF(0)); } - /* Allocate handler */ - if (!(outparam->file= get_new_handler(outparam, &outparam->mem_root, - share->db_type))) - goto err; error=4; - outparam->reginfo.lock_type= TL_UNLOCK; - outparam->current_lock=F_UNLCK; - if ((db_stat & HA_OPEN_KEYFILE) || (prgflag & DELAYED_OPEN)) - records=2; - else - records=1; - if (prgflag & (READ_ALL+EXTRA_RECORD)) - records++; - /* QQ: TODO, remove the +1 from below */ - rec_buff_length= ALIGN_SIZE(share->reclength + 1 + - outparam->file->extra_rec_buf_length()); + extra_rec_buf_length= uint2korr(head+59); + rec_buff_length= ALIGN_SIZE(share->reclength + 1 + extra_rec_buf_length); share->rec_buff_length= rec_buff_length; - if (!(record= (char *) alloc_root(&outparam->mem_root, - rec_buff_length * records))) + if (!(record= (char *) alloc_root(&share->mem_root, + rec_buff_length))) goto err; /* purecov: inspected */ share->default_values= (byte *) record; - if (my_pread(file,(byte*) record, (uint) share->reclength, record_offset, MYF(MY_NABP))) - goto err; /* purecov: inspected */ + goto err; /* purecov: inspected */ - if (records == 1) - { - /* We are probably in hard repair, and the buffers should not be used */ - outparam->record[0]= outparam->record[1]= share->default_values; - } - else - { - outparam->record[0]= (byte *) record+ rec_buff_length; - if (records > 2) - outparam->record[1]= (byte *) record+ rec_buff_length*2; - else - outparam->record[1]= outparam->record[0]; // Safety - } - -#ifdef HAVE_purify - /* - We need this because when we read var-length rows, we are not updating - bytes after end of varchar - */ - if (records > 1) - { - memcpy(outparam->record[0], share->default_values, rec_buff_length); - if (records > 2) - memcpy(outparam->record[1], share->default_values, rec_buff_length); - } -#endif VOID(my_seek(file,pos,MY_SEEK_SET,MYF(0))); if (my_read(file,(byte*) head,288,MYF(MY_NABP))) goto err; @@ -410,12 +657,12 @@ int openfrm(THD *thd, const char *name, const char *alias, uint db_stat, int_length= uint2korr(head+274); share->null_fields= uint2korr(head+282); com_length= uint2korr(head+284); - share->comment= strdup_root(&outparam->mem_root, (char*) head+47); + share->comment= strdup_root(&share->mem_root, (char*) head+47); DBUG_PRINT("info",("i_count: %d i_parts: %d index: %d n_length: %d int_length: %d com_length: %d", interval_count,interval_parts, share->keys,n_length,int_length, com_length)); if (!(field_ptr = (Field **) - alloc_root(&outparam->mem_root, + alloc_root(&share->mem_root, (uint) ((share->fields+1)*sizeof(Field*)+ interval_count*sizeof(TYPELIB)+ (share->fields+interval_parts+ @@ -423,7 +670,7 @@ int openfrm(THD *thd, const char *name, const char *alias, uint db_stat, (n_length+int_length+com_length))))) goto err; /* purecov: inspected */ - outparam->field=field_ptr; + share->field= field_ptr; read_length=(uint) (share->fields * field_pack_length + pos+ (uint) (n_length+int_length+com_length)); if (read_string(file,(gptr*) &disk_buff,read_length)) @@ -439,8 +686,8 @@ int openfrm(THD *thd, const char *name, const char *alias, uint db_stat, strpos= disk_buff+pos; share->intervals= (TYPELIB*) (field_ptr+share->fields+1); - int_array= (const char **) (share->intervals+interval_count); - names= (char*) (int_array+share->fields+interval_parts+keys+3); + interval_array= (const char **) (share->intervals+interval_count); + names= (char*) 
(interval_array+share->fields+interval_parts+keys+3); if (!interval_count) share->intervals= 0; // For better debugging memcpy((char*) names, strpos+(share->fields*field_pack_length), @@ -448,8 +695,8 @@ int openfrm(THD *thd, const char *name, const char *alias, uint db_stat, comment_pos= names+(n_length+int_length); memcpy(comment_pos, disk_buff+read_length-com_length, com_length); - fix_type_pointers(&int_array, &share->fieldnames, 1, &names); - fix_type_pointers(&int_array, share->intervals, interval_count, + fix_type_pointers(&interval_array, &share->fieldnames, 1, &names); + fix_type_pointers(&interval_array, share->intervals, interval_count, &names); { @@ -460,7 +707,7 @@ int openfrm(THD *thd, const char *name, const char *alias, uint db_stat, interval++) { uint count= (uint) (interval->count + 1) * sizeof(uint); - if (!(interval->type_lengths= (uint *) alloc_root(&outparam->mem_root, + if (!(interval->type_lengths= (uint *) alloc_root(&share->mem_root, count))) goto err; for (count= 0; count < interval->count; count++) @@ -470,14 +717,17 @@ int openfrm(THD *thd, const char *name, const char *alias, uint db_stat, } if (keynames) - fix_type_pointers(&int_array, &share->keynames, 1, &keynames); - VOID(my_close(file,MYF(MY_WME))); - file= -1; + fix_type_pointers(&interval_array, &share->keynames, 1, &keynames); + + /* Allocate handler */ + if (!(handler_file= get_new_handler(share, thd->mem_root, + share->db_type))) + goto err; - record= (char*) outparam->record[0]-1; /* Fieldstart = 1 */ - if (null_field_first) + record= (char*) share->default_values-1; /* Fieldstart = 1 */ + if (share->null_field_first) { - outparam->null_flags=null_pos=(uchar*) record+1; + null_flags= null_pos= (uchar*) record+1; null_bit_pos= (db_create_options & HA_OPTION_PACK_RECORD) ? 
0 : 1; /* null_bytes below is only correct under the condition that @@ -486,13 +736,15 @@ int openfrm(THD *thd, const char *name, const char *alias, uint db_stat, */ share->null_bytes= (share->null_fields + null_bit_pos + 7) / 8; } +#ifndef WE_WANT_TO_SUPPORT_VERY_OLD_FRM_FILES else { share->null_bytes= (share->null_fields+7)/8; - outparam->null_flags= null_pos= - (uchar*) (record+1+share->reclength-share->null_bytes); + null_flags= null_pos= (uchar*) (record + 1 +share->reclength - + share->null_bytes); null_bit_pos= 0; } +#endif use_hash= share->fields >= MAX_FIELDS_BEFORE_HASH; if (use_hash) @@ -607,16 +859,23 @@ int openfrm(THD *thd, const char *name, const char *alias, uint db_stat, field_length= my_decimal_precision_to_length(field_length, decimals, f_is_dec(pack_flag) == 0); - sql_print_error("Found incompatible DECIMAL field '%s' in %s; Please do \"ALTER TABLE '%s' FORCE\" to fix it!", share->fieldnames.type_names[i], name, share->table_name); + sql_print_error("Found incompatible DECIMAL field '%s' in %s; " + "Please do \"ALTER TABLE '%s' FORCE\" to fix it!", + share->fieldnames.type_names[i], share->table_name.str, + share->table_name.str); push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_ERROR, ER_CRASHED_ON_USAGE, - "Found incompatible DECIMAL field '%s' in %s; Please do \"ALTER TABLE '%s' FORCE\" to fix it!", share->fieldnames.type_names[i], name, share->table_name); + "Found incompatible DECIMAL field '%s' in %s; " + "Please do \"ALTER TABLE '%s' FORCE\" to fix it!", + share->fieldnames.type_names[i], + share->table_name.str, + share->table_name.str); share->crashed= 1; // Marker for CHECK TABLE } #endif - *field_ptr=reg_field= - make_field(record+recpos, + *field_ptr= reg_field= + make_field(share, record+recpos, (uint32) field_length, null_pos, null_bit_pos, pack_flag, @@ -627,14 +886,14 @@ int openfrm(THD *thd, const char *name, const char *alias, uint db_stat, (interval_nr ? 
share->intervals+interval_nr-1 : (TYPELIB*) 0), - share->fieldnames.type_names[i], - outparam); + share->fieldnames.type_names[i]); if (!reg_field) // Not supported field type { error= 4; goto err; /* purecov: inspected */ } + reg_field->fieldnr= i+1; //Set field number reg_field->field_index= i; reg_field->comment=comment; if (field_type == FIELD_TYPE_BIT && !f_bit_as_char(pack_flag)) @@ -652,12 +911,15 @@ int openfrm(THD *thd, const char *name, const char *alias, uint db_stat, } if (f_no_default(pack_flag)) reg_field->flags|= NO_DEFAULT_VALUE_FLAG; + if (reg_field->unireg_check == Field::NEXT_NUMBER) - outparam->found_next_number_field= reg_field; - if (outparam->timestamp_field == reg_field) + share->found_next_number_field= field_ptr; + if (share->timestamp_field == reg_field) share->timestamp_field_offset= i; + if (use_hash) - (void) my_hash_insert(&share->name_hash,(byte*) field_ptr); // never fail + (void) my_hash_insert(&share->name_hash, + (byte*) field_ptr); // never fail } *field_ptr=0; // End marker @@ -666,17 +928,17 @@ int openfrm(THD *thd, const char *name, const char *alias, uint db_stat, { uint primary_key=(uint) (find_type((char*) primary_key_name, &share->keynames, 3) - 1); - uint ha_option=outparam->file->table_flags(); - keyinfo=outparam->key_info; - key_part=keyinfo->key_part; + uint ha_option= handler_file->table_flags(); + keyinfo= share->key_info; + key_part= keyinfo->key_part; for (uint key=0 ; key < share->keys ; key++,keyinfo++) { - uint usable_parts=0; + uint usable_parts= 0; keyinfo->name=(char*) share->keynames.type_names[key]; /* Fix fulltext keys for old .frm files */ - if (outparam->key_info[key].flags & HA_FULLTEXT) - outparam->key_info[key].algorithm= HA_KEY_ALG_FULLTEXT; + if (share->key_info[key].flags & HA_FULLTEXT) + share->key_info[key].algorithm= HA_KEY_ALG_FULLTEXT; if (primary_key >= MAX_KEY && (keyinfo->flags & HA_NOSAME)) { @@ -689,8 +951,8 @@ int openfrm(THD *thd, const char *name, const char *alias, uint db_stat, { uint fieldnr= key_part[i].fieldnr; if (!fieldnr || - outparam->field[fieldnr-1]->null_ptr || - outparam->field[fieldnr-1]->key_length() != + share->field[fieldnr-1]->null_ptr || + share->field[fieldnr-1]->key_length() != key_part[i].length) { primary_key=MAX_KEY; // Can't be used @@ -701,129 +963,123 @@ int openfrm(THD *thd, const char *name, const char *alias, uint db_stat, for (i=0 ; i < keyinfo->key_parts ; key_part++,i++) { + Field *field; if (new_field_pack_flag <= 1) - key_part->fieldnr=(uint16) find_field(outparam, - (uint) key_part->offset, - (uint) key_part->length); -#ifdef EXTRA_DEBUG - if (key_part->fieldnr > share->fields) - goto err; // sanity check -#endif - if (key_part->fieldnr) - { // Should always be true ! 
- Field *field=key_part->field=outparam->field[key_part->fieldnr-1]; - if (field->null_ptr) - { - key_part->null_offset=(uint) ((byte*) field->null_ptr - - outparam->record[0]); - key_part->null_bit= field->null_bit; - key_part->store_length+=HA_KEY_NULL_LENGTH; - keyinfo->flags|=HA_NULL_PART_KEY; - keyinfo->extra_length+= HA_KEY_NULL_LENGTH; - keyinfo->key_length+= HA_KEY_NULL_LENGTH; - } - if (field->type() == FIELD_TYPE_BLOB || - field->real_type() == MYSQL_TYPE_VARCHAR) - { - if (field->type() == FIELD_TYPE_BLOB) - key_part->key_part_flag|= HA_BLOB_PART; - else - key_part->key_part_flag|= HA_VAR_LENGTH_PART; - keyinfo->extra_length+=HA_KEY_BLOB_LENGTH; - key_part->store_length+=HA_KEY_BLOB_LENGTH; - keyinfo->key_length+= HA_KEY_BLOB_LENGTH; - /* - Mark that there may be many matching values for one key - combination ('a', 'a ', 'a '...) - */ - if (!(field->flags & BINARY_FLAG)) - keyinfo->flags|= HA_END_SPACE_KEY; - } - if (field->type() == MYSQL_TYPE_BIT) - key_part->key_part_flag|= HA_BIT_PART; - - if (i == 0 && key != primary_key) - field->flags |= ((keyinfo->flags & HA_NOSAME) && - (keyinfo->key_parts == 1)) ? - UNIQUE_KEY_FLAG : MULTIPLE_KEY_FLAG; - if (i == 0) - field->key_start.set_bit(key); - if (field->key_length() == key_part->length && - !(field->flags & BLOB_FLAG)) - { - if (outparam->file->index_flags(key, i, 0) & HA_KEYREAD_ONLY) - { - share->keys_for_keyread.set_bit(key); - field->part_of_key.set_bit(key); - } - if (outparam->file->index_flags(key, i, 1) & HA_READ_ORDER) - field->part_of_sortkey.set_bit(key); - } - if (!(key_part->key_part_flag & HA_REVERSE_SORT) && - usable_parts == i) - usable_parts++; // For FILESORT - field->flags|= PART_KEY_FLAG; - if (key == primary_key) - { - field->flags|= PRI_KEY_FLAG; - /* - If this field is part of the primary key and all keys contains - the primary key, then we can use any key to find this column - */ - if (ha_option & HA_PRIMARY_KEY_IN_READ_INDEX) - field->part_of_key= share->keys_in_use; - } - if (field->key_length() != key_part->length) - { + key_part->fieldnr= (uint16) find_field(share->field, + (uint) key_part->offset, + (uint) key_part->length); + if (!key_part->fieldnr) + { + error= 4; // Wrong file + goto err; + } + field= key_part->field= share->field[key_part->fieldnr-1]; + if (field->null_ptr) + { + key_part->null_offset=(uint) ((byte*) field->null_ptr - + share->default_values); + key_part->null_bit= field->null_bit; + key_part->store_length+=HA_KEY_NULL_LENGTH; + keyinfo->flags|=HA_NULL_PART_KEY; + keyinfo->extra_length+= HA_KEY_NULL_LENGTH; + keyinfo->key_length+= HA_KEY_NULL_LENGTH; + } + if (field->type() == FIELD_TYPE_BLOB || + field->real_type() == MYSQL_TYPE_VARCHAR) + { + if (field->type() == FIELD_TYPE_BLOB) + key_part->key_part_flag|= HA_BLOB_PART; + else + key_part->key_part_flag|= HA_VAR_LENGTH_PART; + keyinfo->extra_length+=HA_KEY_BLOB_LENGTH; + key_part->store_length+=HA_KEY_BLOB_LENGTH; + keyinfo->key_length+= HA_KEY_BLOB_LENGTH; + /* + Mark that there may be many matching values for one key + combination ('a', 'a ', 'a '...) + */ + if (!(field->flags & BINARY_FLAG)) + keyinfo->flags|= HA_END_SPACE_KEY; + } + if (field->type() == MYSQL_TYPE_BIT) + key_part->key_part_flag|= HA_BIT_PART; + + if (i == 0 && key != primary_key) + field->flags |= (((keyinfo->flags & HA_NOSAME) && + (keyinfo->key_parts == 1)) ? 
+ UNIQUE_KEY_FLAG : MULTIPLE_KEY_FLAG); + if (i == 0) + field->key_start.set_bit(key); + if (field->key_length() == key_part->length && + !(field->flags & BLOB_FLAG)) + { + if (handler_file->index_flags(key, i, 0) & HA_KEYREAD_ONLY) + { + share->keys_for_keyread.set_bit(key); + field->part_of_key.set_bit(key); + } + if (handler_file->index_flags(key, i, 1) & HA_READ_ORDER) + field->part_of_sortkey.set_bit(key); + } + if (!(key_part->key_part_flag & HA_REVERSE_SORT) && + usable_parts == i) + usable_parts++; // For FILESORT + field->flags|= PART_KEY_FLAG; + if (key == primary_key) + { + field->flags|= PRI_KEY_FLAG; + /* + If this field is part of the primary key and all keys contain + the primary key, then we can use any key to find this column + */ + if (ha_option & HA_PRIMARY_KEY_IN_READ_INDEX) + field->part_of_key= share->keys_in_use; + } + if (field->key_length() != key_part->length) + { #ifndef TO_BE_DELETED_ON_PRODUCTION - if (field->type() == FIELD_TYPE_NEWDECIMAL) - { - /* - Fix a fatal error in decimal key handling that causes crashes - on Innodb. We fix it by reducing the key length so that - InnoDB never gets a too big key when searching. - This allows the end user to do an ALTER TABLE to fix the - error. - */ - keyinfo->key_length-= (key_part->length - field->key_length()); - key_part->store_length-= (uint16)(key_part->length - - field->key_length()); - key_part->length= (uint16)field->key_length(); - sql_print_error("Found wrong key definition in %s; Please do \"ALTER TABLE '%s' FORCE \" to fix it!", name, share->table_name); - push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_ERROR, - ER_CRASHED_ON_USAGE, - "Found wrong key definition in %s; Please do \"ALTER TABLE '%s' FORCE\" to fix it!", name, share->table_name); - - share->crashed= 1; // Marker for CHECK TABLE - goto to_be_deleted; - } + if (field->type() == FIELD_TYPE_NEWDECIMAL) + { + /* + Fix a fatal error in decimal key handling that causes crashes + on InnoDB. We fix it by reducing the key length so that + InnoDB never gets a too big key when searching. + This allows the end user to do an ALTER TABLE to fix the + error. + */ + keyinfo->key_length-= (key_part->length - field->key_length()); + key_part->store_length-= (uint16)(key_part->length - + field->key_length()); + key_part->length= (uint16)field->key_length(); + sql_print_error("Found wrong key definition in %s; " + "Please do \"ALTER TABLE '%s' FORCE \" to fix it!", + share->table_name.str, + share->table_name.str); + push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_ERROR, + ER_CRASHED_ON_USAGE, + "Found wrong key definition in %s; " + "Please do \"ALTER TABLE '%s' FORCE\" to fix " + "it!", + share->table_name.str, + share->table_name.str); + share->crashed= 1; // Marker for CHECK TABLE + goto to_be_deleted; + } #endif - key_part->key_part_flag|= HA_PART_KEY_SEG; - if (!(field->flags & BLOB_FLAG)) - { // Create a new field - field=key_part->field=field->new_field(&outparam->mem_root, - outparam); - field->field_length=key_part->length; - } - } + key_part->key_part_flag|= HA_PART_KEY_SEG; + } to_be_deleted: - /* - If the field can be NULL, don't optimize away the test - key_part_column = expression from the WHERE clause - as we need to test for NULL = NULL.
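/*
  Editor's note (not in the source): the removed branch above also built
  a shortened Field copy for prefix key parts; in the new split, that
  per-key-part copy is created for each TABLE in open_table_from_share(),
  further down in this file.
*/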
- */ - if (field->real_maybe_null()) - key_part->key_part_flag|= HA_PART_KEY_SEG; - } - else - { // Error: shorten key - keyinfo->key_parts=usable_parts; - keyinfo->flags=0; - } + /* + If the field can be NULL, don't optimize away the test + key_part_column = expression from the WHERE clause + as we need to test for NULL = NULL. + */ + if (field->real_maybe_null()) + key_part->key_part_flag|= HA_PART_KEY_SEG; } - keyinfo->usable_key_parts=usable_parts; // Filesort + keyinfo->usable_key_parts= usable_parts; // Filesort set_if_bigger(share->max_key_length,keyinfo->key_length+ keyinfo->key_parts); @@ -844,11 +1100,15 @@ int openfrm(THD *thd, const char *name, const char *alias, uint db_stat, If we are using an integer as the primary key then allow the user to refer to it as '_rowid' */ - if (outparam->key_info[primary_key].key_parts == 1) + if (share->key_info[primary_key].key_parts == 1) { - Field *field= outparam->key_info[primary_key].key_part[0].field; + Field *field= share->key_info[primary_key].key_part[0].field; if (field && field->result_type() == INT_RESULT) - outparam->rowid_field=field; + { + /* note that fieldnr here (and rowid_field_offset) starts from 1 */ + share->rowid_field_offset= (share->key_info[primary_key].key_part[0]. + fieldnr); + } } } else @@ -862,21 +1122,30 @@ int openfrm(THD *thd, const char *name, const char *alias, uint db_stat, { /* Old file format with default as not null */ uint null_length= (share->null_fields+7)/8; - bfill(share->default_values + (outparam->null_flags - (uchar*) record), + bfill(share->default_values + (null_flags - (uchar*) record), null_length, 255); } - if ((reg_field=outparam->found_next_number_field)) + if (share->found_next_number_field) { + /* + We must have a table object for find_ref_key to calculate field offset + */ + TABLE tmp_table; + tmp_table.record[0]= share->default_values; + + reg_field= *share->found_next_number_field; + reg_field->table= &tmp_table; if ((int) (share->next_number_index= (uint) - find_ref_key(outparam,reg_field, + find_ref_key(share->key_info, share->keys, reg_field, &share->next_number_key_offset)) < 0) { - reg_field->unireg_check=Field::NONE; /* purecov: inspected */ - outparam->found_next_number_field=0; + reg_field->unireg_check= Field::NONE; /* purecov: inspected */ + share->found_next_number_field= 0; } else - reg_field->flags|=AUTO_INCREMENT_FLAG; + reg_field->flags |= AUTO_INCREMENT_FLAG; + reg_field->table= 0; } if (share->blob_fields) @@ -886,10 +1155,10 @@ int openfrm(THD *thd, const char *name, const char *alias, uint db_stat, /* Store offsets to blob fields to find them fast */ if (!(share->blob_field= save= - (uint*) alloc_root(&outparam->mem_root, + (uint*) alloc_root(&share->mem_root, (uint) (share->blob_fields* sizeof(uint))))) goto err; - for (i=0, ptr= outparam->field ; *ptr ; ptr++, i++) + for (i=0, ptr= share->field ; *ptr ; ptr++, i++) { if ((*ptr)->flags & BLOB_FLAG) (*save++)= i; @@ -900,18 +1169,233 @@ int openfrm(THD *thd, const char *name, const char *alias, uint db_stat, the correct null_bytes can now be set, since bitfields have been taken into account */ - share->null_bytes= (null_pos - (uchar*) outparam->null_flags + + share->null_bytes= (null_pos - (uchar*) null_flags + (null_bit_pos + 7) / 8); share->last_null_bit_pos= null_bit_pos; + share->db_low_byte_first= handler_file->low_byte_first(); + delete handler_file; +#ifndef DBUG_OFF + if (use_hash) + (void) hash_check(&share->name_hash); +#endif + DBUG_RETURN (0); + + err: + share->error= error; + share->open_errno= 
my_errno; + share->errarg= errarg; + x_free((gptr) disk_buff); + delete crypted; + delete handler_file; + hash_free(&share->name_hash); + + open_table_error(share, error, share->open_errno, errarg); + DBUG_RETURN(error); +} /* open_binary_frm */ + + +/* + Open a table based on a TABLE_SHARE + + SYNOPSIS + open_table_from_share() + thd Thread handler + share Table definition + alias Alias for table + db_stat open flags (for example HA_OPEN_KEYFILE| + HA_OPEN_RNDFILE..) can be 0 (example in + ha_example_table) + prgflag READ_ALL etc.. + ha_open_flags HA_OPEN_ABORT_IF_LOCKED etc.. + outparam result table + + RETURN VALUES + 0 ok + 1 Error (see open_table_error) + 2 Error (see open_table_error) + 3 Wrong data in .frm file + 4 Error (see open_table_error) + 5 Error (see open_table_error: charset unavailable) + 7 Table definition has changed in engine +*/ + +int open_table_from_share(THD *thd, TABLE_SHARE *share, const char *alias, + uint db_stat, uint prgflag, uint ha_open_flags, + TABLE *outparam) +{ + int error; + uint records, i; + bool error_reported= FALSE; + byte *record; + Field **field_ptr; + MEM_ROOT **root_ptr, *old_root; + DBUG_ENTER("open_table_from_share"); + DBUG_PRINT("enter",("name: '%s.%s' form: 0x%lx", share->db.str, + share->table_name.str, outparam)); + + error= 1; + root_ptr= my_pthread_getspecific_ptr(MEM_ROOT**, THR_MALLOC); + old_root= *root_ptr; + bzero((char*) outparam, sizeof(*outparam)); + outparam->in_use= thd; + outparam->s= share; + outparam->db_stat= db_stat; + outparam->write_row_record= NULL; + + init_sql_alloc(&outparam->mem_root, TABLE_ALLOC_BLOCK_SIZE, 0); + *root_ptr= &outparam->mem_root; + + if (!(outparam->alias= my_strdup(alias, MYF(MY_WME)))) + goto err; + outparam->quick_keys.init(); + outparam->used_keys.init(); + outparam->keys_in_use_for_query.init(); + + /* Allocate handler */ + if (!(outparam->file= get_new_handler(share, &outparam->mem_root, + share->db_type))) + goto err; + + error= 4; + outparam->reginfo.lock_type= TL_UNLOCK; + outparam->current_lock= F_UNLCK; + records=0; + if ((db_stat & HA_OPEN_KEYFILE) || (prgflag & DELAYED_OPEN)) + records=1; + if (prgflag & (READ_ALL+EXTRA_RECORD)) + records++; + + if (!(record= (byte*) alloc_root(&outparam->mem_root, + share->rec_buff_length * records))) + goto err; /* purecov: inspected */ + + if (records == 0) + { + /* We are probably in hard repair, and the buffers should not be used */ + outparam->record[0]= outparam->record[1]= share->default_values; + } + else + { + outparam->record[0]= record; + if (records > 1) + outparam->record[1]= record+ share->rec_buff_length; + else + outparam->record[1]= outparam->record[0]; // Safety + } + +#ifdef HAVE_purify + /* + We need this because when we read var-length rows, we are not updating + bytes after end of varchar + */ + if (records > 1) + { + memcpy(outparam->record[0], share->default_values, share->rec_buff_length); + if (records > 2) + memcpy(outparam->record[1], share->default_values, + share->rec_buff_length); + } +#endif + + if (!(field_ptr = (Field **) alloc_root(&outparam->mem_root, + (uint) ((share->fields+1)* + sizeof(Field*))))) + goto err; /* purecov: inspected */ + + outparam->field= field_ptr; + + record= (byte*) outparam->record[0]-1; /* Fieldstart = 1 */ + if (share->null_field_first) + outparam->null_flags= (uchar*) record+1; + else + outparam->null_flags= (uchar*) (record+ 1+ share->reclength - + share->null_bytes); + + /* Setup copy of fields from share, but use the right alias and record */ + for (i=0 ; i < share->fields; i++, 
field_ptr++) + { + if (!((*field_ptr)= share->field[i]->clone(&outparam->mem_root, outparam))) + goto err; + } + (*field_ptr)= 0; // End marker + + if (share->found_next_number_field) + outparam->found_next_number_field= + outparam->field[(uint) (share->found_next_number_field - share->field)]; + if (share->timestamp_field) + outparam->timestamp_field= (Field_timestamp*) outparam->field[share->timestamp_field_offset]; + + + /* Fix key->name and key_part->field */ + if (share->key_parts) + { + KEY *key_info, *key_info_end; + KEY_PART_INFO *key_part; + uint n_length; + n_length= share->keys*sizeof(KEY) + share->key_parts*sizeof(KEY_PART_INFO); + if (!(key_info= (KEY*) alloc_root(&outparam->mem_root, n_length))) + goto err; + outparam->key_info= key_info; + key_part= (my_reinterpret_cast(KEY_PART_INFO*) (key_info+share->keys)); + + memcpy(key_info, share->key_info, sizeof(*key_info)*share->keys); + memcpy(key_part, share->key_info[0].key_part, (sizeof(*key_part) * + share->key_parts)); + + for (key_info_end= key_info + share->keys ; + key_info < key_info_end ; + key_info++) + { + KEY_PART_INFO *key_part_end; + + key_info->table= outparam; + key_info->key_part= key_part; + + for (key_part_end= key_part+ key_info->key_parts ; + key_part < key_part_end ; + key_part++) + { + Field *field= key_part->field= outparam->field[key_part->fieldnr-1]; + + if (field->key_length() != key_part->length && + !(field->flags & BLOB_FLAG)) + { + /* + We are using only a prefix of the column as a key: + Create a new field for the key part that matches the index + */ + field= key_part->field=field->new_field(&outparam->mem_root, + outparam); + field->field_length= key_part->length; + } + } + } + } + +#ifdef WITH_PARTITION_STORAGE_ENGINE + if (share->partition_info_len) + { + if (mysql_unpack_partition(thd, share->partition_info, + share->partition_info_len, + outparam, share->default_part_db_type)) + goto err; + /* + Fix the partition functions and ensure they are not constant + functions + */ + if (fix_partition_func(thd, share->normalized_path.str, outparam)) + goto err; + } +#endif + /* The table struct is now initialized; Open the table */ - error=2; + error= 2; if (db_stat) { int ha_err; - unpack_filename(index_file,index_file); if ((ha_err= (outparam->file-> - ha_open(index_file, + ha_open(outparam, share->normalized_path.str, (db_stat & HA_READ_ONLY ? O_RDONLY : O_RDWR), (db_stat & HA_OPEN_TEMPORARY ? HA_OPEN_TMP_TABLE : ((db_stat & HA_WAIT_IF_LOCKED) || @@ -928,8 +1412,10 @@ int openfrm(THD *thd, const char *name, const char *alias, uint db_stat, if (ha_err == HA_ERR_NO_SUCH_TABLE) { - /* The table did not exists in storage engine, use same error message - as if the .frm file didn't exist */ + /* + The table did not exist in the storage engine; use the same error + message as if the .frm file didn't exist + */ error= 1; my_errno= ENOENT; } @@ -937,47 +1423,71 @@ { outparam->file->print_error(ha_err, MYF(0)); error_reported= TRUE; + if (ha_err == HA_ERR_TABLE_DEF_CHANGED) + error= 7; } goto err; /* purecov: inspected */ } } - share->db_low_byte_first= outparam->file->low_byte_first(); *root_ptr= old_root; thd->status_var.opened_tables++; -#ifndef DBUG_OFF - if (use_hash) - (void) hash_check(&share->name_hash); +#ifdef HAVE_REPLICATION + + /* + This constant is used to mark that no table map version has been + assigned. No arithmetic is done on the value: it will be + overwritten with a value taken from MYSQL_BIN_LOG.
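      (Editor's note, spelling out the two sentinels set below:
         share->table_map_version= ~(ulonglong) 0;   - no version yet
         share->table_map_id= ULONG_MAX;             - no map id yet
       per the comments here, the first is later overwritten from
       MYSQL_BIN_LOG and the second is assigned in open_table() under
       the LOCK_open mutex.)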
+ */ + share->table_map_version= ~(ulonglong)0; + + /* + Since openfrm() can be called without any locking (for example, + ha_create_table... functions), we do not assign a table map id + here. Instead we assign a value that is not used elsewhere, and + then assign a table map id inside open_table() under the + protection of the LOCK_open mutex. + */ + share->table_map_id= ULONG_MAX; #endif + DBUG_RETURN (0); err: - x_free((gptr) disk_buff); - if (file > 0) - VOID(my_close(file,MYF(MY_WME))); - - delete crypted; *root_ptr= old_root; if (! error_reported) - frm_error(error,outparam,name,ME_ERROR+ME_WAITTANG, errarg); + open_table_error(share, error, my_errno, 0); delete outparam->file; +#ifdef WITH_PARTITION_STORAGE_ENGINE + if (outparam->part_info) + free_items(outparam->part_info->item_free_list); +#endif - outparam->file=0; // For easier errorchecking + outparam->file= 0; // For easier error checking outparam->db_stat=0; - hash_free(&share->name_hash); free_root(&outparam->mem_root, MYF(0)); // Safe to call on bzero'd root my_free((char*) outparam->alias, MYF(MY_ALLOW_ZERO_PTR)); DBUG_RETURN (error); -} /* openfrm */ - +} /* close a .frm file and its tables */ -int closefrm(register TABLE *table) +int closefrm(register TABLE *table, bool free_share) { int error=0; + uint idx; + KEY *key_info; DBUG_ENTER("closefrm"); if (table->db_stat) error=table->file->close(); + key_info= table->key_info; + for (idx= table->s->keys; idx; idx--, key_info++) + { + if (key_info->flags & HA_USES_PARSER) + { + plugin_unlock(key_info->parser); + key_info->flags= 0; + } + } my_free((char*) table->alias, MYF(MY_ALLOW_ZERO_PTR)); table->alias= 0; if (table->field) @@ -988,7 +1498,20 @@ int closefrm(register TABLE *table) } delete table->file; table->file= 0; /* For easier errorchecking */ +#ifdef WITH_PARTITION_STORAGE_ENGINE + if (table->part_info) + { + free_items(table->part_info->item_free_list); + table->part_info= 0; + } +#endif + if (free_share) + { + if (table->s->tmp_table == NO_TMP_TABLE) + release_table_share(table->s, RELEASE_NORMAL); + else + free_table_share(table->s); + } free_root(&table->mem_root, MYF(0)); DBUG_RETURN(error); } @@ -1144,37 +1667,43 @@ ulong make_new_entry(File file, uchar *fileinfo, TYPELIB *formnames, /* error message when opening a form file */ -static void frm_error(int error, TABLE *form, const char *name, - myf errortype, int errarg) +void open_table_error(TABLE_SHARE *share, int error, int db_errno, int errarg) { int err_no; char buff[FN_REFLEN]; - const char *form_dev="",*datext; - const char *real_name= (char*) name+dirname_length(name); - DBUG_ENTER("frm_error"); + myf errortype= ME_ERROR+ME_WAITTANG; + DBUG_ENTER("open_table_error"); switch (error) { + case 7: case 1: - if (my_errno == ENOENT) + if (db_errno == ENOENT) + my_error(ER_NO_SUCH_TABLE, MYF(0), share->db.str, share->table_name.str); + else { - char *db; - uint length=dirname_part(buff,name); - buff[length-1]=0; - db=buff+dirname_length(buff); - my_error(ER_NO_SUCH_TABLE, MYF(0), db, real_name); + strxmov(buff, share->normalized_path.str, reg_ext, NullS); + my_error(ER_FILE_NOT_FOUND, errortype, buff, db_errno); } - else - my_error(ER_FILE_NOT_FOUND, errortype, - fn_format(buff, name, form_dev, reg_ext, 0), my_errno); break; case 2: { - datext= form->file ? *form->file->bas_ext() : ""; - datext= datext==NullS ? "" : datext; - err_no= (my_errno == ENOENT) ? ER_FILE_NOT_FOUND : (my_errno == EAGAIN) ?
+ handler *file= 0; + const char *datext= ""; + + if (share->db_type != NULL) + { + if ((file= get_new_handler(share, current_thd->mem_root, + share->db_type))) + { + if (!(datext= *file->bas_ext())) + datext= ""; + } + } + err_no= (db_errno == ENOENT) ? ER_FILE_NOT_FOUND : (db_errno == EAGAIN) ? ER_FILE_USED : ER_CANT_OPEN_FILE; - my_error(err_no,errortype, - fn_format(buff,real_name,form_dev,datext,2),my_errno); + strxmov(buff, share->normalized_path.str, datext, NullS); + my_error(err_no,errortype, buff, db_errno); + delete file; break; } case 5: @@ -1188,23 +1717,24 @@ static void frm_error(int error, TABLE *form, const char *name, } my_printf_error(ER_UNKNOWN_COLLATION, "Unknown collation '%s' in table '%-.64s' definition", - MYF(0), csname, real_name); + MYF(0), csname, share->table_name.str); break; } case 6: + strxmov(buff, share->normalized_path.str, reg_ext, NullS); my_printf_error(ER_NOT_FORM_FILE, "Table '%-.64s' was created with a different version " - "of MySQL and cannot be read", - MYF(0), name); + "of MySQL and cannot be read", + MYF(0), buff); break; default: /* Better wrong error than none */ case 4: - my_error(ER_NOT_FORM_FILE, errortype, - fn_format(buff, name, form_dev, reg_ext, 0)); + strxmov(buff, share->normalized_path.str, reg_ext, NullS); + my_error(ER_NOT_FORM_FILE, errortype, buff, 0); break; } DBUG_VOID_RETURN; -} /* frm_error */ +} /* open_table_error */ /* @@ -1284,22 +1814,21 @@ TYPELIB *typelib(MEM_ROOT *mem_root, List<String> &strings) # field number +1 */ -static uint find_field(TABLE *form,uint start,uint length) +static uint find_field(Field **fields, uint start, uint length) { Field **field; - uint i, pos, fields; + uint i, pos; - pos=0; - fields= form->s->fields; - for (field=form->field, i=1 ; i<= fields ; i++,field++) + pos= 0; + for (field= fields, i=1 ; *field ; i++,field++) { if ((*field)->offset() == start) { if ((*field)->key_length() == length) return (i); - if (!pos || form->field[pos-1]->pack_length() < + if (!pos || fields[pos-1]->pack_length() < (*field)->pack_length()) - pos=i; + pos= i; } } return (pos); @@ -1389,13 +1918,15 @@ void append_unescaped(String *res, const char *pos, uint length) res->append('\''); } + /* Create a .frm file */ -File create_frm(THD *thd, my_string name, const char *db, +File create_frm(THD *thd, const char *name, const char *db, const char *table, uint reclength, uchar *fileinfo, - HA_CREATE_INFO *create_info, uint keys) + HA_CREATE_INFO *create_info, uint keys) { register File file; + uint key_length; ulong length; char fill[IO_SIZE]; int create_flags= O_RDWR | O_TRUNC; @@ -1425,7 +1956,8 @@ File create_frm(THD *thd, my_string name, const char *db, fileinfo[1]= 1; fileinfo[2]= FRM_VER+3+ test(create_info->varchar); - fileinfo[3]= (uchar) ha_checktype(thd,create_info->db_type,0,0); + fileinfo[3]= (uchar) ha_legacy_type( + ha_checktype(thd,ha_legacy_type(create_info->db_type),0,0)); fileinfo[4]=1; int2store(fileinfo+6,IO_SIZE); /* Next block starts here */ key_length=keys*(7+NAME_LEN+MAX_REF_PARTS*9)+16; @@ -1607,9 +2139,6 @@ bool check_db_name(char *name) #else last_char_is_space= *name==' '; #endif - if (*name == '/' || *name == '\\' || *name == FN_LIBCHAR || - *name == FN_EXTCHAR) - return 1; name++; } return last_char_is_space || (uint) (name - start) > NAME_LEN; @@ -1618,8 +2147,7 @@ bool check_db_name(char *name) /* Allow anything as a table name, as long as it doesn't contain an - a '/', or a '.' 
character
-  or ' ' at the end
+  ' ' at the end
 
   returns 1 on error
 */
@@ -1650,8 +2178,6 @@ bool check_table_name(const char *name, uint length)
     }
   }
 #endif
-    if (*name == '/' || *name == '\\' || *name == FN_EXTCHAR)
-      return 1;
     name++;
   }
 #if defined(USE_MB) && defined(USE_MB_IDENT)
@@ -2592,7 +3118,7 @@ const char *Natural_join_column::db_name()
     return table_ref->view_db.str;
 
   DBUG_ASSERT(!strcmp(table_ref->db,
-                      table_ref->table->s->db));
+                      table_ref->table->s->db.str));
   return table_ref->db;
 }
 
@@ -2782,7 +3308,7 @@ const char *Field_iterator_table_ref::table_name()
     return natural_join_it.column_ref()->table_name();
 
   DBUG_ASSERT(!strcmp(table_ref->table_name,
-                      table_ref->table->s->table_name));
+                      table_ref->table->s->table_name.str));
   return table_ref->table_name;
 }
 
@@ -2794,7 +3320,7 @@ const char *Field_iterator_table_ref::db_name()
   else if (table_ref->is_natural_join)
     return natural_join_it.column_ref()->db_name();
 
-  DBUG_ASSERT(!strcmp(table_ref->db, table_ref->table->s->db));
+  DBUG_ASSERT(!strcmp(table_ref->db, table_ref->table->s->db.str));
   return table_ref->db;
 }
 
diff --git a/sql/table.h b/sql/table.h
index ce0616a6833..8f514f248b4 100644
--- a/sql/table.h
+++ b/sql/table.h
@@ -21,6 +21,7 @@ class Item;				/* Needed by ORDER */
 class GRANT_TABLE;
 class st_select_lex_unit;
 class st_select_lex;
+class partition_info;
 class COND_EQUAL;
 class Security_context;
 
@@ -55,8 +56,11 @@ typedef struct st_grant_info
   ulong orig_want_privilege;
 } GRANT_INFO;
 
-enum tmp_table_type {NO_TMP_TABLE=0, TMP_TABLE=1, TRANSACTIONAL_TMP_TABLE=2,
-		     SYSTEM_TMP_TABLE=3};
+enum tmp_table_type
+{
+  NO_TMP_TABLE, TMP_TABLE, TRANSACTIONAL_TMP_TABLE,
+  INTERNAL_TMP_TABLE, SYSTEM_TMP_TABLE
+};
 
 enum frm_type_enum
 {
@@ -65,6 +69,8 @@ enum frm_type_enum
   FRMTYPE_VIEW
 };
 
+enum release_type { RELEASE_NORMAL, RELEASE_WAIT_FOR_DROP };
+
 typedef struct st_filesort_info
 {
   IO_CACHE *io_cache;           /* If sorted through filebyte            */
@@ -112,43 +118,49 @@ typedef struct st_table_share
   TYPELIB keynames;			/* Pointers to keynames */
   TYPELIB fieldnames;			/* Pointer to fieldnames */
   TYPELIB *intervals;			/* pointer to interval info */
-#ifdef NOT_YET
   pthread_mutex_t mutex;                /* For locking the share  */
   pthread_cond_t cond;			/* To signal that share is ready */
+  struct st_table_share *next,		/* Link to unused shares */
+    **prev;
+#ifdef NOT_YET
   struct st_table *open_tables;		/* link to open tables */
-  struct st_table *used_next,		/* Link to used tables */
-		 **used_prev;
+#endif
+
+  /* The following is copied to each TABLE on OPEN */
   Field **field;
+  Field **found_next_number_field;
+  Field *timestamp_field;               /* Used only during open */
   KEY  *key_info;			/* data of keys in database */
-#endif
   uint	*blob_field;			/* Index to blobs in Field array */
+  byte	*default_values;		/* row with default values */
   char	*comment;			/* Comment about table */
   CHARSET_INFO *table_charset;		/* Default charset of string fields */
 
   /* A pair "database_name\0table_name\0", widely used as simply a db name */
-  char	*table_cache_key;
-  const char *db;                       /* Pointer to db */
-  const char *table_name;               /* Table name (for open) */
-  const char *path;                 	/* Path to .frm file (from datadir) */
+  LEX_STRING table_cache_key;
+  LEX_STRING db;                        /* Pointer to db */
+  LEX_STRING table_name;                /* Table name (for open) */
+  LEX_STRING path;                	/* Path to .frm file (from datadir) */
+  LEX_STRING normalized_path;		/* unpack_filename(path) */
   LEX_STRING connect_string;
   key_map keys_in_use;                  /* Keys in use for table */
   key_map keys_for_keyread;
+  ha_rows min_rows, max_rows;		/* create information */
   ulong   avg_row_length;		/* create information */
  ulong   raid_chunksize;
   ulong   version, flush_version, mysql_version;
   ulong   timestamp_offset;		/* Set to offset+1 of record */
   ulong   reclength;			/* Recordlength */
 
-  ha_rows min_rows, max_rows;		/* create information */
-  enum db_type db_type;			/* table_type for handler */
+  handlerton *db_type;			/* table_type for handler */
   enum row_type row_type;		/* How rows are stored */
   enum tmp_table_type tmp_table;
 
+  uint ref_count;                       /* How many TABLE objects use this */
+  uint open_count;			/* Number of tables in open list */
   uint blob_ptr_size;			/* 4 or 8 */
   uint null_bytes, last_null_bit_pos;
-  uint key_length;			/* Length of table_cache_key */
   uint fields;				/* Number of fields */
   uint rec_buff_length;                 /* Size of table->record[] buffer */
   uint keys, key_parts;
@@ -156,31 +168,41 @@ typedef struct st_table_share
   uint uniques;                         /* Number of UNIQUE index */
   uint null_fields;			/* number of null fields */
   uint blob_fields;			/* number of blob fields */
+  uint timestamp_field_offset;		/* Field number for timestamp field */
   uint varchar_fields;                  /* number of varchar fields */
   uint db_create_options;		/* Create options from database */
   uint db_options_in_use;		/* Options in use */
   uint db_record_offset;		/* if HA_REC_IN_SEQ */
   uint raid_type, raid_chunks;
-  uint open_count;			/* Number of tables in open list */
+  uint rowid_field_offset;		/* Field_nr +1 to rowid field */
   /* Index of auto-updated TIMESTAMP field in field array */
   uint primary_key;
-  uint timestamp_field_offset;
   uint next_number_index;
   uint next_number_key_offset;
-  uchar frm_version;
-  my_bool system;			/* Set if system record */
-  my_bool crypted;			/* If .frm file is crypted */
-  my_bool db_low_byte_first;		/* Portable row format */
-  my_bool crashed;
-  my_bool is_view;
-  my_bool name_lock, replace_with_name_lock;
+  uint error, open_errno, errarg;	/* error from open_table_def() */
+  uchar frm_version;
+  bool null_field_first;
+  bool system;                          /* Set if system table (one record) */
+  bool crypted;                         /* If .frm file is crypted */
+  bool db_low_byte_first;		/* Portable row format */
+  bool crashed;
+  bool is_view;
+  bool name_lock, replace_with_name_lock;
+  bool waiting_on_cond;                 /* Protection against free */
+  ulong table_map_id;                   /* for row-based replication */
+  ulonglong table_map_version;
   /*
     TRUE if this is a system table like 'mysql.proc', which we want to be
    able to open and lock even when we already have some tables open and
    locked. To avoid deadlocks we have to put certain restrictions on
    locking of this table for writing. FALSE - otherwise.
   */
-  my_bool system_table;
+  bool system_table;
+#ifdef WITH_PARTITION_STORAGE_ENGINE
+  const uchar *partition_info;
+  uint  partition_info_len;
+  handlerton *default_part_db_type;
+#endif
 } TABLE_SHARE;
 
 
@@ -191,21 +213,22 @@ struct st_table {
   handler	*file;
 #ifdef NOT_YET
   struct st_table *used_next, **used_prev;	/* Link to used tables */
-  struct st_table *open_next, **open_prev;	/* Link to open tables */
 #endif
+  struct st_table *open_next, **open_prev;	/* Link to open tables */
   struct st_table *next, *prev;
 
   THD	*in_use;                        /* Which thread uses this */
   Field **field;			/* Pointer to fields */
 
   byte *record[2];			/* Pointer to records */
+  byte *write_row_record;		/* Used as optimisation in
+					   THD::write_row */
   byte *insert_values;                  /* used by INSERT ... UPDATE */
   key_map quick_keys, used_keys, keys_in_use_for_query;
   KEY  *key_info;			/* data of keys in database */
 
-  Field *next_number_field,		/* Set if next_number is activated */
-	*found_next_number_field,	/* Set on open */
-	*rowid_field;
+  Field *next_number_field;		/* Set if next_number is activated */
+  Field *found_next_number_field;	/* Set on open */
   Field_timestamp *timestamp_field;
 
   /* Table's triggers, 0 if there are none of them */
@@ -214,6 +237,8 @@ struct st_table {
   ORDER		*group;
   const char	*alias;            	  /* alias or table name */
   uchar		*null_flags;
+  MY_BITMAP     *read_set;
+  MY_BITMAP     *write_set;
   query_id_t	query_id;
 
   ha_rows	quick_rows[MAX_KEY];
@@ -267,12 +292,16 @@ struct st_table {
   my_bool auto_increment_field_not_null;
   my_bool insert_or_update;             /* Can be used by the handler */
   my_bool alias_name_used;		/* true if table_name is alias */
+  my_bool get_fields_in_item_tree;      /* Signal to fix_field */
 
   REGINFO reginfo;			/* field connections */
   MEM_ROOT mem_root;
   GRANT_INFO grant;
   FILESORT_INFO sort;
-  TABLE_SHARE share_not_to_be_used;     /* To be deleted when true shares */
+#ifdef WITH_PARTITION_STORAGE_ENGINE
+  partition_info *part_info;            /* Partition related information */
+  bool no_partitions_used; /* If true, all partitions have been pruned away */
+#endif
 
   bool fill_item_list(List<Item> *item_list) const;
   void reset_item_list(List<Item> *item_list) const;
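Since the Field array, found_next_number_field and timestamp_field now live in TABLE_SHARE, each TABLE instance must remap share-relative pointers onto its own cloned field array, exactly as openfrm() does further up in this patch. A compact sketch of that remapping (same pointer arithmetic, context simplified):

    /* Sketch: translate a Field* that points into share->field[]
       onto the corresponding slot of this TABLE's cloned field[] */
    if (share->found_next_number_field)
    {
      uint idx= (uint) (share->found_next_number_field - share->field);
      outparam->found_next_number_field= outparam->field[idx];
    }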
@@ -289,6 +318,9 @@ typedef struct st_foreign_key_info
   List<LEX_STRING> referenced_fields;
 } FOREIGN_KEY_INFO;
 
+/*
+  Make sure that the order of schema_tables and enum_schema_tables is the same.
+*/
 
 enum enum_schema_tables
 {
@@ -297,8 +329,10 @@ enum enum_schema_tables
   SCH_COLLATION_CHARACTER_SET_APPLICABILITY,
   SCH_COLUMNS,
   SCH_COLUMN_PRIVILEGES,
+  SCH_ENGINES,
   SCH_KEY_COLUMN_USAGE,
   SCH_OPEN_TABLES,
+  SCH_PLUGINS,
   SCH_PROCEDURES,
   SCH_SCHEMATA,
   SCH_SCHEMA_PRIVILEGES,
@@ -614,6 +648,7 @@ typedef struct st_table_list
   bool where_processed;
   /* FRMTYPE_ERROR if any type is acceptable */
   enum frm_type_enum required_type;
+  handlerton	*db_type;		/* table_type for handler */
   char		timestamp_buffer[20];	/* buffer for timestamp (19+1) */
   /*
     This TABLE_LIST object is just placeholder for prelocking, it will be
diff --git a/sql/time.cc b/sql/time.cc
index 5069031081d..480cafaab34 100644
--- a/sql/time.cc
+++ b/sql/time.cc
@@ -711,9 +711,15 @@ void make_truncated_value_warning(THD *thd, const char *str_val,
                         type_str, str.c_ptr(), field_name,
                         (ulong) thd->row_count);
   else
-    cs->cset->snprintf(cs, warn_buff, sizeof(warn_buff),
-                       ER(ER_TRUNCATED_WRONG_VALUE),
-                       type_str, str.c_ptr());
+  {
+    if (time_type > MYSQL_TIMESTAMP_ERROR)
+      cs->cset->snprintf(cs, warn_buff, sizeof(warn_buff),
+                         ER(ER_TRUNCATED_WRONG_VALUE),
+                         type_str, str.c_ptr());
+    else
+      cs->cset->snprintf(cs, warn_buff, sizeof(warn_buff),
+                         ER(ER_WRONG_VALUE), type_str, str.c_ptr());
+  }
   push_warning(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
                ER_TRUNCATED_WRONG_VALUE, warn_buff);
 }
diff --git a/sql/tztime.cc b/sql/tztime.cc
index 3a9d9a60aed..99955c21ede 100644
--- a/sql/tztime.cc
+++ b/sql/tztime.cc
@@ -1624,7 +1624,7 @@ my_tz_init(THD *org_thd, const char *default_tzname, my_bool bootstrap)
     mysql.time_zone* tables are MyISAM and these operations always succeed
     for MyISAM.
   */
-  (void)table->file->ha_index_init(0);
+  (void)table->file->ha_index_init(0, 1);
   tz_leapcnt= 0;
 
   res= table->file->index_first(table->record[0]);
@@ -1801,7 +1801,7 @@ tz_load_from_open_tables(const String *tz_name, TABLE_LIST *tz_tables)
     mysql.time_zone* tables are MyISAM and these operations always succeed
     for MyISAM.
*/ - (void)table->file->ha_index_init(0); + (void)table->file->ha_index_init(0, 1); if (table->file->index_read(table->record[0], (byte*)table->field[0]->ptr, 0, HA_READ_KEY_EXACT)) @@ -1828,7 +1828,7 @@ tz_load_from_open_tables(const String *tz_name, TABLE_LIST *tz_tables) table= tz_tables->table; tz_tables= tz_tables->next_local; table->field[0]->store((longlong) tzid, TRUE); - (void)table->file->ha_index_init(0); + (void)table->file->ha_index_init(0, 1); if (table->file->index_read(table->record[0], (byte*)table->field[0]->ptr, 0, HA_READ_KEY_EXACT)) @@ -1855,7 +1855,7 @@ tz_load_from_open_tables(const String *tz_name, TABLE_LIST *tz_tables) table= tz_tables->table; tz_tables= tz_tables->next_local; table->field[0]->store((longlong) tzid, TRUE); - (void)table->file->ha_index_init(0); + (void)table->file->ha_index_init(0, 1); // FIXME Is there any better approach than explicitly specifying 4 ??? res= table->file->index_read(table->record[0], (byte*)table->field[0]->ptr, @@ -1927,7 +1927,7 @@ tz_load_from_open_tables(const String *tz_name, TABLE_LIST *tz_tables) */ table= tz_tables->table; table->field[0]->store((longlong) tzid, TRUE); - (void)table->file->ha_index_init(0); + (void)table->file->ha_index_init(0, 1); // FIXME Is there any better approach than explicitly specifying 4 ??? res= table->file->index_read(table->record[0], (byte*)table->field[0]->ptr, diff --git a/sql/unireg.cc b/sql/unireg.cc index 0ab77462f61..7b15e14bdaf 100644 --- a/sql/unireg.cc +++ b/sql/unireg.cc @@ -35,7 +35,7 @@ static uchar * pack_screens(List<create_field> &create_fields, uint *info_length, uint *screens, bool small_file); static uint pack_keys(uchar *keybuff,uint key_count, KEY *key_info, ulong data_offset); -static bool pack_header(uchar *forminfo,enum db_type table_type, +static bool pack_header(uchar *forminfo,enum legacy_db_type table_type, List<create_field> &create_fields, uint info_length, uint screens, uint table_options, ulong data_offset, handler *file); @@ -43,10 +43,11 @@ static uint get_interval_id(uint *int_count,List<create_field> &create_fields, create_field *last_field); static bool pack_fields(File file, List<create_field> &create_fields, ulong data_offset); -static bool make_empty_rec(THD *thd, int file, enum db_type table_type, +static bool make_empty_rec(THD *thd, int file, enum legacy_db_type table_type, uint table_options, List<create_field> &create_fields, - uint reclength, ulong data_offset); + uint reclength, ulong data_offset, + handler *handler); /* Create a frm (table definition) file @@ -54,7 +55,7 @@ static bool make_empty_rec(THD *thd, int file, enum db_type table_type, SYNOPSIS mysql_create_frm() thd Thread handler - file_name Name of file (including database and .frm) + file_name Path for file (including database and .frm) db Name of database table Name of table create_info create info parameters @@ -68,7 +69,7 @@ static bool make_empty_rec(THD *thd, int file, enum db_type table_type, 1 error */ -bool mysql_create_frm(THD *thd, my_string file_name, +bool mysql_create_frm(THD *thd, const char *file_name, const char *db, const char *table, HA_CREATE_INFO *create_info, List<create_field> &create_fields, @@ -76,28 +77,35 @@ bool mysql_create_frm(THD *thd, my_string file_name, handler *db_file) { LEX_STRING str_db_type; - uint reclength,info_length,screens,key_info_length,maxlength; + uint reclength,info_length,screens,key_info_length,maxlength,i; ulong key_buff_length; File file; ulong filepos, data_offset; uchar fileinfo[64],forminfo[288],*keybuff; TYPELIB formnames; 
uchar *screen_buff; - char buff[2]; + char buff[5]; +#ifdef WITH_PARTITION_STORAGE_ENGINE + partition_info *part_info= thd->lex->part_info; +#endif DBUG_ENTER("mysql_create_frm"); +#ifdef WITH_PARTITION_STORAGE_ENGINE + thd->lex->part_info= NULL; +#endif + DBUG_ASSERT(*fn_rext((char*)file_name)); // Check .frm extension formnames.type_names=0; if (!(screen_buff=pack_screens(create_fields,&info_length,&screens,0))) DBUG_RETURN(1); - if (db_file == NULL) - db_file= get_new_handler((TABLE*) 0, thd->mem_root, create_info->db_type); + DBUG_ASSERT(db_file != NULL); /* If fixed row records, we need one bit to check for deleted rows */ if (!(create_info->table_options & HA_OPTION_PACK_RECORD)) create_info->null_bits++; data_offset= (create_info->null_bits + 7) / 8; - if (pack_header(forminfo, create_info->db_type,create_fields,info_length, + if (pack_header(forminfo, ha_legacy_type(create_info->db_type), + create_fields,info_length, screens, create_info->table_options, data_offset, db_file)) { @@ -109,7 +117,8 @@ bool mysql_create_frm(THD *thd, my_string file_name, thd->net.last_error[0]=0; if (!(screen_buff=pack_screens(create_fields,&info_length,&screens,1))) DBUG_RETURN(1); - if (pack_header(forminfo, create_info->db_type, create_fields,info_length, + if (pack_header(forminfo, ha_legacy_type(create_info->db_type), + create_fields,info_length, screens, create_info->table_options, data_offset, db_file)) { my_free((gptr) screen_buff,MYF(0)); @@ -119,10 +128,23 @@ bool mysql_create_frm(THD *thd, my_string file_name, reclength=uint2korr(forminfo+266); /* Calculate extra data segment length */ - str_db_type.str= (char *) ha_get_storage_engine(create_info->db_type); + str_db_type.str= (char *) ha_resolve_storage_engine_name(create_info->db_type); str_db_type.length= strlen(str_db_type.str); + /* str_db_type */ create_info->extra_size= (2 + str_db_type.length + 2 + create_info->connect_string.length); + /* Partition */ + create_info->extra_size+= 5; +#ifdef WITH_PARTITION_STORAGE_ENGINE + if (part_info) + create_info->extra_size+= part_info->part_info_len; +#endif + + for (i= 0; i < keys; i++) + { + if (key_info[i].parser_name) + create_info->extra_size+= key_info[i].parser_name->length + 1; + } if ((file=create_frm(thd, file_name, db, table, reclength, fileinfo, create_info, keys)) < 0) @@ -147,6 +169,11 @@ bool mysql_create_frm(THD *thd, my_string file_name, 60); forminfo[46]=(uchar) strlen((char*)forminfo+47); // Length of comment +#ifdef WITH_PARTITION_STORAGE_ENGINE + if (part_info) + fileinfo[61]= (uchar) ha_legacy_type(part_info->default_engine_type); +#endif + int2store(fileinfo+59,db_file->extra_rec_buf_length()); if (my_pwrite(file,(byte*) fileinfo,64,0L,MYF_RW) || my_pwrite(file,(byte*) keybuff,key_info_length, (ulong) uint2korr(fileinfo+6),MYF_RW)) @@ -154,22 +181,49 @@ bool mysql_create_frm(THD *thd, my_string file_name, VOID(my_seek(file, (ulong) uint2korr(fileinfo+6)+ (ulong) key_buff_length, MY_SEEK_SET,MYF(0))); - if (make_empty_rec(thd,file,create_info->db_type,create_info->table_options, - create_fields,reclength, data_offset)) + if (make_empty_rec(thd,file,ha_legacy_type(create_info->db_type), + create_info->table_options, + create_fields,reclength, data_offset, db_file)) goto err; int2store(buff, create_info->connect_string.length); - if (my_write(file, (const byte*)buff, sizeof(buff), MYF(MY_NABP)) || + if (my_write(file, (const byte*)buff, 2, MYF(MY_NABP)) || my_write(file, (const byte*)create_info->connect_string.str, create_info->connect_string.length, MYF(MY_NABP))) goto err; 
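Taken together, the writes just above and below give the .frm extra-data segment a fixed, length-prefixed layout: connect string, engine name, partition clause, then any fulltext parser names. A sketch of a reader walking that layout (hypothetical helper; uint2korr/uint4korr decode the little-endian lengths produced by the int2store/int4store calls):

    /* Sketch only: skipping over the extra-data segment written here */
    static const uchar *skip_extra_segment(const uchar *pos)
    {
      pos+= 2 + uint2korr(pos);              /* connect string */
      pos+= 2 + uint2korr(pos);              /* storage engine name */
      uint part_len= uint4korr(pos);
      pos+= 4;
      pos+= part_len ? part_len + 1 : 1;     /* clause + '\0', or one pad byte */
      return pos;                            /* WITH PARSER names follow */
    }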
int2store(buff, str_db_type.length);
-  if (my_write(file, (const byte*)buff, sizeof(buff), MYF(MY_NABP)) ||
+  if (my_write(file, (const byte*)buff, 2, MYF(MY_NABP)) ||
       my_write(file, (const byte*)str_db_type.str, str_db_type.length,
 	       MYF(MY_NABP)))
     goto err;
-
+
+#ifdef WITH_PARTITION_STORAGE_ENGINE
+  if (part_info)
+  {
+    int4store(buff, part_info->part_info_len);
+    if (my_write(file, (const byte*)buff, 4, MYF_RW) ||
+        my_write(file, (const byte*)part_info->part_info_string,
+                 part_info->part_info_len + 1, MYF_RW))
+      goto err;
+  }
+  else
+#endif
+  {
+    bzero(buff, 5);
+    if (my_write(file, (byte*) buff, 5, MYF_RW))
+      goto err;
+  }
+  for (i= 0; i < keys; i++)
+  {
+    if (key_info[i].parser_name)
+    {
+      if (my_write(file, (const byte*)key_info[i].parser_name->str,
+                   key_info[i].parser_name->length + 1, MYF(MY_NABP)))
+        goto err;
+    }
+  }
+
   VOID(my_seek(file,filepos,MY_SEEK_SET,MYF(0)));
   if (my_write(file,(byte*) forminfo,288,MYF_RW) ||
       my_write(file,(byte*) screen_buff,info_length,MYF_RW) ||
@@ -236,37 +290,47 @@ err3:
   SYNOPSIS
     rea_create_table()
     thd			Thread handler
-    file_name		Name of file (including database and .frm)
-    db			Name of database
-    table		Name of table
+    path		Name of file (including database, without .frm)
+    db			Data base name
+    table_name		Table name
     create_info		create info parameters
    create_fields	Fields to create
    keys		number of keys to create
    key_info		Keys to create
-    db_file		Handler to use. May be zero, in which case we use
-			create_info->db_type
+    file		Handler to use
+
   RETURN
    0  ok
    1  error
 */
 
-int rea_create_table(THD *thd, my_string file_name,
-		     const char *db, const char *table,
-		     HA_CREATE_INFO *create_info,
-		     List<create_field> &create_fields,
-		     uint keys, KEY *key_info)
+int rea_create_table(THD *thd, const char *path,
+                     const char *db, const char *table_name,
+                     HA_CREATE_INFO *create_info,
+                     List<create_field> &create_fields,
+                     uint keys, KEY *key_info, handler *file)
 {
   DBUG_ENTER("rea_create_table");
 
-  if (mysql_create_frm(thd, file_name, db, table, create_info,
-                       create_fields, keys, key_info, NULL))
-    DBUG_RETURN(1);
-  if (!create_info->frm_only && ha_create_table(file_name,create_info,0))
-  {
-    my_delete(file_name,MYF(0));
+  char frm_name[FN_REFLEN];
+  strxmov(frm_name, path, reg_ext, NullS);
+  if (mysql_create_frm(thd, frm_name, db, table_name, create_info,
+                       create_fields, keys, key_info, file))
+
     DBUG_RETURN(1);
-  }
+
+  // Make sure mysql_create_frm didn't remove extension
+  DBUG_ASSERT(*fn_rext(frm_name));
+  if (file->create_handler_files(path))
+    goto err_handler;
+  if (!create_info->frm_only && ha_create_table(thd, path, db, table_name,
+                                                create_info,0))
+    goto err_handler;
   DBUG_RETURN(0);
+
+err_handler:
+  my_delete(frm_name, MYF(0));
+  DBUG_RETURN(1);
 } /* rea_create_table */
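Note the new contract here: callers now pass the table path without the .frm suffix, and rea_create_table() appends reg_ext itself, so the same base path can be reused for the handler's own files. A sketch of a conforming call (the path construction is illustrative only, not taken from this patch):

    /* Sketch: extension-less path; ".frm" is appended inside */
    char path[FN_REFLEN];
    strxmov(path, mysql_data_home, "/", db, "/", table_name, NullS);
    if (rea_create_table(thd, path, db, table_name, create_info,
                         create_fields, keys, key_info, file))
      DBUG_RETURN(TRUE);      /* on failure the .frm has been removed */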
@@ -421,7 +485,7 @@ static uint pack_keys(uchar *keybuff, uint key_count, KEY *keyinfo,
 
 	/* Make formheader */
 
-static bool pack_header(uchar *forminfo, enum db_type table_type,
+static bool pack_header(uchar *forminfo, enum legacy_db_type table_type,
 			List<create_field> &create_fields,
                         uint info_length, uint screens, uint table_options,
                         ulong data_offset, handler *file)
@@ -680,31 +744,30 @@ static bool pack_fields(File file, List<create_field> &create_fields,
 
 	/* save an empty record on start of formfile */
 
-static bool make_empty_rec(THD *thd, File file,enum db_type table_type,
+static bool make_empty_rec(THD *thd, File file,enum legacy_db_type table_type,
 			   uint table_options,
 			   List<create_field> &create_fields,
 			   uint reclength,
-                           ulong data_offset)
+                           ulong data_offset,
+                           handler *handler)
 {
-  int error;
+  int error= 0;
   Field::utype type;
   uint null_count;
   uchar *buff,*null_pos;
   TABLE table;
+  TABLE_SHARE share;
   create_field *field;
-  handler *handler;
   enum_check_fields old_count_cuted_fields= thd->count_cuted_fields;
   DBUG_ENTER("make_empty_rec");
 
   /* We need a table to generate columns for default values */
-  bzero((char*) &table,sizeof(table));
-  table.s= &table.share_not_to_be_used;
-  handler= get_new_handler((TABLE*) 0, thd->mem_root, table_type);
+  bzero((char*) &table, sizeof(table));
+  bzero((char*) &share, sizeof(share));
+  table.s= &share;
 
-  if (!handler ||
-      !(buff=(uchar*) my_malloc((uint) reclength,MYF(MY_WME | MY_ZEROFILL))))
+  if (!(buff=(uchar*) my_malloc((uint) reclength,MYF(MY_WME | MY_ZEROFILL))))
   {
-    delete handler;
     DBUG_RETURN(1);
   }
 
@@ -727,21 +790,24 @@ static bool make_empty_rec(THD *thd, File file,enum db_type table_type,
     /*
       regfield doesn't have to be deleted as it's allocated with sql_alloc()
     */
-    Field *regfield=make_field((char*) buff+field->offset + data_offset,
-                               field->length,
-                               null_pos + null_count / 8,
-                               null_count & 7,
-                               field->pack_flag,
-                               field->sql_type,
-                               field->charset,
-                               field->geom_type,
-                               field->unireg_check,
-                               field->interval,
-                               field->field_name,
-                               &table);
+    Field *regfield= make_field(&share,
+                                (char*) buff+field->offset + data_offset,
+                                field->length,
+                                null_pos + null_count / 8,
+                                null_count & 7,
+                                field->pack_flag,
+                                field->sql_type,
+                                field->charset,
+                                field->geom_type,
+                                field->unireg_check,
+                                field->interval,
+                                field->field_name);
     if (!regfield)
       goto err;                                 // End of memory
 
+    /* save_in_field() will access regfield->table->in_use */
+    regfield->init(&table);
+
     if (!(field->flags & NOT_NULL_FLAG))
     {
       *regfield->null_ptr|= regfield->null_bit;
@@ -761,6 +827,7 @@ static bool make_empty_rec(THD *thd, File file,enum db_type table_type,
       {
         my_error(ER_INVALID_DEFAULT, MYF(0), regfield->field_name);
         error= 1;
+        delete regfield; // To avoid a memory leak
         goto err;
       }
     }
@@ -790,7 +857,6 @@ static bool make_empty_rec(THD *thd, File file,enum db_type table_type,
 
 err:
   my_free((gptr) buff,MYF(MY_FAE));
-  delete handler;
   thd->count_cuted_fields= old_count_cuted_fields;
   DBUG_RETURN(error);
 } /* make_empty_rec */
diff --git a/sql/unireg.h b/sql/unireg.h
index b932a2f320c..9ab8753af84 100644
--- a/sql/unireg.h
+++ b/sql/unireg.h
@@ -36,6 +36,9 @@
 #ifndef SHAREDIR
 #define SHAREDIR	"share/"
 #endif
+#ifndef LIBDIR
+#define LIBDIR		"lib/"
+#endif
 
 #define ER(X) errmesg[(X) - ER_ERROR_FIRST]
 #define ER_SAFE(X) (((X) >= ER_ERROR_FIRST && (X) <= ER_ERROR_LAST) ? ER(X) : "Invalid error code")
@@ -80,6 +83,7 @@
 #define PSEUDO_TABLE_BITS (PARAM_TABLE_BIT | OUTER_REF_TABLE_BIT | \
                            RAND_TABLE_BIT)
 #define MAX_FIELDS	4096			/* Limit in the .frm file */
+#define MAX_PARTITIONS  1024
 
 #define MAX_SORT_MEMORY (2048*1024-MALLOC_OVERHEAD)
 #define MIN_SORT_MEMORY (32*1024-MALLOC_OVERHEAD)
@@ -146,13 +150,13 @@
 #define DONT_GIVE_ERROR		256	/* Don't do frm_error on openfrm */
 #define READ_SCREENS		1024	/* Read screens, info and helpfile */
 #define DELAYED_OPEN		4096	/* Open table later */
-#define NO_ERR_ON_NEW_FRM	8192	/* stop error sending on new format */
+#define OPEN_VIEW		8192	/* Allow open on view */
 
 #define SC_INFO_LENGTH 4		/* Form format constant */
 #define TE_INFO_LENGTH 3
 #define MTYP_NOEMPTY_BIT 128
 
-#define FRM_VER_TRUE_VARCHAR (FRM_VER+4)
+#define FRM_VER_TRUE_VARCHAR (FRM_VER+4) /* 10 */
 
 /*
   Minimum length pattern before Turbo Boyer-Moore is used
  for SELECT "text" LIKE "%pattern%", excluding the two |