Diffstat (limited to 'sql')
89 files changed, 15534 insertions, 2485 deletions
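Much of this patch converts storage engines from compile-time #ifdef HAVE_*_DB selection to registration through handlerton: each engine now supplies a create_handler factory slot in its handlerton (see the slots added to example_hton, tina_hton, archive_hton, berkeley_hton, blackhole_hton and federated_hton below), which the server calls instead of hard-coding "new ha_xxx(...)". The following is a minimal self-contained sketch of that factory shape; TABLE, handler and handlerton here are reduced stand-ins for illustration, not the real sql/handler.h definitions.

// Minimal sketch of the create_handler factory slot this patch adds to
// every handlerton.  TABLE, handler and handlerton are simplified
// stand-ins, not the server's actual declarations.
#include <cstdio>

struct TABLE {};                          // stand-in for the server's TABLE
struct handlerton;                        // forward declaration, as in handler.h

struct handler {
  handlerton *hton;
  TABLE      *table;
  handler(handlerton *h, TABLE *t) : hton(h), table(t) {}
  virtual ~handler() {}
};

struct handlerton {
  const char *name;
  handler *(*create)(TABLE *table);       // the new factory slot
};

static handler *example_create_handler(TABLE *table);   // forward decl, as in the diff

static handlerton example_hton = { "EXAMPLE", example_create_handler };

struct ha_example : public handler {
  ha_example(TABLE *t) : handler(&example_hton, t) {}
};

static handler *example_create_handler(TABLE *table)
{
  return new ha_example(table);           // same shape as example_create_handler() below
}

int main()
{
  TABLE t;
  handler *h = example_hton.create(&t);   // engine-agnostic instantiation
  std::printf("created handler for engine %s\n", h->hton->name);
  delete h;
  return 0;
}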
diff --git a/sql/Makefile.am b/sql/Makefile.am index 1437751bf2f..78d5e262fde 100644 --- a/sql/Makefile.am +++ b/sql/Makefile.am @@ -19,8 +19,8 @@ MYSQLDATAdir = $(localstatedir) MYSQLSHAREdir = $(pkgdatadir) MYSQLBASEdir= $(prefix) +MYSQLLIBdir= $(pkglibdir) INCLUDES = @ZLIB_INCLUDES@ \ - @bdb_includes@ @innodb_includes@ @ndbcluster_includes@ \ -I$(top_builddir)/include -I$(top_srcdir)/include \ -I$(top_srcdir)/regex -I$(srcdir) $(yassl_includes) \ $(openssl_includes) @@ -30,19 +30,18 @@ libexec_PROGRAMS = mysqld noinst_PROGRAMS = gen_lex_hash bin_PROGRAMS = mysql_tzinfo_to_sql gen_lex_hash_LDFLAGS = @NOINST_LDFLAGS@ -LDADD = $(top_builddir)/myisam/libmyisam.a \ - $(top_builddir)/myisammrg/libmyisammrg.a \ - $(top_builddir)/heap/libheap.a \ +LDADD = $(top_builddir)/storage/myisam/libmyisam.a \ + $(top_builddir)/storage/myisammrg/libmyisammrg.a \ + $(top_builddir)/storage/heap/libheap.a \ $(top_builddir)/vio/libvio.a \ $(top_builddir)/mysys/libmysys.a \ $(top_builddir)/dbug/libdbug.a \ $(top_builddir)/regex/libregex.a \ - $(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@ @NDB_SCI_LIBS@ + $(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@ mysqld_LDADD = @MYSQLD_EXTRA_LDFLAGS@ \ - @bdb_libs@ @innodb_libs@ @pstack_libs@ \ - @innodb_system_libs@ \ - @ndbcluster_libs@ @ndbcluster_system_libs@ \ + @pstack_libs@ \ + @mysql_se_objs@ @mysql_se_libs@ \ $(LDADD) $(CXXLDFLAGS) $(WRAPLIBS) @LIBDL@ \ @yassl_libs@ @openssl_libs@ noinst_HEADERS = item.h item_func.h item_sum.h item_cmpfunc.h \ @@ -52,22 +51,19 @@ noinst_HEADERS = item.h item_func.h item_sum.h item_cmpfunc.h \ procedure.h sql_class.h sql_lex.h sql_list.h \ sql_manager.h sql_map.h sql_string.h unireg.h \ sql_error.h field.h handler.h mysqld_suffix.h \ - ha_myisammrg.h\ - ha_heap.h ha_myisam.h ha_berkeley.h ha_innodb.h \ - ha_ndbcluster.h opt_range.h protocol.h \ + ha_heap.h ha_myisam.h ha_myisammrg.h ha_partition.h \ + opt_range.h protocol.h \ sql_select.h structs.h table.h sql_udf.h hash_filo.h\ lex.h lex_symbol.h sql_acl.h sql_crypt.h \ - log_event.h sql_repl.h slave.h \ + log_event.h sql_repl.h slave.h rpl_filter.h \ stacktrace.h sql_sort.h sql_cache.h set_var.h \ spatial.h gstream.h client_settings.h tzfile.h \ tztime.h my_decimal.h\ sp_head.h sp_pcontext.h sp_rcontext.h sp.h sp_cache.h \ parse_file.h sql_view.h sql_trigger.h \ sql_array.h sql_cursor.h \ - examples/ha_example.h ha_archive.h \ - examples/ha_tina.h ha_blackhole.h \ - ha_federated.h -mysqld_SOURCES = sql_lex.cc sql_handler.cc \ + sql_plugin.h authors.h +mysqld_SOURCES = sql_lex.cc sql_handler.cc sql_partition.cc \ item.cc item_sum.cc item_buff.cc item_func.cc \ item_cmpfunc.cc item_strfunc.cc item_timefunc.cc \ thr_malloc.cc item_create.cc item_subselect.cc \ @@ -86,13 +82,12 @@ mysqld_SOURCES = sql_lex.cc sql_handler.cc \ unireg.cc des_key_file.cc \ discover.cc time.cc opt_range.cc opt_sum.cc \ records.cc filesort.cc handler.cc \ - ha_heap.cc ha_myisam.cc ha_myisammrg.cc \ - ha_berkeley.cc ha_innodb.cc \ - ha_ndbcluster.cc \ + ha_heap.cc ha_myisam.cc ha_myisammrg.cc \ sql_db.cc sql_table.cc sql_rename.cc sql_crypt.cc \ sql_load.cc mf_iocache.cc field_conv.cc sql_show.cc \ sql_udf.cc sql_analyse.cc sql_analyse.h sql_cache.cc \ - slave.cc sql_repl.cc sql_union.cc sql_derived.cc \ + slave.cc sql_repl.cc rpl_filter.cc \ + sql_union.cc sql_derived.cc \ client.c sql_client.cc mini_client_errors.c pack.c\ stacktrace.c repl_failsafe.h repl_failsafe.cc \ sql_olap.cc sql_view.cc \ @@ -100,10 +95,16 @@ mysqld_SOURCES = sql_lex.cc sql_handler.cc \ tztime.cc my_time.c my_decimal.cc\ 
sp_head.cc sp_pcontext.cc sp_rcontext.cc sp.cc \ sp_cache.cc parse_file.cc sql_trigger.cc \ - examples/ha_example.cc ha_archive.cc \ - examples/ha_tina.cc ha_blackhole.cc \ - ha_federated.cc - + sql_plugin.cc\ + handlerton.cc +EXTRA_mysqld_SOURCES = ha_innodb.cc ha_berkeley.cc ha_archive.cc \ + ha_innodb.h ha_berkeley.h ha_archive.h \ + ha_blackhole.cc ha_federated.cc ha_ndbcluster.cc \ + ha_blackhole.h ha_federated.h ha_ndbcluster.h \ + ha_partition.cc ha_partition.h \ + examples/ha_tina.cc examples/ha_example.cc \ + examples/ha_tina.h examples/ha_example.h +mysqld_DEPENDENCIES = @mysql_se_objs@ gen_lex_hash_SOURCES = gen_lex_hash.cc gen_lex_hash_LDADD = $(LDADD) $(CXXLDFLAGS) mysql_tzinfo_to_sql_SOURCES = mysql_tzinfo_to_sql.cc @@ -113,6 +114,7 @@ DEFS = -DMYSQL_SERVER \ -DDEFAULT_MYSQL_HOME="\"$(MYSQLBASEdir)\"" \ -DDATADIR="\"$(MYSQLDATAdir)\"" \ -DSHAREDIR="\"$(MYSQLSHAREdir)\"" \ + -DLIBDIR="\"$(MYSQLLIBdir)\"" \ @DEFS@ BUILT_SOURCES = sql_yacc.cc sql_yacc.h lex_hash.h @@ -151,6 +153,16 @@ sql_yacc.o: sql_yacc.cc sql_yacc.h $(HEADERS) lex_hash.h: gen_lex_hash$(EXEEXT) ./gen_lex_hash$(EXEEXT) > $@ +ha_berkeley.o: ha_berkeley.cc ha_berkeley.h + $(CXXCOMPILE) @bdb_includes@ $(LM_CFLAGS) -c $< + +ha_ndbcluster.o:ha_ndbcluster.cc ha_ndbcluster.h + $(CXXCOMPILE) @ndbcluster_includes@ $(LM_CFLAGS) -c $< + +#Until we can get rid of dependencies on ha_ndbcluster.h +handler.o: handler.cc ha_ndbcluster.h + $(CXXCOMPILE) @ndbcluster_includes@ $(CXXFLAGS) -c $< + # For testing of udf_example.so; Works on platforms with gcc # (This is not part of our build process but only provided as an example) udf_example.so: udf_example.cc diff --git a/sql/authors.h b/sql/authors.h new file mode 100644 index 00000000000..ab5e367d707 --- /dev/null +++ b/sql/authors.h @@ -0,0 +1,34 @@ +/*************************************************************************** +** Output from "SHOW AUTHORS" +** If you can update it, you get to be in it :) +** Dont be offended if your name is not in here, just add it! +** IMPORTANT: Names should be added in alphabetical order +***************************************************************************/ + +struct show_table_authors_st { + const char *name; + const char *location; + const char *comment; +}; + +struct show_table_authors_st show_table_authors[]= { + { "Brian \"Krow\" Aker", "Seattle, WA. USA", + "Architecture, archive, federated, buncha of little stuff :)" }, + { "David Axmark", "Uppsala, Sweden", "Small stuff long time ago, Monty ripped it out!"}, + { "Omer BarNir", "Sunnyvale, CA. USA", "Testing (sometimes) and general QA stuff"}, + { "Nikolay Grishakin", "Austin, TX. USA", "Testing - Server"}, + { "Serge Kozlov", "Velikie Luki, Russia", "Testing - Cluster"}, + { "Matthias Leich", "Berlin, Germany", "Testing - Server"}, + { "Jonathan (Jeb) Miller", "Kyle, TX. USA", "Testing - Cluster, Replication"}, + { "Carsten Segieth (Pino)", "Fredersdorf, Germany", "Testing - Server"}, + { "Punita Srivastava", "Austin, TX. 
USA", "Testing - Merlin"}, + { "Alexey Stroganov (Ranger)", "Lugansk, Ukraine", "Testing - Benchmarks"}, + { "Oleksandr Byelkin", "Lugansk, Ukraine", "Query Cache (4.0), Subqueries (4.1), Views (5.0)"}, + { "Dmitri Lenev", "Moscow, Russia", "Time zones support (4.1), Triggers (5.0)"}, + { "Konstantin Osipov", "Moscow, Russia", "Prepared statements (4.1), Cursors (5.0)"}, + { "Petr Chardin", "Moscow, Russia", "Instance Manager (5.0)" }, + { "Sergey Vojtovich", "Izhevsk, Russia", "Plugins infrastructure (5.1)" }, + { "Alexander Nozdrin", "Moscow, Russia", "Bugfixing (Stored Procedures, 5.0)" }, + { "Per-Erik Martin", "Uppsala, Sweden", "Stored Procedures (5.0)" }, + {NULL, NULL, NULL} +}; diff --git a/sql/examples/ha_example.cc b/sql/examples/ha_example.cc index d340b9289ec..68aed7c6483 100644 --- a/sql/examples/ha_example.cc +++ b/sql/examples/ha_example.cc @@ -69,9 +69,9 @@ #include "../mysql_priv.h" -#ifdef HAVE_EXAMPLE_DB #include "ha_example.h" +static handler* example_create_handler(TABLE *table); handlerton example_hton= { "EXAMPLE", @@ -94,6 +94,15 @@ handlerton example_hton= { NULL, /* create_cursor_read_view */ NULL, /* set_cursor_read_view */ NULL, /* close_cursor_read_view */ + example_create_handler, /* Create a new handler */ + NULL, /* Drop a database */ + NULL, /* Panic call */ + NULL, /* Release temporary latches */ + NULL, /* Update Statistics */ + NULL, /* Start Consistent Snapshot */ + NULL, /* Flush logs */ + NULL, /* Show status */ + NULL, /* Replication Report Sent Binlog */ HTON_CAN_RECREATE }; @@ -204,6 +213,12 @@ static int free_share(EXAMPLE_SHARE *share) } +static handler* example_create_handler(TABLE *table) +{ + return new ha_example(table); +} + + ha_example::ha_example(TABLE *table_arg) :handler(&example_hton, table_arg) {} @@ -696,4 +711,3 @@ int ha_example::create(const char *name, TABLE *table_arg, /* This is not implemented but we want someone to be able that it works. */ DBUG_RETURN(0); } -#endif /* HAVE_EXAMPLE_DB */ diff --git a/sql/examples/ha_example.h b/sql/examples/ha_example.h index 37f38fe5210..d2ec83a5837 100644 --- a/sql/examples/ha_example.h +++ b/sql/examples/ha_example.h @@ -152,3 +152,4 @@ public: THR_LOCK_DATA **store_lock(THD *thd, THR_LOCK_DATA **to, enum thr_lock_type lock_type); //required }; + diff --git a/sql/examples/ha_tina.cc b/sql/examples/ha_tina.cc index e00657c66ba..6bb883f91e0 100644 --- a/sql/examples/ha_tina.cc +++ b/sql/examples/ha_tina.cc @@ -17,18 +17,22 @@ /* Make sure to look at ha_tina.h for more details. - First off, this is a play thing for me, there are a number of things wrong with it: - *) It was designed for csv and therefor its performance is highly questionable. - *) Indexes have not been implemented. This is because the files can be traded in - and out of the table directory without having to worry about rebuilding anything. - *) NULLs and "" are treated equally (like a spreadsheet). - *) There was in the beginning no point to anyone seeing this other then me, so there - is a good chance that I haven't quite documented it well. - *) Less design, more "make it work" - - Now there are a few cool things with it: - *) Errors can result in corrupted data files. - *) Data files can be read by spreadsheets directly. + First off, this is a play thing for me, there are a number of things + wrong with it: + *) It was designed for csv and therefore its performance is highly + questionable. + *) Indexes have not been implemented. 
This is because the files can + be traded in and out of the table directory without having to worry + about rebuilding anything. + *) NULLs and "" are treated equally (like a spreadsheet). + *) There was in the beginning no point to anyone seeing this other + then me, so there is a good chance that I haven't quite documented + it well. + *) Less design, more "make it work" + + Now there are a few cool things with it: + *) Errors can result in corrupted data files. + *) Data files can be read by spreadsheets directly. TODO: *) Move to a block system for larger files @@ -44,8 +48,6 @@ TODO: #include "mysql_priv.h" -#ifdef HAVE_CSV_DB - #include "ha_tina.h" #include <sys/mman.h> @@ -53,6 +55,7 @@ TODO: pthread_mutex_t tina_mutex; static HASH tina_open_tables; static int tina_init= 0; +static handler* tina_create_handler(TABLE *table); handlerton tina_hton= { "CSV", @@ -75,6 +78,15 @@ handlerton tina_hton= { NULL, /* create_cursor_read_view */ NULL, /* set_cursor_read_view */ NULL, /* close_cursor_read_view */ + tina_create_handler, /* Create a new handler */ + NULL, /* Drop a database */ + tina_end, /* Panic call */ + NULL, /* Release temporary latches */ + NULL, /* Update Statistics */ + NULL, /* Start Consistent Snapshot */ + NULL, /* Flush logs */ + NULL, /* Show status */ + NULL, /* Replication Report Sent Binlog */ HTON_CAN_RECREATE }; @@ -107,23 +119,24 @@ static byte* tina_get_key(TINA_SHARE *share,uint *length, int get_mmap(TINA_SHARE *share, int write) { DBUG_ENTER("ha_tina::get_mmap"); - if (share->mapped_file && munmap(share->mapped_file, share->file_stat.st_size)) + if (share->mapped_file && my_munmap(share->mapped_file, + share->file_stat.st_size)) DBUG_RETURN(1); if (my_fstat(share->data_file, &share->file_stat, MYF(MY_WME)) == -1) DBUG_RETURN(1); - if (share->file_stat.st_size) + if (share->file_stat.st_size) { if (write) - share->mapped_file= (byte *)mmap(NULL, share->file_stat.st_size, - PROT_READ|PROT_WRITE, MAP_SHARED, - share->data_file, 0); + share->mapped_file= (byte *)my_mmap(NULL, share->file_stat.st_size, + PROT_READ|PROT_WRITE, MAP_SHARED, + share->data_file, 0); else - share->mapped_file= (byte *)mmap(NULL, share->file_stat.st_size, - PROT_READ, MAP_PRIVATE, - share->data_file, 0); - if ((share->mapped_file ==(caddr_t)-1)) + share->mapped_file= (byte *)my_mmap(NULL, share->file_stat.st_size, + PROT_READ, MAP_PRIVATE, + share->data_file, 0); + if ((share->mapped_file ==(caddr_t)-1)) { /* Bad idea you think? 
See the problem is that nothing actually checks @@ -136,7 +149,7 @@ int get_mmap(TINA_SHARE *share, int write) DBUG_RETURN(1); } } - else + else share->mapped_file= NULL; DBUG_RETURN(0); @@ -174,17 +187,18 @@ static TINA_SHARE *get_share(const char *table_name, TABLE *table) if (!my_multi_malloc(MYF(MY_WME | MY_ZEROFILL), &share, sizeof(*share), &tmp_name, length+1, - NullS)) + NullS)) { pthread_mutex_unlock(&tina_mutex); return NULL; } - share->use_count=0; - share->table_name_length=length; - share->table_name=tmp_name; - strmov(share->table_name,table_name); - fn_format(data_file_name, table_name, "", ".CSV",MY_REPLACE_EXT|MY_UNPACK_FILENAME); + share->use_count= 0; + share->table_name_length= length; + share->table_name= tmp_name; + strmov(share->table_name, table_name); + fn_format(data_file_name, table_name, "", ".CSV", + MY_REPLACE_EXT|MY_UNPACK_FILENAME); if (my_hash_insert(&tina_open_tables, (byte*) share)) goto error; thr_lock_init(&share->lock); @@ -194,11 +208,14 @@ static TINA_SHARE *get_share(const char *table_name, TABLE *table) MYF(0))) == -1) goto error2; - /* We only use share->data_file for writing, so we scan to the end to append */ + /* + We only use share->data_file for writing, so we scan to + the end to append + */ if (my_seek(share->data_file, 0, SEEK_END, MYF(0)) == MY_FILEPOS_ERROR) goto error2; - share->mapped_file= NULL; // We don't know the state since we just allocated it + share->mapped_file= NULL; // We don't know the state as we just allocated it if (get_mmap(share, 0) > 0) goto error3; } @@ -220,7 +237,7 @@ error: } -/* +/* Free lock controls. */ static int free_share(TINA_SHARE *share) @@ -230,8 +247,8 @@ static int free_share(TINA_SHARE *share) int result_code= 0; if (!--share->use_count){ /* Drop the mapped file */ - if (share->mapped_file) - munmap(share->mapped_file, share->file_stat.st_size); + if (share->mapped_file) + my_munmap(share->mapped_file, share->file_stat.st_size); result_code= my_close(share->data_file,MYF(0)); hash_delete(&tina_open_tables, (byte*) share); thr_lock_delete(&share->lock); @@ -243,7 +260,7 @@ static int free_share(TINA_SHARE *share) DBUG_RETURN(result_code); } -bool tina_end() +int tina_end(ha_panic_function type) { if (tina_init) { @@ -251,16 +268,16 @@ bool tina_end() VOID(pthread_mutex_destroy(&tina_mutex)); } tina_init= 0; - return FALSE; + return 0; } -/* +/* Finds the end of a line. Currently only supports files written on a UNIX OS. */ -byte * find_eoln(byte *data, off_t begin, off_t end) +byte * find_eoln(byte *data, off_t begin, off_t end) { - for (off_t x= begin; x < end; x++) + for (off_t x= begin; x < end; x++) if (data[x] == '\n') return data + x; @@ -268,13 +285,20 @@ byte * find_eoln(byte *data, off_t begin, off_t end) } +static handler* tina_create_handler(TABLE *table) +{ + return new ha_tina(table); +} + + ha_tina::ha_tina(TABLE *table_arg) :handler(&tina_hton, table_arg), /* These definitions are found in hanler.h These are not probably completely right. */ - current_position(0), next_position(0), chain_alloced(0), chain_size(DEFAULT_CHAIN_LENGTH) + current_position(0), next_position(0), chain_alloced(0), + chain_size(DEFAULT_CHAIN_LENGTH) { /* Set our original buffers from pre-allocated memory */ buffer.set(byte_buffer, IO_SIZE, system_charset_info); @@ -284,7 +308,7 @@ ha_tina::ha_tina(TABLE *table_arg) /* Encode a buffer into the quoted format. 
*/ -int ha_tina::encode_quote(byte *buf) +int ha_tina::encode_quote(byte *buf) { char attribute_buffer[1024]; String attribute(attribute_buffer, sizeof(attribute_buffer), &my_charset_bin); @@ -342,13 +366,15 @@ int ha_tina::encode_quote(byte *buf) } /* - chain_append() adds delete positions to the chain that we use to keep track of space. + chain_append() adds delete positions to the chain that we use to keep + track of space. Then the chain will be used to cleanup "holes", occured + due to deletes and updates. */ int ha_tina::chain_append() { if ( chain_ptr != chain && (chain_ptr -1)->end == current_position) (chain_ptr -1)->end= next_position; - else + else { /* We set up for the next position */ if ((off_t)(chain_ptr - chain) == (chain_size -1)) @@ -358,12 +384,14 @@ int ha_tina::chain_append() if (chain_alloced) { /* Must cast since my_malloc unlike malloc doesn't have a void ptr */ - if ((chain= (tina_set *)my_realloc((gptr)chain,chain_size,MYF(MY_WME))) == NULL) + if ((chain= (tina_set *) my_realloc((gptr)chain, + chain_size, MYF(MY_WME))) == NULL) return -1; } else { - tina_set *ptr= (tina_set *)my_malloc(chain_size * sizeof(tina_set),MYF(MY_WME)); + tina_set *ptr= (tina_set *) my_malloc(chain_size * sizeof(tina_set), + MYF(MY_WME)); memcpy(ptr, chain, DEFAULT_CHAIN_LENGTH * sizeof(tina_set)); chain= ptr; chain_alloced++; @@ -379,7 +407,7 @@ int ha_tina::chain_append() } -/* +/* Scans for a row. */ int ha_tina::find_current_row(byte *buf) @@ -389,7 +417,8 @@ int ha_tina::find_current_row(byte *buf) DBUG_ENTER("ha_tina::find_current_row"); /* EOF should be counted as new line */ - if ((end_ptr= find_eoln(share->mapped_file, current_position, share->file_stat.st_size)) == 0) + if ((end_ptr= find_eoln(share->mapped_file, current_position, + share->file_stat.st_size)) == 0) DBUG_RETURN(HA_ERR_END_OF_FILE); for (Field **field=table->field ; *field ; field++) @@ -398,14 +427,15 @@ int ha_tina::find_current_row(byte *buf) mapped_ptr++; // Increment past the first quote for(;mapped_ptr != end_ptr; mapped_ptr++) { - //Need to convert line feeds! - if (*mapped_ptr == '"' && - (((mapped_ptr[1] == ',') && (mapped_ptr[2] == '"')) || (mapped_ptr == end_ptr -1 ))) + // Need to convert line feeds! + if (*mapped_ptr == '"' && + (((mapped_ptr[1] == ',') && (mapped_ptr[2] == '"')) || + (mapped_ptr == end_ptr -1 ))) { mapped_ptr += 2; // Move past the , and the " break; - } - if (*mapped_ptr == '\\' && mapped_ptr != (end_ptr - 1)) + } + if (*mapped_ptr == '\\' && mapped_ptr != (end_ptr - 1)) { mapped_ptr++; if (*mapped_ptr == 'r') @@ -419,7 +449,7 @@ int ha_tina::find_current_row(byte *buf) buffer.append('\\'); buffer.append(*mapped_ptr); } - } + } else buffer.append(*mapped_ptr); } @@ -447,7 +477,7 @@ const char **ha_tina::bas_ext() const } -/* +/* Open a database file. Keep in mind that tables are caches, so this will not be called for every request. Any sort of positions that need to be reset should be kept in the ::extra() call. @@ -475,7 +505,7 @@ int ha_tina::close(void) DBUG_RETURN(free_share(share)); } -/* +/* This is an INSERT. At the moment this handler just seeks to the end of the file and appends the data. In an error case it really should just truncate to the original position (this is not done yet). 
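For context on the get_mmap() calls these ha_tina hunks keep adjusting: the handler appends rows through a plain file descriptor, then remaps the data file so table scans see the new bytes. Below is a rough standalone illustration of that unmap/fstat/remap cycle; it uses raw POSIX mmap()/munmap() where the patch itself switches to the my_mmap()/my_munmap() portability wrappers, and the CSV layout is only approximated.

// Rough sketch of ha_tina's get_mmap() cycle: unmap the old view,
// fstat for the new size, and map the file again.
#include <sys/mman.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <cstdio>

struct TinaView {
  int   fd;
  char *mapped;   // like TINA_SHARE::mapped_file
  off_t size;     // like TINA_SHARE::file_stat.st_size
};

// Returns 0 on success, 1 on failure -- same convention as get_mmap().
static int remap(TinaView *v, bool writable)
{
  if (v->mapped && munmap(v->mapped, v->size))
    return 1;
  struct stat st;
  if (fstat(v->fd, &st) == -1)
    return 1;
  v->size = st.st_size;
  if (v->size == 0)                       // empty file: nothing to map
  {
    v->mapped = NULL;
    return 0;
  }
  int prot  = writable ? PROT_READ | PROT_WRITE : PROT_READ;
  int flags = writable ? MAP_SHARED : MAP_PRIVATE;
  v->mapped = (char *) mmap(NULL, v->size, prot, flags, v->fd, 0);
  return v->mapped == MAP_FAILED ? 1 : 0; // the diff checks (caddr_t)-1
}

int main()
{
  TinaView v = { open("demo.CSV", O_RDWR | O_CREAT | O_APPEND, 0644),
                 NULL, 0 };
  const char row[] = "\"1\",\"hello\"\n";
  (void) write(v.fd, row, sizeof(row) - 1);  // append, like ha_tina::write_row()
  if (remap(&v, false) == 0 && v.mapped)     // remap so a scan sees the row
    fwrite(v.mapped, 1, v.size, stdout);
  close(v.fd);
  return 0;
}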
@@ -495,22 +525,22 @@ int ha_tina::write_row(byte * buf) if (my_write(share->data_file, buffer.ptr(), size, MYF(MY_WME | MY_NABP))) DBUG_RETURN(-1); - /* - Ok, this is means that we will be doing potentially bad things + /* + Ok, this is means that we will be doing potentially bad things during a bulk insert on some OS'es. What we need is a cleanup call for ::write_row that would let us fix up everything after the bulk insert. The archive handler does this with an extra mutx call, which might be a solution for this. */ - if (get_mmap(share, 0) > 0) + if (get_mmap(share, 0) > 0) DBUG_RETURN(-1); DBUG_RETURN(0); } -/* +/* This is called for an update. - Make sure you put in code to increment the auto increment, also + Make sure you put in code to increment the auto increment, also update any timestamp data. Currently auto increment is not being fixed since autoincrements have yet to be added to this table handler. This will be called in a table scan right before the previous ::rnd_next() @@ -538,18 +568,20 @@ int ha_tina::update_row(const byte * old_data, byte * new_data) } -/* - Deletes a row. First the database will find the row, and then call this method. - In the case of a table scan, the previous call to this will be the ::rnd_next() - that found this row. - The exception to this is an ORDER BY. This will cause the table handler to walk - the table noting the positions of all rows that match a query. The table will - then be deleted/positioned based on the ORDER (so RANDOM, DESC, ASC). +/* + Deletes a row. First the database will find the row, and then call this + method. In the case of a table scan, the previous call to this will be + the ::rnd_next() that found this row. + The exception to this is an ORDER BY. This will cause the table handler + to walk the table noting the positions of all rows that match a query. + The table will then be deleted/positioned based on the ORDER (so RANDOM, + DESC, ASC). */ int ha_tina::delete_row(const byte * buf) { DBUG_ENTER("ha_tina::delete_row"); - statistic_increment(table->in_use->status_var.ha_delete_count,&LOCK_status); + statistic_increment(table->in_use->status_var.ha_delete_count, + &LOCK_status); if (chain_append()) DBUG_RETURN(-1); @@ -560,7 +592,7 @@ int ha_tina::delete_row(const byte * buf) } /* - Fill buf with value from key. Simply this is used for a single index read + Fill buf with value from key. Simply this is used for a single index read with a key. */ int ha_tina::index_read(byte * buf, const byte * key, @@ -574,7 +606,7 @@ int ha_tina::index_read(byte * buf, const byte * key, } /* - Fill buf with value from key. Simply this is used for a single index read + Fill buf with value from key. Simply this is used for a single index read with a key. Whatever the current key is we will use it. This is what will be in "index". */ @@ -631,8 +663,8 @@ int ha_tina::index_last(byte * buf) DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED); } -/* - All table scans call this first. +/* + All table scans call this first. The order of a table scan is: ha_tina::store_lock @@ -656,8 +688,8 @@ int ha_tina::index_last(byte * buf) ha_tina::extra ENUM HA_EXTRA_RESET Reset database to after open - Each call to ::rnd_next() represents a row returned in the can. When no more - rows can be returned, rnd_next() returns a value of HA_ERR_END_OF_FILE. + Each call to ::rnd_next() represents a row returned in the can. When no more + rows can be returned, rnd_next() returns a value of HA_ERR_END_OF_FILE. The ::info() call is just for the optimizer. 
*/ @@ -671,22 +703,26 @@ int ha_tina::rnd_init(bool scan) chain_ptr= chain; #ifdef HAVE_MADVISE if (scan) - (void)madvise(share->mapped_file,share->file_stat.st_size,MADV_SEQUENTIAL); + (void) madvise(share->mapped_file, share->file_stat.st_size, + MADV_SEQUENTIAL); #endif DBUG_RETURN(0); } /* - ::rnd_next() does all the heavy lifting for a table scan. You will need to populate *buf - with the correct field data. You can walk the field to determine at what position you - should store the data (take a look at how ::find_current_row() works). The structure - is something like: + ::rnd_next() does all the heavy lifting for a table scan. You will need to + populate *buf with the correct field data. You can walk the field to + determine at what position you should store the data (take a look at how + ::find_current_row() works). The structure is something like: 0Foo Dog Friend - The first offset is for the first attribute. All space before that is reserved for null count. - Basically this works as a mask for which rows are nulled (compared to just empty). - This table handler doesn't do nulls and does not know the difference between NULL and "". This - is ok since this table handler is for spreadsheets and they don't know about them either :) + The first offset is for the first attribute. All space before that is + reserved for null count. + Basically this works as a mask for which rows are nulled (compared to just + empty). + This table handler doesn't do nulls and does not know the difference between + NULL and "". This is ok since this table handler is for spreadsheets and + they don't know about them either :) */ int ha_tina::rnd_next(byte *buf) { @@ -696,9 +732,9 @@ int ha_tina::rnd_next(byte *buf) &LOCK_status); current_position= next_position; - if (!share->mapped_file) + if (!share->mapped_file) DBUG_RETURN(HA_ERR_END_OF_FILE); - if (HA_ERR_END_OF_FILE == find_current_row(buf) ) + if (HA_ERR_END_OF_FILE == find_current_row(buf) ) DBUG_RETURN(HA_ERR_END_OF_FILE); records++; @@ -707,11 +743,11 @@ int ha_tina::rnd_next(byte *buf) /* In the case of an order by rows will need to be sorted. - ::position() is called after each call to ::rnd_next(), + ::position() is called after each call to ::rnd_next(), the data it stores is to a byte array. You can store this - data via my_store_ptr(). ref_length is a variable defined to the - class that is the sizeof() of position being stored. In our case - its just a position. Look at the bdb code if you want to see a case + data via my_store_ptr(). ref_length is a variable defined to the + class that is the sizeof() of position being stored. In our case + its just a position. Look at the bdb code if you want to see a case where something other then a number is stored. */ void ha_tina::position(const byte *record) @@ -722,8 +758,8 @@ void ha_tina::position(const byte *record) } -/* - Used to fetch a row from a posiion stored with ::position(). +/* + Used to fetch a row from a posiion stored with ::position(). my_get_ptr() retrieves the data for you. */ @@ -745,7 +781,7 @@ void ha_tina::info(uint flag) { DBUG_ENTER("ha_tina::info"); /* This is a lie, but you don't want the optimizer to see zero or 1 */ - if (records < 2) + if (records < 2) records= 2; DBUG_VOID_RETURN; } @@ -761,7 +797,7 @@ int ha_tina::extra(enum ha_extra_function operation) DBUG_RETURN(0); } -/* +/* This is no longer used. */ int ha_tina::reset(void) @@ -773,8 +809,10 @@ int ha_tina::reset(void) /* - Called after deletes, inserts, and updates. 
This is where we clean up all of - the dead space we have collected while writing the file. + Called after each table scan. In particular after deletes, + and updates. In the last case we employ chain of deleted + slots to clean up all of the dead space we have collected while + performing deletes/updates. */ int ha_tina::rnd_end() { @@ -786,11 +824,11 @@ int ha_tina::rnd_end() tina_set *ptr; off_t length; - /* + /* Setting up writable map, this will contain all of the data after the get_mmap call that we have added to the file. */ - if (get_mmap(share, 1) > 0) + if (get_mmap(share, 1) > 0) DBUG_RETURN(-1); length= share->file_stat.st_size; @@ -799,7 +837,8 @@ int ha_tina::rnd_end() It also sorts so that we move the final blocks to the beginning so that we move the smallest amount of data possible. */ - qsort(chain, (size_t)(chain_ptr - chain), sizeof(tina_set), (qsort_cmp)sort_set); + qsort(chain, (size_t)(chain_ptr - chain), sizeof(tina_set), + (qsort_cmp)sort_set); for (ptr= chain; ptr < chain_ptr; ptr++) { memmove(share->mapped_file + ptr->begin, share->mapped_file + ptr->end, @@ -811,20 +850,20 @@ int ha_tina::rnd_end() if (my_chsize(share->data_file, length, 0, MYF(MY_WME))) DBUG_RETURN(-1); - if (munmap(share->mapped_file, length)) + if (my_munmap(share->mapped_file, length)) DBUG_RETURN(-1); /* We set it to null so that get_mmap() won't try to unmap it */ share->mapped_file= NULL; - if (get_mmap(share, 0) > 0) + if (get_mmap(share, 0) > 0) DBUG_RETURN(-1); } DBUG_RETURN(0); } -/* - Truncate table and others of its ilk call this. +/* + Truncate table and others of its ilk call this. */ int ha_tina::delete_all_rows() { @@ -832,7 +871,7 @@ int ha_tina::delete_all_rows() int rc= my_chsize(share->data_file, 0, 0, MYF(MY_WME)); - if (get_mmap(share, 0) > 0) + if (get_mmap(share, 0) > 0) DBUG_RETURN(-1); DBUG_RETURN(rc); @@ -847,7 +886,7 @@ int ha_tina::external_lock(THD *thd, int lock_type) DBUG_RETURN(0); // No external locking } -/* +/* Called by the database to lock the table. Keep in mind that this is an internal lock. */ @@ -866,13 +905,15 @@ THR_LOCK_DATA **ha_tina::store_lock(THD *thd, this (the database will call ::open() if it needs to). */ -int ha_tina::create(const char *name, TABLE *table_arg, HA_CREATE_INFO *create_info) +int ha_tina::create(const char *name, TABLE *table_arg, + HA_CREATE_INFO *create_info) { char name_buff[FN_REFLEN]; File create_file; DBUG_ENTER("ha_tina::create"); - if ((create_file= my_create(fn_format(name_buff,name,"",".CSV",MY_REPLACE_EXT|MY_UNPACK_FILENAME),0, + if ((create_file= my_create(fn_format(name_buff, name, "", ".CSV", + MY_REPLACE_EXT|MY_UNPACK_FILENAME),0, O_RDWR | O_TRUNC,MYF(MY_WME))) < 0) DBUG_RETURN(-1); @@ -881,4 +922,3 @@ int ha_tina::create(const char *name, TABLE *table_arg, HA_CREATE_INFO *create_i DBUG_RETURN(0); } -#endif /* enable CSV */ diff --git a/sql/examples/ha_tina.h b/sql/examples/ha_tina.h index 84854e868fa..2de6d8c8257 100644 --- a/sql/examples/ha_tina.h +++ b/sql/examples/ha_tina.h @@ -43,6 +43,11 @@ class ha_tina: public handler off_t next_position; /* Next position in the file scan */ byte byte_buffer[IO_SIZE]; String buffer; + /* + The chain contains "holes" in the file, occured because of + deletes/updates. It is used in rnd_end() to get rid of them + in the end of the query. 
+ */ tina_set chain_buffer[DEFAULT_CHAIN_LENGTH]; tina_set *chain; tina_set *chain_ptr; @@ -120,5 +125,5 @@ public: int chain_append(); }; -bool tina_end(); +int tina_end(ha_panic_function type); diff --git a/sql/field.cc b/sql/field.cc index b70e2a92618..55d6bf7e3a2 100644 --- a/sql/field.cc +++ b/sql/field.cc @@ -67,6 +67,7 @@ inline int field_type2index (enum_field_types field_type) ((int)FIELDTYPE_TEAR_FROM) + (field_type - FIELDTYPE_TEAR_TO) - 1); } + static enum_field_types field_types_merge_rules [FIELDTYPE_NUM][FIELDTYPE_NUM]= { /* MYSQL_TYPE_DECIMAL -> */ @@ -1233,6 +1234,7 @@ Field::Field(char *ptr_arg,uint32 length_arg,uchar *null_ptr_arg, flags=null_ptr ? 0: NOT_NULL_FLAG; comment.str= (char*) ""; comment.length=0; + fieldnr= 0; } uint Field::offset() @@ -5951,6 +5953,26 @@ int Field_str::store(double nr) } +uint Field::is_equal(create_field *new_field) +{ + return (new_field->sql_type == type()); +} + + +uint Field_str::is_equal(create_field *new_field) +{ + if (((new_field->flags & (BINCMP_FLAG | BINARY_FLAG)) && + !(flags & (BINCMP_FLAG | BINARY_FLAG))) || + (!(new_field->flags & (BINCMP_FLAG | BINARY_FLAG)) && + (flags & (BINCMP_FLAG | BINARY_FLAG)))) + return 0; /* One of the fields is binary and the other one isn't */ + + return ((new_field->sql_type == type()) && + new_field->charset == field_charset && + new_field->length == max_length()); +} + + int Field_string::store(longlong nr, bool unsigned_val) { char buff[64]; @@ -6342,7 +6364,8 @@ my_decimal *Field_varstring::val_decimal(my_decimal *decimal_value) } -int Field_varstring::cmp(const char *a_ptr, const char *b_ptr) +int Field_varstring::cmp_max(const char *a_ptr, const char *b_ptr, + uint max_len) { uint a_length, b_length; int diff; @@ -6357,6 +6380,8 @@ int Field_varstring::cmp(const char *a_ptr, const char *b_ptr) a_length= uint2korr(a_ptr); b_length= uint2korr(b_ptr); } + set_if_smaller(a_length, max_len); + set_if_smaller(b_length, max_len); diff= field_charset->coll->strnncollsp(field_charset, (const uchar*) a_ptr+ length_bytes, @@ -6731,6 +6756,22 @@ Field *Field_varstring::new_key_field(MEM_ROOT *root, } +uint Field_varstring::is_equal(create_field *new_field) +{ + if (new_field->sql_type == type() && + new_field->charset == field_charset) + { + if (new_field->length == max_length()) + return IS_EQUAL_YES; + if (new_field->length > max_length() && + ((new_field->length <= 255 && max_length() <= 255) || + (new_field->length > 255 && max_length() > 255))) + return IS_EQUAL_PACK_LENGTH; // VARCHAR, longer variable length + } + return IS_EQUAL_NO; +} + + /**************************************************************************** ** blob type ** A blob is saved as a length and a pointer. 
The length is stored in the @@ -7002,13 +7043,16 @@ int Field_blob::cmp(const char *a,uint32 a_length, const char *b, } -int Field_blob::cmp(const char *a_ptr, const char *b_ptr) +int Field_blob::cmp_max(const char *a_ptr, const char *b_ptr, + uint max_length) { char *blob1,*blob2; memcpy_fixed(&blob1,a_ptr+packlength,sizeof(char*)); memcpy_fixed(&blob2,b_ptr+packlength,sizeof(char*)); - return Field_blob::cmp(blob1,get_length(a_ptr), - blob2,get_length(b_ptr)); + uint a_len= get_length(a_ptr), b_len= get_length(b_ptr); + set_if_smaller(a_len, max_length); + set_if_smaller(b_len, max_length); + return Field_blob::cmp(blob1,a_len,blob2,b_len); } @@ -7871,6 +7915,17 @@ bool Field_num::eq_def(Field *field) } +uint Field_num::is_equal(create_field *new_field) +{ + return ((new_field->sql_type == type()) && + ((new_field->flags & UNSIGNED_FLAG) == (uint) (flags & + UNSIGNED_FLAG)) && + ((new_field->flags & AUTO_INCREMENT_FLAG) == + (uint) (flags & AUTO_INCREMENT_FLAG)) && + (new_field->length >= max_length())); +} + + /* Bit field. @@ -8058,6 +8113,35 @@ my_decimal *Field_bit::val_decimal(my_decimal *deciaml_value) } +/* + Compare two bit fields using pointers within the record. + SYNOPSIS + cmp_max() + a Pointer to field->ptr in first record + b Pointer to field->ptr in second record + max_len Maximum length used in index + DESCRIPTION + This method is used from key_rec_cmp used by merge sorts used + by partitioned index read and later other similar places. + The a and b pointer must be pointers to the field in a record + (not the table->record[0] necessarily) +*/ +int Field_bit::cmp_max(const char *a, const char *b, uint max_len) +{ + my_ptrdiff_t a_diff= a - ptr; + my_ptrdiff_t b_diff= b - ptr; + if (bit_len) + { + int flag; + uchar bits_a= get_rec_bits(bit_ptr+a_diff, bit_ofs, bit_len); + uchar bits_b= get_rec_bits(bit_ptr+b_diff, bit_ofs, bit_len); + if ((flag= (int) (bits_a - bits_b))) + return flag; + } + return memcmp(a, b, field_length); +} + + int Field_bit::key_cmp(const byte *str, uint length) { if (bit_len) diff --git a/sql/field.h b/sql/field.h index ed6bf1c0a9c..7446875d864 100644 --- a/sql/field.h +++ b/sql/field.h @@ -29,6 +29,7 @@ class Send_field; class Protocol; +class create_field; struct st_cache_field; void field_conv(Field *to,Field *from); @@ -87,7 +88,11 @@ public: utype unireg_check; uint32 field_length; // Length of field uint field_index; // field number in fields array - uint16 flags; + uint32 flags; + /* fieldnr is the id of the field (first field = 1) as is also + used in key_part. 
+ */ + uint16 fieldnr; uchar null_bit; // Bit used to test null bit Field(char *ptr_arg,uint32 length_arg,uchar *null_ptr_arg,uchar null_bit_arg, @@ -151,6 +156,8 @@ public: virtual enum_field_types type() const =0; virtual enum_field_types real_type() const { return type(); } inline int cmp(const char *str) { return cmp(ptr,str); } + virtual int cmp_max(const char *a, const char *b, uint max_len) + { return cmp(a, b); } virtual int cmp(const char *,const char *)=0; virtual int cmp_binary(const char *a,const char *b, uint32 max_length=~0L) { return memcmp(a,b,pack_length()); } @@ -180,6 +187,12 @@ public: return test(record[(uint) (null_ptr - (uchar*) table->record[0])] & null_bit); } + inline bool is_null_in_record_with_offset(my_ptrdiff_t offset) + { + if (!null_ptr) + return 0; + return test(null_ptr[offset] & null_bit); + } inline void set_null(int row_offset=0) { if (null_ptr) null_ptr[row_offset]|= null_bit; } inline void set_notnull(int row_offset=0) @@ -304,6 +317,8 @@ public: int warn_if_overflow(int op_result); /* maximum possible display length */ virtual uint32 max_length()= 0; + + virtual uint is_equal(create_field *new_field); /* convert decimal to longlong with overflow check */ longlong convert_decimal2longlong(const my_decimal *val, bool unsigned_flag, int *err); @@ -344,6 +359,7 @@ public: bool eq_def(Field *field); int store_decimal(const my_decimal *); my_decimal *val_decimal(my_decimal *); + uint is_equal(create_field *new_field); }; @@ -368,6 +384,7 @@ public: uint32 max_length() { return field_length; } friend class create_field; my_decimal *val_decimal(my_decimal *); + uint is_equal(create_field *new_field); }; @@ -1061,7 +1078,11 @@ public: longlong val_int(void); String *val_str(String*,String *); my_decimal *val_decimal(my_decimal *); - int cmp(const char *,const char*); + int cmp_max(const char *, const char *, uint max_length); + int cmp(const char *a,const char*b) + { + return cmp_max(a, b, ~0L); + } void sort_string(char *buff,uint length); void get_key_image(char *buff,uint length, imagetype type); void set_key_image(char *buff,uint length); @@ -1087,6 +1108,7 @@ public: Field *new_key_field(MEM_ROOT *root, struct st_table *new_table, char *new_ptr, uchar *new_null_ptr, uint new_null_bit); + uint is_equal(create_field *new_field); }; @@ -1117,7 +1139,9 @@ public: longlong val_int(void); String *val_str(String*,String *); my_decimal *val_decimal(my_decimal *); - int cmp(const char *,const char*); + int cmp_max(const char *, const char *, uint max_length); + int cmp(const char *a,const char*b) + { return cmp_max(a, b, ~0L); } int cmp(const char *a, uint32 a_length, const char *b, uint32 b_length); int cmp_binary(const char *a,const char *b, uint32 max_length=~0L); int key_cmp(const byte *,const byte*); @@ -1142,6 +1166,10 @@ public: { memcpy_fixed(str,ptr+packlength,sizeof(char*)); } + inline void get_ptr(char **str, uint row_offset) + { + memcpy_fixed(str,ptr+packlength+row_offset,sizeof(char*)); + } inline void set_ptr(char *length,char *data) { memcpy(ptr,length,packlength); @@ -1311,6 +1339,7 @@ public: my_decimal *val_decimal(my_decimal *); int cmp(const char *a, const char *b) { return cmp_binary(a, b); } + int cmp_max(const char *a, const char *b, uint max_length); int key_cmp(const byte *a, const byte *b) { return cmp_binary((char *) a, (char *) b); } int key_cmp(const byte *str, uint length); diff --git a/sql/ha_archive.cc b/sql/ha_archive.cc index 1e8fc582eb8..488343c5402 100644 --- a/sql/ha_archive.cc +++ b/sql/ha_archive.cc @@ -20,7 +20,6 @@ 
#include "mysql_priv.h" -#ifdef HAVE_ARCHIVE_DB #include "ha_archive.h" #include <my_dir.h> @@ -135,6 +134,10 @@ static HASH archive_open_tables; #define DATA_BUFFER_SIZE 2 // Size of the data used in the data file #define ARCHIVE_CHECK_HEADER 254 // The number we use to determine corruption +/* Static declarations for handerton */ +static handler *archive_create_handler(TABLE *table); + + /* dummy handlerton - only to have something to return from archive_db_init */ handlerton archive_hton = { "ARCHIVE", @@ -157,9 +160,22 @@ handlerton archive_hton = { NULL, /* create_cursor_read_view */ NULL, /* set_cursor_read_view */ NULL, /* close_cursor_read_view */ + archive_create_handler, /* Create a new handler */ + NULL, /* Drop a database */ + archive_db_end, /* Panic call */ + NULL, /* Release temporary latches */ + NULL, /* Update Statistics */ + NULL, /* Start Consistent Snapshot */ + NULL, /* Flush logs */ + NULL, /* Show status */ + NULL, /* Replication Report Sent Binlog */ HTON_NO_FLAGS }; +static handler *archive_create_handler(TABLE *table) +{ + return new ha_archive(table); +} /* Used for hash table that tracks open tables. @@ -215,7 +231,7 @@ error: FALSE OK */ -bool archive_db_end() +int archive_db_end(ha_panic_function type) { if (archive_inited) { @@ -223,7 +239,7 @@ bool archive_db_end() VOID(pthread_mutex_destroy(&archive_mutex)); } archive_inited= 0; - return FALSE; + return 0; } ha_archive::ha_archive(TABLE *table_arg) @@ -1130,4 +1146,3 @@ bool ha_archive::check_and_repair(THD *thd) DBUG_RETURN(HA_ADMIN_OK); } } -#endif /* HAVE_ARCHIVE_DB */ diff --git a/sql/ha_archive.h b/sql/ha_archive.h index 56a4b9d1e27..6d7a8c05ac9 100644 --- a/sql/ha_archive.h +++ b/sql/ha_archive.h @@ -109,5 +109,5 @@ public: }; bool archive_db_init(void); -bool archive_db_end(void); +int archive_db_end(ha_panic_function type); diff --git a/sql/ha_berkeley.cc b/sql/ha_berkeley.cc index 72af402a0dc..30d63f1d6f6 100644 --- a/sql/ha_berkeley.cc +++ b/sql/ha_berkeley.cc @@ -53,7 +53,6 @@ #include "mysql_priv.h" -#ifdef HAVE_BERKELEY_DB #include <m_ctype.h> #include <myisampack.h> #include <hash.h> @@ -72,13 +71,18 @@ #define STATUS_ROW_COUNT_INIT 2 #define STATUS_BDB_ANALYZE 4 +const u_int32_t bdb_DB_TXN_NOSYNC= DB_TXN_NOSYNC; +const u_int32_t bdb_DB_RECOVER= DB_RECOVER; +const u_int32_t bdb_DB_PRIVATE= DB_PRIVATE; const char *ha_berkeley_ext=".db"; bool berkeley_shared_data=0; u_int32_t berkeley_init_flags= DB_PRIVATE | DB_RECOVER, berkeley_env_flags=0, berkeley_lock_type=DB_LOCK_DEFAULT; -ulong berkeley_cache_size, berkeley_log_buffer_size, berkeley_log_file_size=0; +ulong berkeley_log_buffer_size=0 , berkeley_log_file_size=0; +ulonglong berkeley_cache_size= 0; char *berkeley_home, *berkeley_tmpdir, *berkeley_logdir; long berkeley_lock_scan_time=0; +ulong berkeley_region_size=0, berkeley_cache_parts=1; ulong berkeley_trans_retry=1; ulong berkeley_max_lock; pthread_mutex_t bdb_mutex; @@ -87,13 +91,17 @@ static DB_ENV *db_env; static HASH bdb_open_tables; const char *berkeley_lock_names[] = -{ "DEFAULT", "OLDEST","RANDOM","YOUNGEST",0 }; +{ "DEFAULT", "OLDEST", "RANDOM", "YOUNGEST", "EXPIRE", "MAXLOCKS", + "MAXWRITE", "MINLOCKS", "MINWRITE", 0 }; u_int32_t berkeley_lock_types[]= -{ DB_LOCK_DEFAULT, DB_LOCK_OLDEST, DB_LOCK_RANDOM }; +{ DB_LOCK_DEFAULT, DB_LOCK_OLDEST, DB_LOCK_RANDOM, DB_LOCK_YOUNGEST, + DB_LOCK_EXPIRE, DB_LOCK_MAXLOCKS, DB_LOCK_MAXWRITE, DB_LOCK_MINLOCKS, + DB_LOCK_MINWRITE }; TYPELIB berkeley_lock_typelib= {array_elements(berkeley_lock_names)-1,"", berkeley_lock_names, NULL}; -static void 
berkeley_print_error(const char *db_errpfx, char *buffer); +static void berkeley_print_error(const DB_ENV *db_env, const char *db_errpfx, + const char *buffer); static byte* bdb_get_key(BDB_SHARE *share,uint *length, my_bool not_used __attribute__((unused))); static BDB_SHARE *get_share(const char *table_name, TABLE *table); @@ -106,6 +114,7 @@ static void berkeley_noticecall(DB_ENV *db_env, db_notices notice); static int berkeley_close_connection(THD *thd); static int berkeley_commit(THD *thd, bool all); static int berkeley_rollback(THD *thd, bool all); +static handler *berkeley_create_handler(TABLE *table); handlerton berkeley_hton = { "BerkeleyDB", @@ -128,9 +137,23 @@ handlerton berkeley_hton = { NULL, /* create_cursor_read_view */ NULL, /* set_cursor_read_view */ NULL, /* close_cursor_read_view */ - HTON_CLOSE_CURSORS_AT_COMMIT + berkeley_create_handler, /* Create a new handler */ + NULL, /* Drop a database */ + berkeley_end, /* Panic call */ + NULL, /* Release temporary latches */ + NULL, /* Update Statistics */ + NULL, /* Start Consistent Snapshot */ + berkeley_flush_logs, /* Flush logs */ + berkeley_show_status, /* Show status */ + NULL, /* Replication Report Sent Binlog */ + HTON_CLOSE_CURSORS_AT_COMMIT | HTON_FLUSH_AFTER_RENAME }; +handler *berkeley_create_handler(TABLE *table) +{ + return new ha_berkeley(table); +} + typedef struct st_berkeley_trx_data { DB_TXN *all; DB_TXN *stmt; @@ -183,13 +206,20 @@ bool berkeley_init(void) if (opt_endinfo) db_env->set_verbose(db_env, - DB_VERB_CHKPOINT | DB_VERB_DEADLOCK | DB_VERB_RECOVERY, + DB_VERB_DEADLOCK | DB_VERB_RECOVERY, 1); - db_env->set_cachesize(db_env, 0, berkeley_cache_size, 0); + if (berkeley_cache_size > (uint) ~0) + db_env->set_cachesize(db_env, berkeley_cache_size / (1024*1024L*1024L), + berkeley_cache_size % (1024L*1024L*1024L), + berkeley_cache_parts); + else + db_env->set_cachesize(db_env, 0, berkeley_cache_size, berkeley_cache_parts); + db_env->set_lg_max(db_env, berkeley_log_file_size); db_env->set_lg_bsize(db_env, berkeley_log_buffer_size); db_env->set_lk_detect(db_env, berkeley_lock_type); + db_env->set_lg_regionmax(db_env, berkeley_region_size); if (berkeley_max_lock) db_env->set_lk_max(db_env, berkeley_max_lock); @@ -214,18 +244,19 @@ error: } -bool berkeley_end(void) +int berkeley_end(ha_panic_function type) { - int error; + int error= 0; DBUG_ENTER("berkeley_end"); - if (!db_env) - return 1; /* purecov: tested */ - berkeley_cleanup_log_files(); - error=db_env->close(db_env,0); // Error is logged - db_env=0; - hash_free(&bdb_open_tables); - pthread_mutex_destroy(&bdb_mutex); - DBUG_RETURN(error != 0); + if (db_env) + { + berkeley_cleanup_log_files(); + error= db_env->close(db_env,0); // Error is logged + db_env= 0; + hash_free(&bdb_open_tables); + pthread_mutex_destroy(&bdb_mutex); + } + DBUG_RETURN(error); } static int berkeley_close_connection(THD *thd) @@ -258,7 +289,7 @@ static int berkeley_commit(THD *thd, bool all) DBUG_PRINT("trans",("ending transaction %s", all ? "all" : "stmt")); berkeley_trx_data *trx=(berkeley_trx_data *)thd->ha_data[berkeley_hton.slot]; DB_TXN **txn= all ? &trx->all : &trx->stmt; - int error=txn_commit(*txn,0); + int error= (*txn)->commit(*txn,0); *txn=0; #ifndef DBUG_OFF if (error) @@ -273,13 +304,13 @@ static int berkeley_rollback(THD *thd, bool all) DBUG_PRINT("trans",("aborting transaction %s", all ? "all" : "stmt")); berkeley_trx_data *trx=(berkeley_trx_data *)thd->ha_data[berkeley_hton.slot]; DB_TXN **txn= all ? 
&trx->all : &trx->stmt; - int error=txn_abort(*txn); + int error= (*txn)->abort(*txn); *txn=0; DBUG_RETURN(error); } -int berkeley_show_logs(Protocol *protocol) +static bool berkeley_show_logs(THD *thd, stat_print_fn *stat_print) { char **all_logs, **free_logs, **a, **f; int error=1; @@ -306,21 +337,19 @@ int berkeley_show_logs(Protocol *protocol) { for (a = all_logs, f = free_logs; *a; ++a) { - protocol->prepare_for_resend(); - protocol->store(*a, system_charset_info); - protocol->store(STRING_WITH_LEN("BDB"), system_charset_info); + const char *status; if (f && *f && strcmp(*a, *f) == 0) { - f++; - protocol->store(SHOW_LOG_STATUS_FREE, system_charset_info); + f++; + status= SHOW_LOG_STATUS_FREE; } else - protocol->store(SHOW_LOG_STATUS_INUSE, system_charset_info); - - if (protocol->write()) + status= SHOW_LOG_STATUS_INUSE; + + if (stat_print(thd, berkeley_hton.name, *a, status)) { - error=1; - goto err; + error=1; + goto err; } } } @@ -330,8 +359,19 @@ err: DBUG_RETURN(error); } +bool berkeley_show_status(THD *thd, stat_print_fn *stat_print, + enum ha_stat_type stat_type) +{ + switch (stat_type) { + case HA_ENGINE_LOGS: + return berkeley_show_logs(thd, stat_print); + default: + return FALSE; + } +} -static void berkeley_print_error(const char *db_errpfx, char *buffer) +static void berkeley_print_error(const DB_ENV *db_env, const char *db_errpfx, + const char *buffer) { sql_print_error("%s: %s",db_errpfx,buffer); /* purecov: tested */ } @@ -342,9 +382,7 @@ static void berkeley_noticecall(DB_ENV *db_env, db_notices notice) switch (notice) { case DB_NOTICE_LOGFILE_CHANGED: /* purecov: tested */ - pthread_mutex_lock(&LOCK_manager); - manager_status |= MANAGER_BERKELEY_LOG_CLEANUP; - pthread_mutex_unlock(&LOCK_manager); + mysql_manager_submit(berkeley_cleanup_log_files); pthread_cond_signal(&COND_manager); break; } @@ -623,7 +661,7 @@ int ha_berkeley::open(const char *name, int mode, uint test_if_locked) berkeley_cmp_packed_key)); if (!hidden_primary_key) file->app_private= (void*) (table->key_info + table_share->primary_key); - if ((error= txn_begin(db_env, 0, (DB_TXN**) &transaction, 0)) || + if ((error= db_env->txn_begin(db_env, NULL, (DB_TXN**) &transaction, 0)) || (error= (file->open(file, transaction, fn_format(name_buff, name, "", ha_berkeley_ext, 2 | 4), @@ -662,7 +700,8 @@ int ha_berkeley::open(const char *name, int mode, uint test_if_locked) DBUG_PRINT("bdb",("Setting DB_DUP for key %u", i)); (*ptr)->set_flags(*ptr, DB_DUP); } - if ((error= txn_begin(db_env, 0, (DB_TXN**) &transaction, 0)) || + if ((error= db_env->txn_begin(db_env, NULL, (DB_TXN**) &transaction, + 0)) || (error=((*ptr)->open(*ptr, transaction, name_buff, part, DB_BTREE, open_mode, 0))) || (error= transaction->commit(transaction, 0))) @@ -1378,7 +1417,7 @@ int ha_berkeley::delete_row(const byte * record) } -int ha_berkeley::index_init(uint keynr) +int ha_berkeley::index_init(uint keynr, bool sorted) { int error; DBUG_ENTER("ha_berkeley::index_init"); @@ -1656,7 +1695,7 @@ int ha_berkeley::rnd_init(bool scan) { DBUG_ENTER("rnd_init"); current_row.flags=DB_DBT_REALLOC; - DBUG_RETURN(index_init(primary_key)); + DBUG_RETURN(index_init(primary_key, 0)); } int ha_berkeley::rnd_end() @@ -1855,7 +1894,7 @@ int ha_berkeley::external_lock(THD *thd, int lock_type) /* We have to start a master transaction */ DBUG_PRINT("trans",("starting transaction all: options: 0x%lx", (ulong) thd->options)); - if ((error=txn_begin(db_env, 0, &trx->all, 0))) + if ((error= db_env->txn_begin(db_env, NULL, &trx->all, 0))) { trx->bdb_lock_count--; 
// We didn't get the lock DBUG_RETURN(error); @@ -1865,7 +1904,7 @@ int ha_berkeley::external_lock(THD *thd, int lock_type) DBUG_RETURN(0); // Don't create stmt trans } DBUG_PRINT("trans",("starting transaction stmt")); - if ((error=txn_begin(db_env, trx->all, &trx->stmt, 0))) + if ((error= db_env->txn_begin(db_env, trx->all, &trx->stmt, 0))) { /* We leave the possible master transaction open */ trx->bdb_lock_count--; // We didn't get the lock @@ -1890,7 +1929,7 @@ int ha_berkeley::external_lock(THD *thd, int lock_type) We must in this case commit the work to keep the row locks */ DBUG_PRINT("trans",("commiting non-updating transaction")); - error= txn_commit(trx->stmt,0); + error= trx->stmt->commit(trx->stmt,0); trx->stmt= transaction= 0; } } @@ -1919,7 +1958,7 @@ int ha_berkeley::start_stmt(THD *thd, thr_lock_type lock_type) if (!trx->stmt) { DBUG_PRINT("trans",("starting transaction stmt")); - error=txn_begin(db_env, trx->all, &trx->stmt, 0); + error= db_env->txn_begin(db_env, trx->all, &trx->stmt, 0); trans_register_ha(thd, FALSE, &berkeley_hton); } transaction= trx->stmt; @@ -2164,7 +2203,7 @@ ulonglong ha_berkeley::get_auto_increment() (void) ha_berkeley::extra(HA_EXTRA_KEYREAD); /* Set 'active_index' */ - ha_berkeley::index_init(table->s->next_number_index); + ha_berkeley::index_init(table->s->next_number_index, 0); if (!table->s->next_number_key_offset) { // Autoincrement at key-start @@ -2300,7 +2339,7 @@ int ha_berkeley::analyze(THD* thd, HA_CHECK_OPT* check_opt) free(stat); stat=0; } - if ((key_file[i]->stat)(key_file[i], (void*) &stat, 0)) + if ((key_file[i]->stat)(key_file[i], NULL, (void*) &stat, 0)) goto err; /* purecov: inspected */ share->rec_per_key[i]= (stat->bt_ndata / (stat->bt_nkeys ? stat->bt_nkeys : 1)); @@ -2313,7 +2352,7 @@ int ha_berkeley::analyze(THD* thd, HA_CHECK_OPT* check_opt) free(stat); stat=0; } - if ((file->stat)(file, (void*) &stat, 0)) + if ((file->stat)(file, NULL, (void*) &stat, 0)) goto err; /* purecov: inspected */ } pthread_mutex_lock(&share->mutex); @@ -2503,7 +2542,7 @@ void ha_berkeley::get_status() if (!(share->status & STATUS_PRIMARY_KEY_INIT)) { (void) extra(HA_EXTRA_KEYREAD); - index_init(primary_key); + index_init(primary_key, 0); if (!index_last(table->record[1])) share->auto_ident=uint5korr(current_ident); index_end(); @@ -2656,4 +2695,13 @@ int ha_berkeley::cmp_ref(const byte *ref1, const byte *ref2) return 0; } -#endif /* HAVE_BERKELEY_DB */ + +bool ha_berkeley::check_if_incompatible_data(HA_CREATE_INFO *info, + uint table_changes) +{ + if (table_changes < IS_EQUAL_YES) + return COMPATIBLE_DATA_NO; + return COMPATIBLE_DATA_YES; +} + + diff --git a/sql/ha_berkeley.h b/sql/ha_berkeley.h index 16e4db59c10..25e6c31cb9e 100644 --- a/sql/ha_berkeley.h +++ b/sql/ha_berkeley.h @@ -92,7 +92,7 @@ class ha_berkeley: public handler const char **bas_ext() const; ulong table_flags(void) const { return int_table_flags; } uint max_supported_keys() const { return MAX_KEY-1; } - uint extra_rec_buf_length() { return BDB_HIDDEN_PRIMARY_KEY_LENGTH; } + uint extra_rec_buf_length() const { return BDB_HIDDEN_PRIMARY_KEY_LENGTH; } ha_rows estimate_rows_upper_bound(); uint max_supported_key_length() const { return UINT_MAX32; } uint max_supported_key_part_length() const { return UINT_MAX32; } @@ -106,7 +106,7 @@ class ha_berkeley: public handler int write_row(byte * buf); int update_row(const byte * old_data, byte * new_data); int delete_row(const byte * buf); - int index_init(uint index); + int index_init(uint index, bool sorted); int index_end(); int 
index_read(byte * buf, const byte * key, uint key_len, enum ha_rkey_function find_flag); @@ -154,17 +154,23 @@ class ha_berkeley: public handler uint8 table_cache_type() { return HA_CACHE_TBL_TRANSACT; } bool primary_key_is_clustered() { return true; } int cmp_ref(const byte *ref1, const byte *ref2); + bool check_if_incompatible_data(HA_CREATE_INFO *info, uint table_changes); }; +extern const u_int32_t bdb_DB_TXN_NOSYNC; +extern const u_int32_t bdb_DB_RECOVER; +extern const u_int32_t bdb_DB_PRIVATE; extern bool berkeley_shared_data; extern u_int32_t berkeley_init_flags,berkeley_env_flags, berkeley_lock_type, berkeley_lock_types[]; -extern ulong berkeley_cache_size, berkeley_max_lock, berkeley_log_buffer_size; +extern ulong berkeley_max_lock, berkeley_log_buffer_size; +extern ulonglong berkeley_cache_size; +extern ulong berkeley_region_size, berkeley_cache_parts; extern char *berkeley_home, *berkeley_tmpdir, *berkeley_logdir; extern long berkeley_lock_scan_time; extern TYPELIB berkeley_lock_typelib; bool berkeley_init(void); -bool berkeley_end(void); +int berkeley_end(ha_panic_function type); bool berkeley_flush_logs(void); -int berkeley_show_logs(Protocol *protocol); +bool berkeley_show_status(THD *thd, stat_print_fn *print, enum ha_stat_type); diff --git a/sql/ha_blackhole.cc b/sql/ha_blackhole.cc index 2505919af39..3503f5bec1b 100644 --- a/sql/ha_blackhole.cc +++ b/sql/ha_blackhole.cc @@ -20,9 +20,12 @@ #endif #include "mysql_priv.h" -#ifdef HAVE_BLACKHOLE_DB #include "ha_blackhole.h" +/* Static declarations for handlerton */ + +static handler *blackhole_create_handler(TABLE *table); + /* Blackhole storage engine handlerton */ @@ -47,9 +50,25 @@ handlerton blackhole_hton= { NULL, /* create_cursor_read_view */ NULL, /* set_cursor_read_view */ NULL, /* close_cursor_read_view */ + blackhole_create_handler, /* Create a new handler */ + NULL, /* Drop a database */ + NULL, /* Panic call */ + NULL, /* Release temporary latches */ + NULL, /* Update Statistics */ + NULL, /* Start Consistent Snapshot */ + NULL, /* Flush logs */ + NULL, /* Show status */ + NULL, /* Replication Report Sent Binlog */ HTON_CAN_RECREATE }; + +static handler *blackhole_create_handler(TABLE *table) +{ + return new ha_blackhole(table); +} + + /***************************************************************************** ** BLACKHOLE tables *****************************************************************************/ @@ -227,4 +246,3 @@ int ha_blackhole::index_last(byte * buf) DBUG_RETURN(HA_ERR_END_OF_FILE); } -#endif /* HAVE_BLACKHOLE_DB */ diff --git a/sql/ha_federated.cc b/sql/ha_federated.cc index 3ab246df012..badfe3cdf62 100644 --- a/sql/ha_federated.cc +++ b/sql/ha_federated.cc @@ -351,7 +351,6 @@ #pragma implementation // gcc: Class implementation #endif -#ifdef HAVE_FEDERATED_DB #include "ha_federated.h" #include "m_string.h" @@ -363,6 +362,11 @@ pthread_mutex_t federated_mutex; // This is the mutex we use to static int federated_init= FALSE; // Variable for checking the // init state of hash +/* Static declaration for handerton */ +static handler *federated_create_handler(TABLE *table); +static int federated_commit(THD *thd, bool all); +static int federated_rollback(THD *thd, bool all); + /* Federated storage engine handlerton */ handlerton federated_hton= { @@ -377,8 +381,8 @@ handlerton federated_hton= { NULL, /* savepoint */ NULL, /* rollback to savepoint */ NULL, /* release savepoint */ - NULL, /* commit */ - NULL, /* rollback */ + federated_commit, /* commit */ + federated_rollback, /* rollback */ NULL, /* 
prepare */ NULL, /* recover */ NULL, /* commit_by_xid */ @@ -386,10 +390,25 @@ handlerton federated_hton= { NULL, /* create_cursor_read_view */ NULL, /* set_cursor_read_view */ NULL, /* close_cursor_read_view */ + federated_create_handler, /* Create a new handler */ + NULL, /* Drop a database */ + federated_db_end, /* Panic call */ + NULL, /* Release temporary latches */ + NULL, /* Update Statistics */ + NULL, /* Start Consistent Snapshot */ + NULL, /* Flush logs */ + NULL, /* Show status */ + NULL, /* Replication Report Sent Binlog */ HTON_ALTER_NOT_SUPPORTED }; +static handler *federated_create_handler(TABLE *table) +{ + return new ha_federated(table); +} + + /* Function we use in the creation of our hash to get key. */ static byte *federated_get_key(FEDERATED_SHARE *share, uint *length, @@ -443,7 +462,7 @@ error: FALSE OK */ -bool federated_db_end() +int federated_db_end(ha_panic_function type) { if (federated_init) { @@ -451,7 +470,7 @@ bool federated_db_end() VOID(pthread_mutex_destroy(&federated_mutex)); } federated_init= 0; - return FALSE; + return 0; } /* @@ -628,8 +647,8 @@ static int parse_url(FEDERATED_SHARE *share, TABLE *table, share->port= 0; share->socket= 0; - DBUG_PRINT("info", ("Length %d \n", table->s->connect_string.length)); - DBUG_PRINT("info", ("String %.*s \n", table->s->connect_string.length, + DBUG_PRINT("info", ("Length: %d", table->s->connect_string.length)); + DBUG_PRINT("info", ("String: '%.*s'", table->s->connect_string.length, table->s->connect_string.str)); share->scheme= my_strdup_with_length((const byte*)table->s-> connect_string.str, @@ -721,7 +740,7 @@ static int parse_url(FEDERATED_SHARE *share, TABLE *table, DBUG_PRINT("info", ("scheme %s username %s password %s \ - hostname %s port %d database %s tablename %s\n", + hostname %s port %d database %s tablename %s", share->scheme, share->username, share->password, share->hostname, share->port, share->database, share->table_name)); @@ -741,7 +760,9 @@ ha_federated::ha_federated(TABLE *table_arg) :handler(&federated_hton, table_arg), mysql(0), stored_result(0), scan_flag(0), ref_length(sizeof(MYSQL_ROW_OFFSET)), current_position(0) -{} +{ + trx_next= 0; +} /* @@ -1469,6 +1490,7 @@ int ha_federated::open(const char *name, int mode, uint test_if_locked) with transactions */ mysql->reconnect= 1; + DBUG_RETURN(0); } @@ -1558,9 +1580,6 @@ inline uint field_in_record_is_null(TABLE *table, int ha_federated::write_row(byte *buf) { bool has_fields= FALSE; - uint all_fields_have_same_query_id= 1; - ulong current_query_id= 1; - ulong tmp_query_id= 1; char insert_buffer[FEDERATED_QUERY_BUFFER_SIZE]; char values_buffer[FEDERATED_QUERY_BUFFER_SIZE]; char insert_field_value_buffer[STRING_BUFFER_USUAL_SIZE]; @@ -1588,14 +1607,6 @@ int ha_federated::write_row(byte *buf) table->timestamp_field->set_time(); /* - get the current query id - the fields that we add to the insert - statement to send to the foreign will not be appended unless they match - this query id - */ - current_query_id= table->in_use->query_id; - DBUG_PRINT("info", ("current query id %d", current_query_id)); - - /* start both our field and field values strings */ insert_string.append(FEDERATED_INSERT); @@ -1608,21 +1619,8 @@ int ha_federated::write_row(byte *buf) values_string.append(FEDERATED_OPENPAREN); /* - Even if one field is different, all_fields_same_query_id can't remain - 0 if it remains 0, then that means no fields were specified in the query - such as in the case of INSERT INTO table VALUES (val1, val2, valN) - - */ - for (field= table->field; 
*field; field++) - { - if (field > table->field && tmp_query_id != (*field)->query_id) - all_fields_have_same_query_id= 0; - - tmp_query_id= (*field)->query_id; - } - /* loop through the field pointer array, add any fields to both the values - list and the fields list that match the current query id + list and the fields list that are part of the write set You might ask "Why an index variable (has_fields) ?" My answer is that we need to count how many fields we actually need @@ -1630,8 +1628,7 @@ int ha_federated::write_row(byte *buf) for (field= table->field; *field; field++) { /* if this field is part of the write set */ - if (((*field)->query_id && (*field)->query_id == current_query_id) - || all_fields_have_same_query_id) + if (ha_get_bit_in_write_set((*field)->fieldnr)) { /* There are some fields. This will be used later to determine @@ -2069,7 +2066,7 @@ error: } /* Initialized at each key walk (called multiple times unlike rnd_init()) */ -int ha_federated::index_init(uint keynr) +int ha_federated::index_init(uint keynr, bool sorted) { DBUG_ENTER("ha_federated::index_init"); DBUG_PRINT("info", @@ -2639,4 +2636,151 @@ bool ha_federated::get_error_message(int error, String* buf) DBUG_RETURN(FALSE); } -#endif /* HAVE_FEDERATED_DB */ +int ha_federated::external_lock(THD *thd, int lock_type) +{ + int error= 0; + ha_federated *trx= (ha_federated *)thd->ha_data[federated_hton.slot]; + DBUG_ENTER("ha_federated::external_lock"); + + if (lock_type != F_UNLCK) + { + DBUG_PRINT("info",("federated not lock F_UNLCK")); + if (!(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) + { + DBUG_PRINT("info",("federated autocommit")); + /* + This means we are doing an autocommit + */ + error= connection_autocommit(TRUE); + if (error) + { + DBUG_PRINT("info", ("error setting autocommit TRUE: %d", error)); + DBUG_RETURN(error); + } + trans_register_ha(thd, FALSE, &federated_hton); + } + else + { + DBUG_PRINT("info",("not autocommit")); + if (!trx) + { + /* + This is where a transaction gets its start + */ + error= connection_autocommit(FALSE); + if (error) + { + DBUG_PRINT("info", ("error setting autocommit FALSE: %d", error)); + DBUG_RETURN(error); + } + thd->ha_data[federated_hton.slot]= this; + trans_register_ha(thd, TRUE, &federated_hton); + /* + Send a lock table to the remote end.
+ We do not support this at the moment + */ + if (thd->options & (OPTION_TABLE_LOCK)) + { + DBUG_PRINT("info", ("We do not support lock table yet")); + } + } + else + { + ha_federated *ptr; + for (ptr= trx; ptr; ptr= ptr->trx_next) + if (ptr == this) + break; + else if (!ptr->trx_next) + ptr->trx_next= this; + } + } + } + DBUG_RETURN(0); +} + + +static int federated_commit(THD *thd, bool all) +{ + int return_val= 0; + ha_federated *trx= (ha_federated *)thd->ha_data[federated_hton.slot]; + DBUG_ENTER("federated_commit"); + + if (all) + { + int error= 0; + ha_federated *ptr, *old= NULL; + for (ptr= trx; ptr; old= ptr, ptr= ptr->trx_next) + { + if (old) + old->trx_next= NULL; + error= ptr->connection_commit(); + if (error && !return_val) + return_val= error; + } + thd->ha_data[federated_hton.slot]= NULL; + } + + DBUG_PRINT("info", ("error val: %d", return_val)); + DBUG_RETURN(return_val); +} + + +static int federated_rollback(THD *thd, bool all) +{ + int return_val= 0; + ha_federated *trx= (ha_federated *)thd->ha_data[federated_hton.slot]; + DBUG_ENTER("federated_rollback"); + + if (all) + { + int error= 0; + ha_federated *ptr, *old= NULL; + for (ptr= trx; ptr; old= ptr, ptr= ptr->trx_next) + { + if (old) + old->trx_next= NULL; + error= ptr->connection_rollback(); + if (error && !return_val) + return_val= error; + } + thd->ha_data[federated_hton.slot]= NULL; + } + + DBUG_PRINT("info", ("error val: %d", return_val)); + DBUG_RETURN(return_val); +} + +int ha_federated::connection_commit() +{ + DBUG_ENTER("ha_federated::connection_commit"); + DBUG_RETURN(execute_simple_query("COMMIT", 6)); +} + + +int ha_federated::connection_rollback() +{ + DBUG_ENTER("ha_federated::connection_rollback"); + DBUG_RETURN(execute_simple_query("ROLLBACK", 8)); +} + + +int ha_federated::connection_autocommit(bool state) +{ + const char *text; + DBUG_ENTER("ha_federated::connection_autocommit"); + text= (state == true) ?
"SET AUTOCOMMIT=1" : "SET AUTOCOMMIT=0"; + DBUG_RETURN(execute_simple_query(text, 16)); +} + + +int ha_federated::execute_simple_query(const char *query, int len) +{ + DBUG_ENTER("ha_federated::execute_simple_query"); + + if (mysql_real_query(mysql, query, len)) + { + DBUG_RETURN(stash_remote_error()); + } + DBUG_RETURN(0); +} + diff --git a/sql/ha_federated.h b/sql/ha_federated.h index b25071dda16..4cca27b3900 100644 --- a/sql/ha_federated.h +++ b/sql/ha_federated.h @@ -174,12 +174,14 @@ private: public: ha_federated(TABLE *table_arg); - ~ha_federated() - { - } + ~ha_federated() {} /* The name that will be used for display purposes */ const char *table_type() const { return "FEDERATED"; } /* + Next pointer used in transaction + */ + ha_federated *trx_next; + /* The name of the index type that will be used for display don't implement this method unless you really have indexes */ @@ -259,7 +261,7 @@ public: int write_row(byte *buf); int update_row(const byte *old_data, byte *new_data); int delete_row(const byte *buf); - int index_init(uint keynr); + int index_init(uint keynr, bool sorted); int index_read(byte *buf, const byte *key, uint key_len, enum ha_rkey_function find_flag); int index_read_idx(byte *buf, uint idx, const byte *key, @@ -298,7 +300,14 @@ public: THR_LOCK_DATA **store_lock(THD *thd, THR_LOCK_DATA **to, enum thr_lock_type lock_type); //required virtual bool get_error_message(int error, String *buf); + int external_lock(THD *thd, int lock_type); + int connection_commit(); + int connection_rollback(); + bool has_transactions() { return 1; } + int connection_autocommit(bool state); + int execute_simple_query(const char *query, int len); }; bool federated_db_init(void); -bool federated_db_end(void); +int federated_db_end(ha_panic_function type); + diff --git a/sql/ha_heap.cc b/sql/ha_heap.cc index 98cc96db707..0b91a57b889 100644 --- a/sql/ha_heap.cc +++ b/sql/ha_heap.cc @@ -23,6 +23,9 @@ #include <myisampack.h> #include "ha_heap.h" + +static handler *heap_create_handler(TABLE *table); + handlerton heap_hton= { "MEMORY", SHOW_OPTION_YES, @@ -44,9 +47,24 @@ handlerton heap_hton= { NULL, /* create_cursor_read_view */ NULL, /* set_cursor_read_view */ NULL, /* close_cursor_read_view */ + heap_create_handler, /* Create a new handler */ + NULL, /* Drop a database */ + heap_panic, /* Panic call */ + NULL, /* Release temporary latches */ + NULL, /* Update Statistics */ + NULL, /* Start Consistent Snapshot */ + NULL, /* Flush logs */ + NULL, /* Show status */ + NULL, /* Replication Report Sent Binlog */ HTON_CAN_RECREATE }; +static handler *heap_create_handler(TABLE *table) +{ + return new ha_heap(table); +} + + /***************************************************************************** ** HEAP tables *****************************************************************************/ @@ -477,6 +495,14 @@ int ha_heap::delete_table(const char *name) return error == ENOENT ? 
0 : error; } + +void ha_heap::drop_table(const char *name) +{ + heap_drop_table(file); + close(); +} + + int ha_heap::rename_table(const char * from, const char * to) { return heap_rename(from,to); @@ -633,3 +659,15 @@ ulonglong ha_heap::get_auto_increment() ha_heap::info(HA_STATUS_AUTO); return auto_increment_value; } + + +bool ha_heap::check_if_incompatible_data(HA_CREATE_INFO *info, + uint table_changes) +{ + /* Check that auto_increment value was not changed */ + if ((table_changes != IS_EQUAL_YES && + info->used_fields & HA_CREATE_USED_AUTO) && + info->auto_increment_value != 0) + return COMPATIBLE_DATA_NO; + return COMPATIBLE_DATA_YES; +} diff --git a/sql/ha_heap.h b/sql/ha_heap.h index 7c4227e952c..9b93936b573 100644 --- a/sql/ha_heap.h +++ b/sql/ha_heap.h @@ -40,8 +40,8 @@ public: } const char *index_type(uint inx) { - return ((table->key_info[inx].algorithm == HA_KEY_ALG_BTREE) ? "BTREE" : - "HASH"); + return ((table->key_info[inx].algorithm == HA_KEY_ALG_BTREE) ? + "BTREE" : "HASH"); } /* Rows also use a fixed-size format */ enum row_type get_row_type() const { return ROW_TYPE_FIXED; } @@ -94,6 +94,7 @@ public: int indexes_are_disabled(void); ha_rows records_in_range(uint inx, key_range *min_key, key_range *max_key); int delete_table(const char *from); + void drop_table(const char *name); int rename_table(const char * from, const char * to); int create(const char *name, TABLE *form, HA_CREATE_INFO *create_info); void update_create_info(HA_CREATE_INFO *create_info); @@ -106,6 +107,7 @@ public: HEAP_PTR ptr2=*(HEAP_PTR*)ref2; return ptr1 < ptr2? -1 : (ptr1 > ptr2? 1 : 0); } + bool check_if_incompatible_data(HA_CREATE_INFO *info, uint table_changes); private: void update_key_stats(); }; diff --git a/sql/ha_innodb.cc b/sql/ha_innodb.cc index 56e5fd8923f..dae018e71f1 100644 --- a/sql/ha_innodb.cc +++ b/sql/ha_innodb.cc @@ -34,7 +34,6 @@ have disables the InnoDB inlining in this file. 
*/ #include "mysql_priv.h" #include "slave.h" -#ifdef HAVE_INNOBASE_DB #include <m_ctype.h> #include <hash.h> #include <myisampack.h> @@ -111,28 +110,28 @@ typedef byte mysql_byte; /* Include necessary InnoDB headers */ extern "C" { -#include "../innobase/include/univ.i" -#include "../innobase/include/os0file.h" -#include "../innobase/include/os0thread.h" -#include "../innobase/include/srv0start.h" -#include "../innobase/include/srv0srv.h" -#include "../innobase/include/trx0roll.h" -#include "../innobase/include/trx0trx.h" -#include "../innobase/include/trx0sys.h" -#include "../innobase/include/mtr0mtr.h" -#include "../innobase/include/row0ins.h" -#include "../innobase/include/row0mysql.h" -#include "../innobase/include/row0sel.h" -#include "../innobase/include/row0upd.h" -#include "../innobase/include/log0log.h" -#include "../innobase/include/lock0lock.h" -#include "../innobase/include/dict0crea.h" -#include "../innobase/include/btr0cur.h" -#include "../innobase/include/btr0btr.h" -#include "../innobase/include/fsp0fsp.h" -#include "../innobase/include/sync0sync.h" -#include "../innobase/include/fil0fil.h" -#include "../innobase/include/trx0xa.h" +#include "../storage/innobase/include/univ.i" +#include "../storage/innobase/include/os0file.h" +#include "../storage/innobase/include/os0thread.h" +#include "../storage/innobase/include/srv0start.h" +#include "../storage/innobase/include/srv0srv.h" +#include "../storage/innobase/include/trx0roll.h" +#include "../storage/innobase/include/trx0trx.h" +#include "../storage/innobase/include/trx0sys.h" +#include "../storage/innobase/include/mtr0mtr.h" +#include "../storage/innobase/include/row0ins.h" +#include "../storage/innobase/include/row0mysql.h" +#include "../storage/innobase/include/row0sel.h" +#include "../storage/innobase/include/row0upd.h" +#include "../storage/innobase/include/log0log.h" +#include "../storage/innobase/include/lock0lock.h" +#include "../storage/innobase/include/dict0crea.h" +#include "../storage/innobase/include/btr0cur.h" +#include "../storage/innobase/include/btr0btr.h" +#include "../storage/innobase/include/fsp0fsp.h" +#include "../storage/innobase/include/sync0sync.h" +#include "../storage/innobase/include/fil0fil.h" +#include "../storage/innobase/include/trx0xa.h" } #define HA_INNOBASE_ROWS_IN_TABLE 10000 /* to get optimization right */ @@ -206,6 +205,7 @@ static int innobase_rollback(THD* thd, bool all); static int innobase_rollback_to_savepoint(THD* thd, void *savepoint); static int innobase_savepoint(THD* thd, void *savepoint); static int innobase_release_savepoint(THD* thd, void *savepoint); +static handler *innobase_create_handler(TABLE *table); handlerton innobase_hton = { "InnoDB", @@ -228,9 +228,29 @@ handlerton innobase_hton = { innobase_create_cursor_view, innobase_set_cursor_view, innobase_close_cursor_view, + innobase_create_handler, /* Create a new handler */ + innobase_drop_database, /* Drop a database */ + innobase_end, /* Panic call */ + innobase_release_temporary_latches, /* Release temporary latches */ + innodb_export_status, /* Update Statistics */ + innobase_start_trx_and_assign_read_view, /* Start Consistent Snapshot */ + innobase_flush_logs, /* Flush logs */ + innobase_show_status, /* Show status */ +#ifdef HAVE_REPLICATION + innobase_repl_report_sent_binlog, /* Replication Report Sent Binlog */ +#else + NULL, +#endif HTON_NO_FLAGS }; + +static handler *innobase_create_handler(TABLE *table) +{ + return new ha_innobase(table); +} + + 
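Editor's note: the hunks above and below give every handlerton a create-handler factory slot in place of the old compile-time #ifdef guards. The following standalone C++ sketch models that pattern; TABLE, handler, handlerton_sketch, ha_example, example_hton and get_new_handler_sketch are illustrative stand-ins for the real server definitions, not code from this patch.

#include <cstddef>

struct TABLE {};                            /* stand-in for the server's TABLE */
struct handler { virtual ~handler() {} };   /* stand-in handler base class */

/* Model of the handlerton with the new factory slot added by this patch */
struct handlerton_sketch
{
  const char *name;
  handler *(*create)(TABLE *table);         /* "Create a new handler" slot */
};

struct ha_example : handler
{
  explicit ha_example(TABLE *) {}
};

/* Mirrors blackhole_create_handler(), heap_create_handler(), etc. */
static handler *example_create_handler(TABLE *table)
{
  return new ha_example(table);
}

static handlerton_sketch example_hton= { "EXAMPLE", example_create_handler };

/* Server-side dispatch then reduces to one indirect call through the hton */
static handler *get_new_handler_sketch(handlerton_sketch *hton, TABLE *table)
{
  return hton->create ? hton->create(table) : NULL;
}

int main()
{
  TABLE t;
  handler *h= get_new_handler_sketch(&example_hton, &t);
  delete h;
  return 0;
}

Presumably the server's generic handler-creation path consumes this slot the same way; the patch itself only shows the per-engine factories.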
/********************************************************************* Commits a transaction in an InnoDB database. */ @@ -391,7 +411,7 @@ Call this function when mysqld passes control to the client. That is to avoid deadlocks on the adaptive hash S-latch possibly held by thd. For more documentation, see handler.cc. */ -void +int innobase_release_temporary_latches( /*===============================*/ THD *thd) @@ -400,7 +420,7 @@ innobase_release_temporary_latches( if (!innodb_inited) { - return; + return 0; } trx = (trx_t*) thd->ha_data[innobase_hton.slot]; @@ -408,6 +428,7 @@ innobase_release_temporary_latches( if (trx) { innobase_release_stat_resources(trx); } + return 0; } /************************************************************************ @@ -1450,8 +1471,8 @@ error: /*********************************************************************** Closes an InnoDB database. */ -bool -innobase_end(void) +int +innobase_end(ha_panic_function type) /*==============*/ /* out: TRUE if error */ { @@ -3177,7 +3198,8 @@ build_template( goto include_field; } - if (thd->query_id == field->query_id) { + if (table->file->ha_get_bit_in_read_set(i+1) || + table->file->ha_get_bit_in_write_set(i+1)) { /* This field is needed in the query */ goto include_field; @@ -3797,7 +3819,8 @@ int ha_innobase::index_init( /*====================*/ /* out: 0 or error number */ - uint keynr) /* in: key (index) number */ + uint keynr, /* in: key (index) number */ + bool sorted) /* in: 1 if result MUST be sorted according to index */ { int error = 0; DBUG_ENTER("index_init"); @@ -5071,7 +5094,7 @@ ha_innobase::delete_table( /********************************************************************* Removes all tables in the named database inside InnoDB. */ -int +void innobase_drop_database( /*===================*/ /* out: error number */ @@ -5137,10 +5160,13 @@ innobase_drop_database( innobase_commit_low(trx); trx_free_for_mysql(trx); - +#ifdef NO_LONGER_INTERESTED_IN_DROP_DB_ERROR error = convert_error_code_to_mysql(error, NULL); return(error); +#else + return; +#endif } /************************************************************************* @@ -6445,11 +6471,12 @@ ha_innobase::transactional_table_lock( /**************************************************************************** Here we export InnoDB status variables to MySQL. */ -void +int innodb_export_status(void) /*======================*/ { srv_export_innodb_status(); + return 0; } /**************************************************************************** @@ -6459,9 +6486,9 @@ Monitor to the client. */ bool innodb_show_status( /*===============*/ - THD* thd) /* in: the MySQL query thread of the caller */ + THD* thd, /* in: the MySQL query thread of the caller */ + stat_print_fn *stat_print) { - Protocol* protocol = thd->protocol; trx_t* trx; static const char truncated_msg[] = "... 
truncated...\n"; const long MAX_STATUS_SIZE = 64000; @@ -6471,10 +6498,7 @@ innodb_show_status( DBUG_ENTER("innodb_show_status"); if (have_innodb != SHOW_OPTION_YES) { - my_message(ER_NOT_SUPPORTED_YET, - "Cannot call SHOW INNODB STATUS because skip-innodb is defined", - MYF(0)); - DBUG_RETURN(TRUE); + DBUG_RETURN(FALSE); } trx = check_trx_exists(thd); @@ -6536,28 +6560,14 @@ innodb_show_status( mutex_exit_noninline(&srv_monitor_file_mutex); - List<Item> field_list; - - field_list.push_back(new Item_empty_string("Status", flen)); - - if (protocol->send_fields(&field_list, Protocol::SEND_NUM_ROWS | - Protocol::SEND_EOF)) { - my_free(str, MYF(0)); - - DBUG_RETURN(TRUE); - } + bool result = FALSE; - protocol->prepare_for_resend(); - protocol->store(str, flen, system_charset_info); - my_free(str, MYF(0)); - - if (protocol->write()) { - - DBUG_RETURN(TRUE); + if (stat_print(thd, innobase_hton.name, "", str)) { + result= TRUE; } - send_eof(thd); + my_free(str, MYF(0)); - DBUG_RETURN(FALSE); + DBUG_RETURN(result); } /**************************************************************************** @@ -6566,10 +6576,10 @@ Implements the SHOW MUTEX STATUS command. */ bool innodb_mutex_show_status( /*===============*/ - THD* thd) /* in: the MySQL query thread of the caller */ + THD* thd, /* in: the MySQL query thread of the caller */ + stat_print_fn *stat_print) { - Protocol *protocol= thd->protocol; - List<Item> field_list; + char buf1[IO_SIZE], buf2[IO_SIZE]; mutex_t* mutex; ulint rw_lock_count= 0; ulint rw_lock_count_spin_loop= 0; @@ -6579,19 +6589,6 @@ innodb_mutex_show_status( ulonglong rw_lock_wait_time= 0; DBUG_ENTER("innodb_mutex_show_status"); - field_list.push_back(new Item_empty_string("Mutex", FN_REFLEN)); - field_list.push_back(new Item_empty_string("Module", FN_REFLEN)); - field_list.push_back(new Item_uint("Count", 21)); - field_list.push_back(new Item_uint("Spin_waits", 21)); - field_list.push_back(new Item_uint("Spin_rounds", 21)); - field_list.push_back(new Item_uint("OS_waits", 21)); - field_list.push_back(new Item_uint("OS_yields", 21)); - field_list.push_back(new Item_uint("OS_waits_time", 21)); - - if (protocol->send_fields(&field_list, - Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF)) - DBUG_RETURN(TRUE); - #ifdef MUTEX_PROTECT_TO_BE_ADDED_LATER mutex_enter(&mutex_list_mutex); #endif @@ -6604,17 +6601,16 @@ innodb_mutex_show_status( { if (mutex->count_using > 0) { - protocol->prepare_for_resend(); - protocol->store(mutex->cmutex_name, system_charset_info); - protocol->store(mutex->cfile_name, system_charset_info); - protocol->store((ulonglong)mutex->count_using); - protocol->store((ulonglong)mutex->count_spin_loop); - protocol->store((ulonglong)mutex->count_spin_rounds); - protocol->store((ulonglong)mutex->count_os_wait); - protocol->store((ulonglong)mutex->count_os_yield); - protocol->store((ulonglong)mutex->lspent_time/1000); - - if (protocol->write()) + my_snprintf(buf1, sizeof(buf1), "%s:%s", + mutex->cmutex_name, mutex->cfile_name); + my_snprintf(buf2, sizeof(buf2), + "count=%lu, spin_waits=%lu, spin_rounds=%lu, " + "os_waits=%lu, os_yields=%lu, os_wait_times=%lu", + mutex->count_using, mutex->count_spin_loop, + mutex->count_spin_rounds, + mutex->count_os_wait, mutex->count_os_yield, + mutex->lspent_time/1000); + if (stat_print(thd, innobase_hton.name, buf1, buf2)) { #ifdef MUTEX_PROTECT_TO_BE_ADDED_LATER mutex_exit(&mutex_list_mutex); @@ -6636,17 +6632,15 @@ mutex = UT_LIST_GET_NEXT(list, mutex); } - protocol->prepare_for_resend(); -
protocol->store("rw_lock_mutexes", system_charset_info); - protocol->store("", system_charset_info); - protocol->store((ulonglong)rw_lock_count); - protocol->store((ulonglong)rw_lock_count_spin_loop); - protocol->store((ulonglong)rw_lock_count_spin_rounds); - protocol->store((ulonglong)rw_lock_count_os_wait); - protocol->store((ulonglong)rw_lock_count_os_yield); - protocol->store((ulonglong)rw_lock_wait_time/1000); - - if (protocol->write()) + my_snprintf(buf2, sizeof(buf2), + "count=%lu, spin_waits=%lu, spin_rounds=%lu, " + "os_waits=%lu, os_yields=%lu, os_wait_times=%lu", + rw_lock_count, rw_lock_count_spin_loop, + rw_lock_count_spin_rounds, + rw_lock_count_os_wait, rw_lock_count_os_yield, + rw_lock_wait_time/1000); + + if (stat_print(thd, innobase_hton.name, "rw_lock_mutexes", buf2)) { DBUG_RETURN(1); } @@ -6654,10 +6648,23 @@ innodb_mutex_show_status( #ifdef MUTEX_PROTECT_TO_BE_ADDED_LATER mutex_exit(&mutex_list_mutex); #endif - send_eof(thd); DBUG_RETURN(FALSE); } +bool innobase_show_status(THD* thd, stat_print_fn* stat_print, + enum ha_stat_type stat_type) +{ + switch (stat_type) { + case HA_ENGINE_STATUS: + return innodb_show_status(thd, stat_print); + case HA_ENGINE_MUTEX: + return innodb_mutex_show_status(thd, stat_print); + default: + return FALSE; + } +} + + /**************************************************************************** Handling the shared INNOBASE_SHARE structure that is needed to provide table locking. @@ -6926,7 +6933,7 @@ ha_innobase::innobase_read_and_init_auto_inc( } (void) extra(HA_EXTRA_KEYREAD); - index_init(table->s->next_number_index); + index_init(table->s->next_number_index, 1); /* Starting from 5.0.9, we use a consistent read to read the auto-inc column maximum value. This eliminates the spurious deadlocks caused @@ -7470,4 +7477,23 @@ innobase_set_cursor_view( (cursor_view_t*) curview); } -#endif /* HAVE_INNOBASE_DB */ + +bool ha_innobase::check_if_incompatible_data(HA_CREATE_INFO *info, + uint table_changes) +{ + if (table_changes != IS_EQUAL_YES) + return COMPATIBLE_DATA_NO; + + /* Check that auto_increment value was not changed */ + if ((info->used_fields & HA_CREATE_USED_AUTO) && + info->auto_increment_value != 0) + return COMPATIBLE_DATA_NO; + + /* Check that row format didn't change */ + if ((info->used_fields & HA_CREATE_USED_AUTO) && + get_row_type() != info->row_type) + return COMPATIBLE_DATA_NO; + + return COMPATIBLE_DATA_YES; +} + diff --git a/sql/ha_innodb.h b/sql/ha_innodb.h index eb4e10e545f..c249cdf2bf4 100644 --- a/sql/ha_innodb.h +++ b/sql/ha_innodb.h @@ -124,7 +124,7 @@ class ha_innobase: public handler int delete_row(const byte * buf); void unlock_row(); - int index_init(uint index); + int index_init(uint index, bool sorted); int index_end(); int index_read(byte * buf, const byte * key, uint key_len, enum ha_rkey_function find_flag); @@ -152,6 +152,16 @@ class ha_innobase: public handler int transactional_table_lock(THD *thd, int lock_type); int start_stmt(THD *thd, thr_lock_type lock_type); + int ha_retrieve_all_cols() + { + ha_set_all_bits_in_read_set(); + return extra(HA_EXTRA_RETRIEVE_ALL_COLS); + } + int ha_retrieve_all_pk() + { + ha_set_primary_key_in_read_set(); + return extra(HA_EXTRA_RETRIEVE_PRIMARY_KEY); + } void position(byte *record); ha_rows records_in_range(uint inx, key_range *min_key, key_range *max_key); @@ -196,6 +206,8 @@ class ha_innobase: public handler static ulonglong get_mysql_bin_log_pos(); bool primary_key_is_clustered() { return true; } int cmp_ref(const byte *ref1, const byte *ref2); + bool 
check_if_incompatible_data(HA_CREATE_INFO *info, + uint table_changes); }; extern struct show_var_st innodb_status_variables[]; @@ -243,7 +255,7 @@ extern ulong srv_commit_concurrency; extern TYPELIB innobase_lock_typelib; bool innobase_init(void); -bool innobase_end(void); +int innobase_end(ha_panic_function type); bool innobase_flush_logs(void); uint innobase_get_free_space(void); @@ -261,12 +273,11 @@ int innobase_commit_complete(void* trx_handle); void innobase_store_binlog_offset_and_flush_log(char *binlog_name,longlong offset); #endif -int innobase_drop_database(char *path); -bool innodb_show_status(THD* thd); -bool innodb_mutex_show_status(THD* thd); -void innodb_export_status(void); +void innobase_drop_database(char *path); +bool innobase_show_status(THD* thd, stat_print_fn*, enum ha_stat_type); +int innodb_export_status(void); -void innobase_release_temporary_latches(THD *thd); +int innobase_release_temporary_latches(THD *thd); void innobase_store_binlog_offset_and_flush_log(char *binlog_name,longlong offset); diff --git a/sql/ha_myisam.cc b/sql/ha_myisam.cc index 08fd2d9a8e3..f03434c74e6 100644 --- a/sql/ha_myisam.cc +++ b/sql/ha_myisam.cc @@ -27,8 +27,8 @@ #ifndef MASTER #include "../srclib/myisam/myisamdef.h" #else -#include "../myisam/myisamdef.h" -#include "../myisam/rt_index.h" +#include "../storage/myisam/myisamdef.h" +#include "../storage/myisam/rt_index.h" #endif ulong myisam_recover_options= HA_RECOVER_NONE; @@ -50,6 +50,8 @@ TYPELIB myisam_stats_method_typelib= { ** MyISAM tables *****************************************************************************/ +static handler *myisam_create_handler(TABLE *table); + /* MyISAM handlerton */ handlerton myisam_hton= { @@ -77,9 +79,25 @@ handlerton myisam_hton= { MyISAM doesn't support transactions and doesn't have transaction-dependent context: cursors can survive a commit. */ + myisam_create_handler, /* Create a new handler */ + NULL, /* Drop a database */ + mi_panic,/* Panic call */ + NULL, /* Release temporary latches */ + NULL, /* Update Statistics */ + NULL, /* Start Consistent Snapshot */ + NULL, /* Flush logs */ + NULL, /* Show status */ + NULL, /* Replication Report Sent Binlog */ HTON_CAN_RECREATE }; + +static handler *myisam_create_handler(TABLE *table) +{ + return new ha_myisam(table); +} + + // collect errors printed by mi_check routines static void mi_check_print_msg(MI_CHECK *param, const char* msg_type, @@ -280,6 +298,7 @@ err: int ha_myisam::open(const char *name, int mode, uint test_if_locked) { + uint i; if (!(file=mi_open(name, mode, test_if_locked))) return (my_errno ? 
my_errno : -1); @@ -292,6 +311,14 @@ int ha_myisam::open(const char *name, int mode, uint test_if_locked) int_table_flags|=HA_REC_NOT_IN_SEQ; if (file->s->options & (HA_OPTION_CHECKSUM | HA_OPTION_COMPRESS_RECORD)) int_table_flags|=HA_HAS_CHECKSUM; + + for (i= 0; i < table->s->keys; i++) + { + struct st_plugin_int *parser= table->key_info[i].parser; + if (table->key_info[i].flags & HA_USES_PARSER) + file->s->keyinfo[i].parser= + (struct st_mysql_ftparser *)parser->plugin->info; + } return (0); } @@ -1715,3 +1742,25 @@ uint ha_myisam::checksum() const return (uint)file->state->checksum; } + +bool ha_myisam::check_if_incompatible_data(HA_CREATE_INFO *info, + uint table_changes) +{ + uint options= table->s->db_options_in_use; + + if (info->auto_increment_value != auto_increment_value || + info->raid_type != raid_type || + info->raid_chunks != raid_chunks || + info->raid_chunksize != raid_chunksize || + info->data_file_name != data_file_name || + info->index_file_name != index_file_name || + table_changes == IS_EQUAL_NO) + return COMPATIBLE_DATA_NO; + + if ((options & (HA_OPTION_PACK_RECORD | HA_OPTION_CHECKSUM | + HA_OPTION_DELAY_KEY_WRITE)) != + (info->table_options & (HA_OPTION_PACK_RECORD | HA_OPTION_CHECKSUM | + HA_OPTION_DELAY_KEY_WRITE))) + return COMPATIBLE_DATA_NO; + return COMPATIBLE_DATA_YES; +} diff --git a/sql/ha_myisam.h b/sql/ha_myisam.h index ca684463311..79036893faf 100644 --- a/sql/ha_myisam.h +++ b/sql/ha_myisam.h @@ -123,6 +123,7 @@ class ha_myisam: public handler int backup(THD* thd, HA_CHECK_OPT* check_opt); int assign_to_keycache(THD* thd, HA_CHECK_OPT* check_opt); int preload_keys(THD* thd, HA_CHECK_OPT* check_opt); + bool check_if_incompatible_data(HA_CREATE_INFO *info, uint table_changes); #ifdef HAVE_REPLICATION int dump(THD* thd, int fd); int net_read_dump(NET* net); diff --git a/sql/ha_myisammrg.cc b/sql/ha_myisammrg.cc index da4136def68..abf0f1d2ed5 100644 --- a/sql/ha_myisammrg.cc +++ b/sql/ha_myisammrg.cc @@ -25,13 +25,15 @@ #ifndef MASTER #include "../srclib/myisammrg/myrg_def.h" #else -#include "../myisammrg/myrg_def.h" +#include "../storage/myisammrg/myrg_def.h" #endif /***************************************************************************** ** MyISAM MERGE tables *****************************************************************************/ +static handler *myisammrg_create_handler(TABLE *table); + /* MyISAM MERGE handlerton */ handlerton myisammrg_hton= { @@ -55,9 +57,23 @@ handlerton myisammrg_hton= { NULL, /* create_cursor_read_view */ NULL, /* set_cursor_read_view */ NULL, /* close_cursor_read_view */ + myisammrg_create_handler, /* Create a new handler */ + NULL, /* Drop a database */ + myrg_panic, /* Panic call */ + NULL, /* Release temporary latches */ + NULL, /* Update Statistics */ + NULL, /* Start Consistent Snapshot */ + NULL, /* Flush logs */ + NULL, /* Show status */ + NULL, /* Replication Report Sent Binlog */ HTON_CAN_RECREATE }; +static handler *myisammrg_create_handler(TABLE *table) +{ + return new ha_myisammrg(table); +} + ha_myisammrg::ha_myisammrg(TABLE *table_arg) :handler(&myisammrg_hton, table_arg), file(0) @@ -519,3 +535,14 @@ void ha_myisammrg::append_create_info(String *packet) } packet->append(')'); } + + +bool ha_myisammrg::check_if_incompatible_data(HA_CREATE_INFO *info, + uint table_changes) +{ + /* + For myisammrg, we should always re-generate the mapping file as this + is trivial to do + */ + return COMPATIBLE_DATA_NO; +} diff --git a/sql/ha_myisammrg.h b/sql/ha_myisammrg.h index c762b7c286e..45cf93a7e63 100644 --- 
a/sql/ha_myisammrg.h +++ b/sql/ha_myisammrg.h @@ -82,4 +82,5 @@ class ha_myisammrg: public handler void update_create_info(HA_CREATE_INFO *create_info); void append_create_info(String *packet); MYRG_INFO *myrg_info() { return file; } + bool check_if_incompatible_data(HA_CREATE_INFO *info, uint table_changes); }; diff --git a/sql/ha_ndbcluster.cc b/sql/ha_ndbcluster.cc index cdd406d473c..abb12163f1c 100644 --- a/sql/ha_ndbcluster.cc +++ b/sql/ha_ndbcluster.cc @@ -26,28 +26,37 @@ #include "mysql_priv.h" -#ifdef HAVE_NDBCLUSTER_DB #include <my_dir.h> #include "ha_ndbcluster.h" #include <ndbapi/NdbApi.hpp> #include <ndbapi/NdbScanFilter.hpp> +#include <../util/Bitmask.hpp> +#include <ndbapi/NdbIndexStat.hpp> // options from mysqld.cc extern my_bool opt_ndb_optimized_node_selection; extern const char *opt_ndbcluster_connectstring; +const char *ndb_distribution_names[]= {"KEYHASH", "LINHASH", NullS}; +TYPELIB ndb_distribution_typelib= { array_elements(ndb_distribution_names)-1, + "", ndb_distribution_names, NULL }; +const char *opt_ndb_distribution= ndb_distribution_names[ND_KEYHASH]; +enum ndb_distribution opt_ndb_distribution_id= ND_KEYHASH; + // Default value for parallelism static const int parallelism= 0; // Default value for max number of transactions // creatable against NDB from this handler -static const int max_transactions= 2; +static const int max_transactions= 3; // should really be 2, but one extra transaction is allocated when LOCK TABLES is used static const char *ha_ndb_ext=".ndb"; +static const char share_prefix[]= "./"; static int ndbcluster_close_connection(THD *thd); static int ndbcluster_commit(THD *thd, bool all); static int ndbcluster_rollback(THD *thd, bool all); +static handler* ndbcluster_create_handler(TABLE *table); handlerton ndbcluster_hton = { "ndbcluster", @@ -70,9 +79,23 @@ handlerton ndbcluster_hton = { NULL, /* create_cursor_read_view */ NULL, /* set_cursor_read_view */ NULL, /* close_cursor_read_view */ + ndbcluster_create_handler, /* Create a new handler */ + ndbcluster_drop_database, /* Drop a database */ + ndbcluster_end, /* Panic call */ + NULL, /* Release temporary latches */ + NULL, /* Update Statistics */ + NULL, /* Start Consistent Snapshot */ + NULL, /* Flush logs */ + ndbcluster_show_status, /* Show status */ + NULL, /* Replication Report Sent Binlog */ HTON_NO_FLAGS }; +static handler *ndbcluster_create_handler(TABLE *table) +{ + return new ha_ndbcluster(table); +} + #define NDB_HIDDEN_PRIMARY_KEY_LENGTH 8 #define NDB_FAILED_AUTO_INCREMENT ~(Uint64)0 @@ -90,13 +113,24 @@ handlerton ndbcluster_hton = { DBUG_RETURN(ndb_to_mysql_error(&tmp)); \ } +#define ERR_BREAK(err, code) \ +{ \ + const NdbError& tmp= err; \ + ERR_PRINT(tmp); \ + code= ndb_to_mysql_error(&tmp); \ + break; \ +} + // Typedefs for long names +typedef NdbDictionary::Object NDBOBJ; typedef NdbDictionary::Column NDBCOL; typedef NdbDictionary::Table NDBTAB; typedef NdbDictionary::Index NDBINDEX; typedef NdbDictionary::Dictionary NDBDICT; +typedef NdbDictionary::Event NDBEVENT; -bool ndbcluster_inited= FALSE; +static int ndbcluster_inited= 0; +static int ndbcluster_util_inited= 0; static Ndb* g_ndb= NULL; static Ndb_cluster_connection* g_ndb_cluster_connection= NULL; @@ -109,8 +143,12 @@ static HASH ndbcluster_open_tables; static byte *ndbcluster_get_key(NDB_SHARE *share,uint *length, my_bool not_used __attribute__((unused))); -static NDB_SHARE *get_share(const char *table_name); -static void free_share(NDB_SHARE *share); +static NDB_SHARE *get_share(const char *key, + bool
create_if_not_exists= TRUE, + bool have_lock= FALSE); +static void free_share(NDB_SHARE **share, bool have_lock= FALSE); +static void real_free_share(NDB_SHARE **share); +static void ndb_set_fragmentation(NDBTAB &tab, TABLE *table, uint pk_len); static int packfrm(const void *data, uint len, const void **pack_data, uint *pack_len); static int unpackfrm(const void **data, uint *len, @@ -119,6 +157,33 @@ static int unpackfrm(const void **data, uint *len, static int ndb_get_table_statistics(Ndb*, const char *, struct Ndb_statistics *); +#ifndef DBUG_OFF +void print_records(TABLE *table, const char *record) +{ + if (_db_on_) + { + for (uint j= 0; j < table->s->fields; j++) + { + char buf[40]; + int pos= 0; + Field *field= table->field[j]; + const byte* field_ptr= field->ptr - table->record[0] + record; + int pack_len= field->pack_length(); + int n= pack_len < 10 ? pack_len : 10; + + for (int i= 0; i < n && pos < 20; i++) + { + pos+= sprintf(&buf[pos]," %x", (int) (unsigned char) field_ptr[i]); + } + buf[pos]= 0; + DBUG_PRINT("info",("[%u]field_ptr[0->%d]: %s", j, n, buf)); + } + } +} +#else +#define print_records(a,b) +#endif + // Util thread variables static pthread_t ndb_util_thread; pthread_mutex_t LOCK_ndb_util_thread; @@ -170,65 +235,70 @@ struct show_var_st ndb_status_variables[]= { {NullS, NullS, SHOW_LONG} }; +/* instantiated in storage/ndb/src/ndbapi/Ndbif.cpp */ +extern Uint64 g_latest_trans_gci; + /* Error handling functions */ -struct err_code_mapping -{ - int ndb_err; - int my_err; - int show_warning; -}; +/* Note for merge: old mapping table, moved to storage/ndb/ndberror.c */ -static const err_code_mapping err_map[]= +static int ndb_to_mysql_error(const NdbError *ndberr) { - { 626, HA_ERR_KEY_NOT_FOUND, 0 }, - { 630, HA_ERR_FOUND_DUPP_KEY, 0 }, - { 893, HA_ERR_FOUND_DUPP_KEY, 0 }, - { 721, HA_ERR_TABLE_EXIST, 1 }, - { 4244, HA_ERR_TABLE_EXIST, 1 }, - - { 709, HA_ERR_NO_SUCH_TABLE, 0 }, - - { 266, HA_ERR_LOCK_WAIT_TIMEOUT, 1 }, - { 274, HA_ERR_LOCK_WAIT_TIMEOUT, 1 }, - { 296, HA_ERR_LOCK_WAIT_TIMEOUT, 1 }, - { 297, HA_ERR_LOCK_WAIT_TIMEOUT, 1 }, - { 237, HA_ERR_LOCK_WAIT_TIMEOUT, 1 }, - - { 623, HA_ERR_RECORD_FILE_FULL, 1 }, - { 624, HA_ERR_RECORD_FILE_FULL, 1 }, - { 625, HA_ERR_RECORD_FILE_FULL, 1 }, - { 826, HA_ERR_RECORD_FILE_FULL, 1 }, - { 827, HA_ERR_RECORD_FILE_FULL, 1 }, - { 832, HA_ERR_RECORD_FILE_FULL, 1 }, + /* read the mysql mapped error code */ + int error= ndberr->mysql_code; - { 284, HA_ERR_TABLE_DEF_CHANGED, 0 }, - - { 0, 1, 0 }, - - { -1, -1, 1 } -}; + switch (error) + { + /* errors for which we do not add warnings, just return mapped error code + */ + case HA_ERR_NO_SUCH_TABLE: + case HA_ERR_KEY_NOT_FOUND: + case HA_ERR_FOUND_DUPP_KEY: + return error; + + /* Mapping missing, go with the ndb error code */ + case -1: + error= ndberr->code; + break; + /* Mapping exists, go with the mapped code */ + default: + break; + } -static int ndb_to_mysql_error(const NdbError *err) -{ - uint i; - for (i=0; err_map[i].ndb_err != err->code && err_map[i].my_err != -1; i++); - if (err_map[i].show_warning) - { - // Push the NDB error message as warning + /* + Push the NDB error message as warning + - Used to be able to use SHOW WARNINGS to get more info on what the error is + - Used by replication to see if the error was temporary + */ + if (ndberr->status == NdbError::TemporaryError) push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_ERROR, - ER_GET_ERRMSG, ER(ER_GET_ERRMSG), - err->code, err->message, "NDB"); - } - if (err_map[i].my_err == -1) - return err->code; - return
err_map[i].my_err; + ER_GET_TEMPORARY_ERRMSG, ER(ER_GET_TEMPORARY_ERRMSG), + ndberr->code, ndberr->message, "NDB"); + else + push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_ERROR, + ER_GET_ERRMSG, ER(ER_GET_ERRMSG), + ndberr->code, ndberr->message, "NDB"); + return error; } +int execute_no_commit_ignore_no_key(ha_ndbcluster *h, NdbTransaction *trans) +{ + int res= trans->execute(NdbTransaction::NoCommit, + NdbTransaction::AO_IgnoreError, + h->m_force_send); + if (res == 0) + return 0; + + const NdbError &err= trans->getNdbError(); + if (err.classification != NdbError::ConstraintViolation && + err.classification != NdbError::NoDataFound) + return res; + return 0; +} inline int execute_no_commit(ha_ndbcluster *h, NdbTransaction *trans) @@ -238,9 +308,11 @@ int execute_no_commit(ha_ndbcluster *h, NdbTransaction *trans) if (m_batch_execute) return 0; #endif - return trans->execute(NdbTransaction::NoCommit, - NdbTransaction::AbortOnError, - h->m_force_send); + return h->m_ignore_no_key ? + execute_no_commit_ignore_no_key(h,trans) : + trans->execute(NdbTransaction::NoCommit, + NdbTransaction::AbortOnError, + h->m_force_send); } inline @@ -442,29 +514,38 @@ void ha_ndbcluster::no_uncommitted_rows_reset(THD *thd) # The mapped error code */ -void ha_ndbcluster::invalidate_dictionary_cache(bool global) +void +ha_ndbcluster::invalidate_dictionary_cache(TABLE *table, Ndb *ndb, + const char *tabname, bool global) { - NDBDICT *dict= get_ndb()->getDictionary(); + NDBDICT *dict= ndb->getDictionary(); DBUG_ENTER("invalidate_dictionary_cache"); - DBUG_PRINT("info", ("invalidating %s", m_tabname)); + DBUG_PRINT("info", ("invalidating %s", tabname)); if (global) { - const NDBTAB *tab= dict->getTable(m_tabname); + const NDBTAB *tab= dict->getTable(tabname); if (!tab) DBUG_VOID_RETURN; if (tab->getObjectStatus() == NdbDictionary::Object::Invalid) { // Global cache has already been invalidated - dict->removeCachedTable(m_tabname); + dict->removeCachedTable(tabname); global= FALSE; } else - dict->invalidateTable(m_tabname); + dict->invalidateTable(tabname); } else - dict->removeCachedTable(m_tabname); + dict->removeCachedTable(tabname); table->s->version=0L; /* Free when thread is ready */ + DBUG_VOID_RETURN; +} + +void ha_ndbcluster::invalidate_dictionary_cache(bool global) +{ + NDBDICT *dict= get_ndb()->getDictionary(); + invalidate_dictionary_cache(table, get_ndb(), m_tabname, global); /* Invalidate indexes */ for (uint i= 0; i < table->s->keys; i++) { @@ -496,7 +577,6 @@ void ha_ndbcluster::invalidate_dictionary_cache(bool global) break; } } - DBUG_VOID_RETURN; } int ha_ndbcluster::ndb_err(NdbTransaction *trans) @@ -522,7 +602,7 @@ int ha_ndbcluster::ndb_err(NdbTransaction *trans) { err= dict->getNdbError(); DBUG_PRINT("info", ("Table not found, error: %d", err.code)); - if (err.code != 709) + if (err.code != 709 && err.code != 723) DBUG_RETURN(1); } DBUG_PRINT("info", ("Table exists but must have changed")); @@ -622,8 +702,7 @@ bool ha_ndbcluster::set_hidden_key(NdbOperation *ndb_op, uint fieldnr, const byte *field_ptr) { DBUG_ENTER("set_hidden_key"); - DBUG_RETURN(ndb_op->equal(fieldnr, (char*)field_ptr, - NDB_HIDDEN_PRIMARY_KEY_LENGTH) != 0); + DBUG_RETURN(ndb_op->equal(fieldnr, (char*)field_ptr) != 0); } @@ -653,14 +732,15 @@ int ha_ndbcluster::set_ndb_key(NdbOperation *ndb_op, Field *field, */ int ha_ndbcluster::set_ndb_value(NdbOperation *ndb_op, Field *field, - uint fieldnr, bool *set_blob_value) + uint fieldnr, int row_offset, + bool *set_blob_value) { - const byte* field_ptr= field->ptr; - 
uint32 pack_len= field->pack_length(); + const byte* field_ptr= field->ptr + row_offset; + uint32 pack_len= field->pack_length(); DBUG_ENTER("set_ndb_value"); - DBUG_PRINT("enter", ("%d: %s, type: %u, len=%d, is_null=%s", + DBUG_PRINT("enter", ("%d: %s type: %u len=%d is_null=%s", fieldnr, field->field_name, field->type(), - pack_len, field->is_null()?"Y":"N")); + pack_len, field->is_null(row_offset) ? "Y" : "N")); DBUG_DUMP("value", (char*) field_ptr, pack_len); DBUG_ASSERT(ndb_supported_type(field->type())); @@ -671,7 +751,7 @@ int ha_ndbcluster::set_ndb_value(NdbOperation *ndb_op, Field *field, { pack_len= sizeof(empty_field); field_ptr= (byte *)&empty_field; - if (field->is_null()) + if (field->is_null(row_offset)) empty_field= 0; else empty_field= 1; @@ -680,13 +760,14 @@ int ha_ndbcluster::set_ndb_value(NdbOperation *ndb_op, Field *field, { if (field->type() != MYSQL_TYPE_BIT) { - if (field->is_null()) + if (field->is_null(row_offset)) + { + DBUG_PRINT("info", ("field is NULL")); // Set value to NULL - DBUG_RETURN((ndb_op->setValue(fieldnr, - (char*)NULL, pack_len) != 0)); + DBUG_RETURN((ndb_op->setValue(fieldnr, (char*)NULL) != 0)); + } // Common implementation for most field types - DBUG_RETURN(ndb_op->setValue(fieldnr, - (char*)field_ptr, pack_len) != 0); + DBUG_RETURN(ndb_op->setValue(fieldnr, (char*)field_ptr) != 0); } else // if (field->type() == MYSQL_TYPE_BIT) { @@ -695,26 +776,25 @@ int ha_ndbcluster::set_ndb_value(NdbOperation *ndb_op, Field *field, // Round up bit field length to nearest word boundry pack_len= ((pack_len + 3) >> 2) << 2; DBUG_ASSERT(pack_len <= 8); - if (field->is_null()) + if (field->is_null(row_offset)) // Set value to NULL - DBUG_RETURN((ndb_op->setValue(fieldnr, (char*)NULL, pack_len) != 0)); + DBUG_RETURN((ndb_op->setValue(fieldnr, (char*)NULL) != 0)); DBUG_PRINT("info", ("bit field")); DBUG_DUMP("value", (char*)&bits, pack_len); #ifdef WORDS_BIGENDIAN if (pack_len < 5) { - DBUG_RETURN(ndb_op->setValue(fieldnr, - ((char*)&bits)+4, pack_len) != 0); + DBUG_RETURN(ndb_op->setValue(fieldnr, ((char*)&bits)+4) != 0); } #endif - DBUG_RETURN(ndb_op->setValue(fieldnr, (char*)&bits, pack_len) != 0); + DBUG_RETURN(ndb_op->setValue(fieldnr, (char*)&bits) != 0); } } // Blob type NdbBlob *ndb_blob= ndb_op->getBlobHandle(fieldnr); if (ndb_blob != NULL) { - if (field->is_null()) + if (field->is_null(row_offset)) DBUG_RETURN(ndb_blob->setNull() != 0); Field_blob *field_blob= (Field_blob*)field; @@ -795,8 +875,8 @@ int ha_ndbcluster::get_ndb_blobs_value(NdbBlob *last_ndb_blob) { char *buf= m_blobs_buffer + offset; uint32 len= 0xffffffff; // Max uint32 - DBUG_PRINT("value", ("read blob ptr=%x len=%u", - (UintPtr)buf, (uint)blob_len)); + DBUG_PRINT("value", ("read blob ptr=%lx len=%u", + buf, (uint) blob_len)); if (ndb_blob->readData(buf, len) != 0) DBUG_RETURN(-1); DBUG_ASSERT(len == blob_len); @@ -878,21 +958,18 @@ int ha_ndbcluster::get_ndb_value(NdbOperation *ndb_op, Field *field, /* Check if any set or get of blob value in current query. */ -bool ha_ndbcluster::uses_blob_value(bool all_fields) +bool ha_ndbcluster::uses_blob_value() { if (table->s->blob_fields == 0) return FALSE; - if (all_fields) - return TRUE; { uint no_fields= table->s->fields; int i; - THD *thd= current_thd; // They always put blobs at the end.. 
for (i= no_fields - 1; i >= 0; i--) { - Field *field= table->field[i]; - if (thd->query_id == field->query_id) + if ((m_write_op && ha_get_bit_in_write_set(i+1)) || + (!m_write_op && ha_get_bit_in_read_set(i+1))) { return TRUE; } @@ -910,6 +987,19 @@ bool ha_ndbcluster::uses_blob_value(bool all_fields) of table accessed in NDB */ +static int cmp_frm(const NDBTAB *ndbtab, const void *pack_data, + uint pack_length) +{ + DBUG_ENTER("cmp_frm"); + /* + Compare FrmData in NDB with frm file from disk. + */ + if ((pack_length != ndbtab->getFrmLength()) || + (memcmp(pack_data, ndbtab->getFrmData(), pack_length))) + DBUG_RETURN(1); + DBUG_RETURN(0); +} + int ha_ndbcluster::get_metadata(const char *path) { Ndb *ndb= get_ndb(); @@ -947,8 +1037,7 @@ int ha_ndbcluster::get_metadata(const char *path) DBUG_RETURN(1); } - if ((pack_length != tab->getFrmLength()) || - (memcmp(pack_data, tab->getFrmData(), pack_length))) + if (cmp_frm(tab, pack_data, pack_length)) { if (!invalidating_ndb_table) { @@ -959,9 +1048,9 @@ int ha_ndbcluster::get_metadata(const char *path) else { DBUG_PRINT("error", - ("metadata, pack_length: %d getFrmLength: %d memcmp: %d", + ("metadata, pack_length: %d getFrmLength: %d memcmp: %d", pack_length, tab->getFrmLength(), - memcmp(pack_data, tab->getFrmData(), pack_length))); + memcmp(pack_data, tab->getFrmData(), pack_length))); DBUG_DUMP("pack_data", (char*)pack_data, pack_length); DBUG_DUMP("frm", (char*)tab->getFrmData(), tab->getFrmLength()); error= 3; @@ -1085,6 +1174,26 @@ int ha_ndbcluster::build_index_list(Ndb *ndb, TABLE *tab, enum ILBP phase) const NDBINDEX *index= dict->getIndex(index_name, m_tabname); if (!index) DBUG_RETURN(1); m_index[i].index= (void *) index; + // ordered index - add stats + NDB_INDEX_DATA& d=m_index[i]; + delete d.index_stat; + d.index_stat=NULL; + THD *thd=current_thd; + if (thd->variables.ndb_index_stat_enable) + { + d.index_stat=new NdbIndexStat(index); + d.index_stat_cache_entries=thd->variables.ndb_index_stat_cache_entries; + d.index_stat_update_freq=thd->variables.ndb_index_stat_update_freq; + d.index_stat_query_count=0; + d.index_stat->alloc_cache(d.index_stat_cache_entries); + DBUG_PRINT("info", ("index %s stat=on cache_entries=%u update_freq=%u", + index->getName(), + d.index_stat_cache_entries, + d.index_stat_update_freq)); + } else + { + DBUG_PRINT("info", ("index %s stat=off", index->getName())); + } } if (idx_type == UNIQUE_ORDERED_INDEX || idx_type == UNIQUE_INDEX) { @@ -1156,6 +1265,8 @@ void ha_ndbcluster::release_metadata() my_free((char *)m_index[i].unique_index_attrid_map, MYF(0)); m_index[i].unique_index_attrid_map= NULL; } + delete m_index[i].index_stat; + m_index[i].index_stat=NULL; } DBUG_VOID_RETURN; @@ -1165,7 +1276,7 @@ int ha_ndbcluster::get_ndb_lock_type(enum thr_lock_type type) { if (type >= TL_WRITE_ALLOW_WRITE) return NdbOperation::LM_Exclusive; - else if (uses_blob_value(m_retrieve_all_fields)) + else if (uses_blob_value()) return NdbOperation::LM_Read; else return NdbOperation::LM_CommittedRead; @@ -1318,17 +1429,14 @@ inline int ha_ndbcluster::define_read_attrs(byte* buf, NdbOperation* op) { uint i; - THD *thd= current_thd; - DBUG_ENTER("define_read_attrs"); // Define attributes to read for (i= 0; i < table->s->fields; i++) { Field *field= table->field[i]; - if ((thd->query_id == field->query_id) || - ((field->flags & PRI_KEY_FLAG)) || - m_retrieve_all_fields) + if (ha_get_bit_in_read_set(i+1) || + ((field->flags & PRI_KEY_FLAG))) { if (get_ndb_value(op, field, i, buf)) ERR_RETURN(op->getNdbError()); @@ -1355,11 
+1463,13 @@ int ha_ndbcluster::define_read_attrs(byte* buf, NdbOperation* op) DBUG_RETURN(0); } + /* Read one record from NDB using primary key */ -int ha_ndbcluster::pk_read(const byte *key, uint key_len, byte *buf) +int ha_ndbcluster::pk_read(const byte *key, uint key_len, byte *buf, + uint32 part_id) { uint no_fields= table->s->fields; NdbConnection *trans= m_active_trans; @@ -1369,6 +1479,7 @@ int ha_ndbcluster::pk_read(const byte *key, uint key_len, byte *buf) DBUG_ENTER("pk_read"); DBUG_PRINT("enter", ("key_len: %u", key_len)); DBUG_DUMP("key", (char*)key, key_len); + m_write_op= FALSE; NdbOperation::LockMode lm= (NdbOperation::LockMode)get_ndb_lock_type(m_lock.type); @@ -1376,6 +1487,8 @@ int ha_ndbcluster::pk_read(const byte *key, uint key_len, byte *buf) op->readTuple(lm) != 0) ERR_RETURN(trans->getNdbError()); + if (m_use_partition_function) + op->setPartitionId(part_id); if (table->s->primary_key == MAX_KEY) { // This table has no primary key, use "hidden" primary key @@ -1413,17 +1526,20 @@ int ha_ndbcluster::pk_read(const byte *key, uint key_len, byte *buf) Read one complementing record from NDB using primary key from old_data */ -int ha_ndbcluster::complemented_pk_read(const byte *old_data, byte *new_data) +int ha_ndbcluster::complemented_pk_read(const byte *old_data, byte *new_data, + uint32 old_part_id) { uint no_fields= table->s->fields, i; NdbTransaction *trans= m_active_trans; NdbOperation *op; - THD *thd= current_thd; DBUG_ENTER("complemented_pk_read"); + m_write_op= FALSE; - if (m_retrieve_all_fields) + if (ha_get_all_bit_in_read_set()) + { // We have allready retrieved all fields, nothing to complement DBUG_RETURN(0); + } NdbOperation::LockMode lm= (NdbOperation::LockMode)get_ndb_lock_type(m_lock.type); @@ -1433,12 +1549,16 @@ int ha_ndbcluster::complemented_pk_read(const byte *old_data, byte *new_data) int res; if ((res= set_primary_key_from_record(op, old_data))) ERR_RETURN(trans->getNdbError()); + + if (m_use_partition_function) + op->setPartitionId(old_part_id); + // Read all unreferenced non-key field(s) for (i= 0; i < no_fields; i++) { Field *field= table->field[i]; if (!((field->flags & PRI_KEY_FLAG) || - (thd->query_id == field->query_id))) + (ha_get_bit_in_read_set(i+1)))) { if (get_ndb_value(op, field, i, new_data)) ERR_RETURN(trans->getNdbError()); @@ -1462,7 +1582,7 @@ int ha_ndbcluster::complemented_pk_read(const byte *old_data, byte *new_data) { Field *field= table->field[i]; if (!((field->flags & PRI_KEY_FLAG) || - (thd->query_id == field->query_id))) + (ha_get_bit_in_read_set(i+1)))) { m_value[i].ptr= NULL; } @@ -1491,6 +1611,17 @@ int ha_ndbcluster::peek_row(const byte *record) if ((res= set_primary_key_from_record(op, record))) ERR_RETURN(trans->getNdbError()); + if (m_use_partition_function) + { + uint32 part_id; + int error; + if ((error= m_part_info->get_partition_id(m_part_info, &part_id))) + { + DBUG_RETURN(error); + } + op->setPartitionId(part_id); + } + if (execute_no_commit_ie(this,trans) != 0) { table->status= STATUS_NOT_FOUND; @@ -1650,10 +1781,12 @@ inline int ha_ndbcluster::next_result(byte *buf) */ int ha_ndbcluster::set_bounds(NdbIndexScanOperation *op, + uint inx, + bool rir, const key_range *keys[2], uint range_no) { - const KEY *const key_info= table->key_info + active_index; + const KEY *const key_info= table->key_info + inx; const uint key_parts= key_info->key_parts; uint key_tot_len[2]; uint tot_len; @@ -1718,7 +1851,10 @@ int ha_ndbcluster::set_bounds(NdbIndexScanOperation *op, switch (p.key->flag) { case HA_READ_KEY_EXACT: - 
p.bound_type= NdbIndexScanOperation::BoundEQ; + if (! rir) + p.bound_type= NdbIndexScanOperation::BoundEQ; + else // differs for records_in_range + p.bound_type= NdbIndexScanOperation::BoundLE; break; // ascending case HA_READ_KEY_OR_NEXT: @@ -1829,7 +1965,8 @@ int ha_ndbcluster::set_bounds(NdbIndexScanOperation *op, int ha_ndbcluster::ordered_index_scan(const key_range *start_key, const key_range *end_key, - bool sorted, bool descending, byte* buf) + bool sorted, bool descending, + byte* buf, part_id_range *part_spec) { int res; bool restart; @@ -1840,6 +1977,7 @@ int ha_ndbcluster::ordered_index_scan(const key_range *start_key, DBUG_PRINT("enter", ("index: %u, sorted: %d, descending: %d", active_index, sorted, descending)); DBUG_PRINT("enter", ("Starting new ordered scan on %s", m_tabname)); + m_write_op= FALSE; // Check that sorted seems to be initialised DBUG_ASSERT(sorted == 0 || sorted == 1); @@ -1854,11 +1992,17 @@ int ha_ndbcluster::ordered_index_scan(const key_range *start_key, (const NDBTAB *) m_table)) || op->readTuples(lm, 0, parallelism, sorted, descending)) ERR_RETURN(trans->getNdbError()); + if (m_use_partition_function && part_spec != NULL && + part_spec->start_part == part_spec->end_part) + op->setPartitionId(part_spec->start_part); m_active_cursor= op; } else { restart= TRUE; op= (NdbIndexScanOperation*)m_active_cursor; + if (m_use_partition_function && part_spec != NULL && + part_spec->start_part == part_spec->end_part) + op->setPartitionId(part_spec->start_part); DBUG_ASSERT(op->getSorted() == sorted); DBUG_ASSERT(op->getLockMode() == (NdbOperation::LockMode)get_ndb_lock_type(m_lock.type)); @@ -1868,7 +2012,7 @@ int ha_ndbcluster::ordered_index_scan(const key_range *start_key, { const key_range *keys[2]= { start_key, end_key }; - res= set_bounds(op, keys); + res= set_bounds(op, active_index, false, keys); if (res) DBUG_RETURN(res); } @@ -1899,6 +2043,7 @@ int ha_ndbcluster::full_table_scan(byte *buf) DBUG_ENTER("full_table_scan"); DBUG_PRINT("enter", ("Starting new scan on %s", m_tabname)); + m_write_op= FALSE; NdbOperation::LockMode lm= (NdbOperation::LockMode)get_ndb_lock_type(m_lock.type); @@ -1928,10 +2073,11 @@ int ha_ndbcluster::write_row(byte *record) NdbOperation *op; int res; THD *thd= current_thd; + m_write_op= TRUE; DBUG_ENTER("write_row"); - if (m_ignore_dup_key && table->s->primary_key != MAX_KEY) + if (!m_use_write && m_ignore_dup_key && table->s->primary_key != MAX_KEY) { int peek_res= peek_row(record); @@ -1956,6 +2102,17 @@ int ha_ndbcluster::write_row(byte *record) if (res != 0) ERR_RETURN(trans->getNdbError()); + if (m_use_partition_function) + { + uint32 part_id; + int error; + if ((error= m_part_info->get_partition_id(m_part_info, &part_id))) + { + DBUG_RETURN(error); + } + op->setPartitionId(part_id); + } + if (table->s->primary_key == MAX_KEY) { // Table has hidden primary key @@ -1997,7 +2154,8 @@ int ha_ndbcluster::write_row(byte *record) { Field *field= table->field[i]; if (!(field->flags & PRI_KEY_FLAG) && - set_ndb_value(op, field, i, &set_blob_value)) + (ha_get_bit_in_write_set(i + 1) || !m_use_write) && + set_ndb_value(op, field, i, record-table->record[0], &set_blob_value)) { m_skip_auto_increment= TRUE; ERR_RETURN(op->getNdbError()); @@ -2113,25 +2271,35 @@ int ha_ndbcluster::update_row(const byte *old_data, byte *new_data) NdbScanOperation* cursor= m_active_cursor; NdbOperation *op; uint i; + uint32 old_part_id= 0, new_part_id= 0; + int error; DBUG_ENTER("update_row"); + m_write_op= TRUE; 
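/* Editor's note, not part of the patch: from here update_row() relies on the handler column bitmaps instead of per-field query_id checks; fieldnr is 1-based, hence ha_set_bit_in_write_set(table->timestamp_field->fieldnr) just below and the ha_get_bit_in_write_set(i+1) test for table->field[i] in the field loop further down. */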
statistic_increment(thd->status_var.ha_update_count, &LOCK_status); if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_UPDATE) { table->timestamp_field->set_time(); - // Set query_id so that field is really updated - table->timestamp_field->query_id= thd->query_id; + ha_set_bit_in_write_set(table->timestamp_field->fieldnr); + } + + if (m_use_partition_function && + (error= get_parts_for_update(old_data, new_data, table->record[0], + m_part_info, &old_part_id, &new_part_id))) + { + DBUG_RETURN(error); } /* Check for update of primary key for special handling */ if ((table->s->primary_key != MAX_KEY) && - (key_cmp(table->s->primary_key, old_data, new_data))) + (key_cmp(table->s->primary_key, old_data, new_data)) || + (old_part_id != new_part_id)) { int read_res, insert_res, delete_res, undo_res; DBUG_PRINT("info", ("primary key update, doing pk read+delete+insert")); // Get all old fields, since we optimize away fields not in query - read_res= complemented_pk_read(old_data, new_data); + read_res= complemented_pk_read(old_data, new_data, old_part_id); if (read_res) { DBUG_PRINT("info", ("pk read failed")); @@ -2185,8 +2353,10 @@ int ha_ndbcluster::update_row(const byte *old_data, byte *new_data) if (!(op= cursor->updateCurrentTuple())) ERR_RETURN(trans->getNdbError()); m_ops_pending++; - if (uses_blob_value(FALSE)) + if (uses_blob_value()) m_blobs_pending= TRUE; + if (m_use_partition_function) + cursor->setPartitionId(new_part_id); } else { @@ -2194,6 +2364,8 @@ int ha_ndbcluster::update_row(const byte *old_data, byte *new_data) op->updateTuple() != 0) ERR_RETURN(trans->getNdbError()); + if (m_use_partition_function) + op->setPartitionId(new_part_id); if (table->s->primary_key == MAX_KEY) { // This table has no primary key, use "hidden" primary key @@ -2223,9 +2395,9 @@ int ha_ndbcluster::update_row(const byte *old_data, byte *new_data) for (i= 0; i < table->s->fields; i++) { Field *field= table->field[i]; - if (((thd->query_id == field->query_id) || m_retrieve_all_fields) && + if (ha_get_bit_in_write_set(i+1) && (!(field->flags & PRI_KEY_FLAG)) && - set_ndb_value(op, field, i)) + set_ndb_value(op, field, i, new_data - table->record[0])) ERR_RETURN(op->getNdbError()); } @@ -2249,11 +2421,21 @@ int ha_ndbcluster::delete_row(const byte *record) NdbTransaction *trans= m_active_trans; NdbScanOperation* cursor= m_active_cursor; NdbOperation *op; + uint32 part_id; + int error; DBUG_ENTER("delete_row"); + m_write_op= TRUE; statistic_increment(thd->status_var.ha_delete_count,&LOCK_status); m_rows_changed++; + if (m_use_partition_function && + (error= get_part_for_delete(record, table->record[0], m_part_info, + &part_id))) + { + DBUG_RETURN(error); + } + if (cursor) { /* @@ -2268,6 +2450,9 @@ int ha_ndbcluster::delete_row(const byte *record) ERR_RETURN(trans->getNdbError()); m_ops_pending++; + if (m_use_partition_function) + cursor->setPartitionId(part_id); + no_uncommitted_rows_update(-1); if (!m_primary_key_update) @@ -2281,6 +2466,9 @@ int ha_ndbcluster::delete_row(const byte *record) op->deleteTuple() != 0) ERR_RETURN(trans->getNdbError()); + if (m_use_partition_function) + op->setPartitionId(part_id); + no_uncommitted_rows_update(-1); if (table->s->primary_key == MAX_KEY) @@ -2324,51 +2512,73 @@ int ha_ndbcluster::delete_row(const byte *record) set to null. 
*/ -void ha_ndbcluster::unpack_record(byte* buf) +static void ndb_unpack_record(TABLE *table, NdbValue *value, + MY_BITMAP *defined, byte *buf) { + Field **p_field= table->field, *field= *p_field; uint row_offset= (uint) (buf - table->record[0]); - Field **field, **end; - NdbValue *value= m_value; - DBUG_ENTER("unpack_record"); + DBUG_ENTER("ndb_unpack_record"); - end= table->field + table->s->fields; - // Set null flag(s) bzero(buf, table->s->null_bytes); - for (field= table->field; - field < end; - field++, value++) + for ( ; field; + p_field++, value++, field= *p_field) { if ((*value).ptr) { - if (! ((*field)->flags & BLOB_FLAG)) + if (!(field->flags & BLOB_FLAG)) { - if ((*value).rec->isNULL()) - (*field)->set_null(row_offset); - else if ((*field)->type() == MYSQL_TYPE_BIT) + int is_null= (*value).rec->isNULL(); + if (is_null) { - uint pack_len= (*field)->pack_length(); - if (pack_len < 5) + if (is_null > 0) + { + DBUG_PRINT("info",("[%u] NULL", + (*value).rec->getColumn()->getColumnNo())); + field->set_null(row_offset); + } + else + { + DBUG_PRINT("info",("[%u] UNDEFINED", + (*value).rec->getColumn()->getColumnNo())); + bitmap_clear_bit(defined, + (*value).rec->getColumn()->getColumnNo()); + } + } + else if (field->type() == MYSQL_TYPE_BIT) + { + byte *save_field_ptr= field->ptr; + field->ptr= save_field_ptr + row_offset; + if (field->pack_length() < 5) { DBUG_PRINT("info", ("bit field H'%.8X", (*value).rec->u_32_value())); - ((Field_bit *) *field)->store((longlong) - (*value).rec->u_32_value(), - FALSE); + ((Field_bit*) field)->store((longlong) + (*value).rec->u_32_value(), FALSE); } else { DBUG_PRINT("info", ("bit field H'%.8X%.8X", - *(Uint32 *)(*value).rec->aRef(), - *((Uint32 *)(*value).rec->aRef()+1))); - ((Field_bit *) *field)->store((longlong) - (*value).rec->u_64_value(), TRUE); + *(Uint32*) (*value).rec->aRef(), + *((Uint32*) (*value).rec->aRef()+1))); + ((Field_bit*) field)->store((longlong) + (*value).rec->u_64_value(),TRUE); } + field->ptr= save_field_ptr; + DBUG_PRINT("info",("[%u] SET", + (*value).rec->getColumn()->getColumnNo())); + DBUG_DUMP("info", (const char*) field->ptr, field->field_length); + } + else + { + DBUG_PRINT("info",("[%u] SET", + (*value).rec->getColumn()->getColumnNo())); + DBUG_DUMP("info", (const char*) field->ptr, field->field_length); } } else { - NdbBlob* ndb_blob= (*value).blob; + NdbBlob *ndb_blob= (*value).blob; bool isNull= TRUE; #ifndef DBUG_OFF int ret= @@ -2376,11 +2586,16 @@ void ha_ndbcluster::unpack_record(byte* buf) ndb_blob->getNull(isNull); DBUG_ASSERT(ret == 0); if (isNull) - (*field)->set_null(row_offset); + field->set_null(row_offset); } } } - + DBUG_VOID_RETURN; +} + +void ha_ndbcluster::unpack_record(byte *buf) +{ + ndb_unpack_record(table, m_value, 0, buf); #ifndef DBUG_OFF // Read and print all values that was fetched if (table->s->primary_key == MAX_KEY) @@ -2396,7 +2611,6 @@ void ha_ndbcluster::unpack_record(byte* buf) } //print_results(); #endif - DBUG_VOID_RETURN; } /* @@ -2408,8 +2622,6 @@ void ha_ndbcluster::print_results() DBUG_ENTER("print_results"); #ifndef DBUG_OFF - const NDBTAB *tab= (const NDBTAB*) m_table; - if (!_db_on_) DBUG_VOID_RETURN; @@ -2464,11 +2676,13 @@ print_value: } -int ha_ndbcluster::index_init(uint index) +int ha_ndbcluster::index_init(uint index, bool sorted) { DBUG_ENTER("ha_ndbcluster::index_init"); - DBUG_PRINT("enter", ("index: %u", index)); - DBUG_RETURN(handler::index_init(index)); + DBUG_PRINT("enter", ("index: %u sorted: %d", index, sorted)); + active_index= index; + m_sorted= sorted; + 
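/* Editor's note, not part of the patch: the m_sorted flag saved here is consumed by index_read(), which now funnels into read_range_first_to_buf(..., m_sorted, buf) instead of repeating the per-index-type dispatch it used to contain. */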
DBUG_RETURN(0); } @@ -2505,55 +2719,16 @@ int ha_ndbcluster::index_read(byte *buf, const byte *key, uint key_len, enum ha_rkey_function find_flag) { + key_range start_key; + bool descending= FALSE; DBUG_ENTER("ha_ndbcluster::index_read"); DBUG_PRINT("enter", ("active_index: %u, key_len: %u, find_flag: %d", active_index, key_len, find_flag)); - int error; - ndb_index_type type= get_index_type(active_index); - const KEY* key_info= table->key_info+active_index; - switch (type){ - case PRIMARY_KEY_ORDERED_INDEX: - case PRIMARY_KEY_INDEX: - if (find_flag == HA_READ_KEY_EXACT && key_info->key_length == key_len) - { - if (m_active_cursor && (error= close_scan())) - DBUG_RETURN(error); - DBUG_RETURN(pk_read(key, key_len, buf)); - } - else if (type == PRIMARY_KEY_INDEX) - { - DBUG_RETURN(1); - } - break; - case UNIQUE_ORDERED_INDEX: - case UNIQUE_INDEX: - if (find_flag == HA_READ_KEY_EXACT && key_info->key_length == key_len && - !check_null_in_key(key_info, key, key_len)) - { - if (m_active_cursor && (error= close_scan())) - DBUG_RETURN(error); - DBUG_RETURN(unique_index_read(key, key_len, buf)); - } - else if (type == UNIQUE_INDEX) - { - DBUG_RETURN(1); - } - break; - case ORDERED_INDEX: - break; - default: - case UNDEFINED_INDEX: - DBUG_ASSERT(FALSE); - DBUG_RETURN(1); - break; - } - - key_range start_key; start_key.key= key; start_key.length= key_len; start_key.flag= find_flag; - bool descending= FALSE; + descending= FALSE; switch (find_flag) { case HA_READ_KEY_OR_PREV: case HA_READ_BEFORE_KEY: @@ -2564,8 +2739,8 @@ int ha_ndbcluster::index_read(byte *buf, default: break; } - error= ordered_index_scan(&start_key, 0, TRUE, descending, buf); - DBUG_RETURN(error == HA_ERR_END_OF_FILE ? HA_ERR_KEY_NOT_FOUND : error); + DBUG_RETURN(read_range_first_to_buf(&start_key, 0, descending, + m_sorted, buf)); } @@ -2576,7 +2751,7 @@ int ha_ndbcluster::index_read_idx(byte *buf, uint index_no, statistic_increment(current_thd->status_var.ha_read_key_count, &LOCK_status); DBUG_ENTER("ha_ndbcluster::index_read_idx"); DBUG_PRINT("enter", ("index_no: %u, key_len: %u", index_no, key_len)); - index_init(index_no); + index_init(index_no, 0); DBUG_RETURN(index_read(buf, key, key_len, find_flag)); } @@ -2607,7 +2782,7 @@ int ha_ndbcluster::index_first(byte *buf) // Start the ordered index scan and fetch the first row // Only HA_READ_ORDER indexes get called by index_first - DBUG_RETURN(ordered_index_scan(0, 0, TRUE, FALSE, buf)); + DBUG_RETURN(ordered_index_scan(0, 0, TRUE, FALSE, buf, NULL)); } @@ -2615,7 +2790,7 @@ int ha_ndbcluster::index_last(byte *buf) { DBUG_ENTER("ha_ndbcluster::index_last"); statistic_increment(current_thd->status_var.ha_read_last_count,&LOCK_status); - DBUG_RETURN(ordered_index_scan(0, 0, TRUE, TRUE, buf)); + DBUG_RETURN(ordered_index_scan(0, 0, TRUE, TRUE, buf, NULL)); } int ha_ndbcluster::index_read_last(byte * buf, const byte * key, uint key_len) @@ -2624,66 +2799,76 @@ int ha_ndbcluster::index_read_last(byte * buf, const byte * key, uint key_len) DBUG_RETURN(index_read(buf, key, key_len, HA_READ_PREFIX_LAST)); } -inline int ha_ndbcluster::read_range_first_to_buf(const key_range *start_key, const key_range *end_key, - bool eq_r, bool sorted, + bool desc, bool sorted, byte* buf) { - KEY* key_info; - int error= 1; + part_id_range part_spec; + ndb_index_type type= get_index_type(active_index); + const KEY* key_info= table->key_info+active_index; + int error; DBUG_ENTER("ha_ndbcluster::read_range_first_to_buf"); - DBUG_PRINT("info", ("eq_r: %d, sorted: %d", eq_r, sorted)); + DBUG_PRINT("info", 
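
index_read() no longer dispatches on index type inline; it builds a key_range and maps the find_flag to a scan direction before delegating to read_range_first_to_buf(). A sketch of that mapping, covering the two flags visible in the hunk plus the prefix-last pair that conventionally scans backwards (the hunk is truncated, so the exact flag set is an assumption):

    // Enumerators mirror only the handler flags used here, not the full enum.
    enum ha_rkey_function_sketch {
      HA_READ_KEY_EXACT_S,
      HA_READ_KEY_OR_PREV_S,
      HA_READ_BEFORE_KEY_S,
      HA_READ_PREFIX_LAST_S,
      HA_READ_PREFIX_LAST_OR_PREV_S
    };

    bool is_descending_scan(ha_rkey_function_sketch flag)
    {
      switch (flag) {
      case HA_READ_KEY_OR_PREV_S:         // "<= key": walk backwards
      case HA_READ_BEFORE_KEY_S:          // "< key"
      case HA_READ_PREFIX_LAST_S:         // last row matching a prefix
      case HA_READ_PREFIX_LAST_OR_PREV_S:
        return true;
      default:
        return false;                     // exact and ">=" reads scan forward
      }
    }
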
("desc: %d, sorted: %d", desc, sorted)); - switch (get_index_type(active_index)){ + if (m_use_partition_function) + { + get_partition_set(table, buf, active_index, start_key, &part_spec); + if (part_spec.start_part > part_spec.end_part) + { + DBUG_RETURN(HA_ERR_END_OF_FILE); + } + else if (part_spec.start_part == part_spec.end_part) + { + /* + Only one partition is required to scan, if sorted is required we + don't need it any more since output from one ordered partitioned + index is always sorted. + */ + sorted= FALSE; + } + } + m_write_op= FALSE; + switch (type){ case PRIMARY_KEY_ORDERED_INDEX: case PRIMARY_KEY_INDEX: - key_info= table->key_info + active_index; if (start_key && start_key->length == key_info->key_length && start_key->flag == HA_READ_KEY_EXACT) { if (m_active_cursor && (error= close_scan())) DBUG_RETURN(error); - error= pk_read(start_key->key, start_key->length, buf); - DBUG_RETURN(error == HA_ERR_KEY_NOT_FOUND ? HA_ERR_END_OF_FILE : error); + DBUG_RETURN(pk_read(start_key->key, start_key->length, buf, + part_spec.start_part)); } break; case UNIQUE_ORDERED_INDEX: case UNIQUE_INDEX: - key_info= table->key_info + active_index; if (start_key && start_key->length == key_info->key_length && start_key->flag == HA_READ_KEY_EXACT && !check_null_in_key(key_info, start_key->key, start_key->length)) { if (m_active_cursor && (error= close_scan())) DBUG_RETURN(error); - error= unique_index_read(start_key->key, start_key->length, buf); - DBUG_RETURN(error == HA_ERR_KEY_NOT_FOUND ? HA_ERR_END_OF_FILE : error); + DBUG_RETURN(unique_index_read(start_key->key, start_key->length, buf)); } break; default: break; } - // Start the ordered index scan and fetch the first row - error= ordered_index_scan(start_key, end_key, sorted, FALSE, buf); - DBUG_RETURN(error); + DBUG_RETURN(ordered_index_scan(start_key, end_key, sorted, desc, buf, + &part_spec)); } - int ha_ndbcluster::read_range_first(const key_range *start_key, const key_range *end_key, bool eq_r, bool sorted) { byte* buf= table->record[0]; DBUG_ENTER("ha_ndbcluster::read_range_first"); - - DBUG_RETURN(read_range_first_to_buf(start_key, - end_key, - eq_r, - sorted, - buf)); + DBUG_RETURN(read_range_first_to_buf(start_key, end_key, FALSE, + sorted, buf)); } int ha_ndbcluster::read_range_next() @@ -2709,7 +2894,7 @@ int ha_ndbcluster::rnd_init(bool scan) DBUG_RETURN(-1); } } - index_init(table->s->primary_key); + index_init(table->s->primary_key, 0); DBUG_RETURN(0); } @@ -2776,7 +2961,20 @@ int ha_ndbcluster::rnd_pos(byte *buf, byte *pos) &LOCK_status); // The primary key for the record is stored in pos // Perform a pk_read using primary key "index" - DBUG_RETURN(pk_read(pos, ref_length, buf)); + { + part_id_range part_spec; + if (m_use_partition_function) + { + key_range key_spec; + KEY *key_info= table->key_info + active_index; + key_spec.key= pos; + key_spec.length= ref_length; + key_spec.flag= HA_READ_KEY_EXACT; + get_full_part_id_from_key(table, buf, key_info, &key_spec, &part_spec); + DBUG_ASSERT(part_spec.start_part == part_spec.end_part); + } + DBUG_RETURN(pk_read(pos, ref_length, buf, part_spec.start_part)); + } } @@ -2908,83 +3106,11 @@ int ha_ndbcluster::extra(enum ha_extra_function operation) { DBUG_ENTER("extra"); switch (operation) { - case HA_EXTRA_NORMAL: /* Optimize for space (def) */ - DBUG_PRINT("info", ("HA_EXTRA_NORMAL")); - break; - case HA_EXTRA_QUICK: /* Optimize for speed */ - DBUG_PRINT("info", ("HA_EXTRA_QUICK")); - break; case HA_EXTRA_RESET: /* Reset database to after open */ DBUG_PRINT("info", 
("HA_EXTRA_RESET")); DBUG_PRINT("info", ("Clearing condition stack")); cond_clear(); break; - case HA_EXTRA_CACHE: /* Cash record in HA_rrnd() */ - DBUG_PRINT("info", ("HA_EXTRA_CACHE")); - break; - case HA_EXTRA_NO_CACHE: /* End cacheing of records (def) */ - DBUG_PRINT("info", ("HA_EXTRA_NO_CACHE")); - break; - case HA_EXTRA_NO_READCHECK: /* No readcheck on update */ - DBUG_PRINT("info", ("HA_EXTRA_NO_READCHECK")); - break; - case HA_EXTRA_READCHECK: /* Use readcheck (def) */ - DBUG_PRINT("info", ("HA_EXTRA_READCHECK")); - break; - case HA_EXTRA_KEYREAD: /* Read only key to database */ - DBUG_PRINT("info", ("HA_EXTRA_KEYREAD")); - break; - case HA_EXTRA_NO_KEYREAD: /* Normal read of records (def) */ - DBUG_PRINT("info", ("HA_EXTRA_NO_KEYREAD")); - break; - case HA_EXTRA_NO_USER_CHANGE: /* No user is allowed to write */ - DBUG_PRINT("info", ("HA_EXTRA_NO_USER_CHANGE")); - break; - case HA_EXTRA_KEY_CACHE: - DBUG_PRINT("info", ("HA_EXTRA_KEY_CACHE")); - break; - case HA_EXTRA_NO_KEY_CACHE: - DBUG_PRINT("info", ("HA_EXTRA_NO_KEY_CACHE")); - break; - case HA_EXTRA_WAIT_LOCK: /* Wait until file is avalably (def) */ - DBUG_PRINT("info", ("HA_EXTRA_WAIT_LOCK")); - break; - case HA_EXTRA_NO_WAIT_LOCK: /* If file is locked, return quickly */ - DBUG_PRINT("info", ("HA_EXTRA_NO_WAIT_LOCK")); - break; - case HA_EXTRA_WRITE_CACHE: /* Use write cache in ha_write() */ - DBUG_PRINT("info", ("HA_EXTRA_WRITE_CACHE")); - break; - case HA_EXTRA_FLUSH_CACHE: /* flush write_record_cache */ - DBUG_PRINT("info", ("HA_EXTRA_FLUSH_CACHE")); - break; - case HA_EXTRA_NO_KEYS: /* Remove all update of keys */ - DBUG_PRINT("info", ("HA_EXTRA_NO_KEYS")); - break; - case HA_EXTRA_KEYREAD_CHANGE_POS: /* Keyread, but change pos */ - DBUG_PRINT("info", ("HA_EXTRA_KEYREAD_CHANGE_POS")); /* xxxxchk -r must be used */ - break; - case HA_EXTRA_REMEMBER_POS: /* Remember pos for next/prev */ - DBUG_PRINT("info", ("HA_EXTRA_REMEMBER_POS")); - break; - case HA_EXTRA_RESTORE_POS: - DBUG_PRINT("info", ("HA_EXTRA_RESTORE_POS")); - break; - case HA_EXTRA_REINIT_CACHE: /* init cache from current record */ - DBUG_PRINT("info", ("HA_EXTRA_REINIT_CACHE")); - break; - case HA_EXTRA_FORCE_REOPEN: /* Datafile have changed on disk */ - DBUG_PRINT("info", ("HA_EXTRA_FORCE_REOPEN")); - break; - case HA_EXTRA_FLUSH: /* Flush tables to disk */ - DBUG_PRINT("info", ("HA_EXTRA_FLUSH")); - break; - case HA_EXTRA_NO_ROWS: /* Don't write rows */ - DBUG_PRINT("info", ("HA_EXTRA_NO_ROWS")); - break; - case HA_EXTRA_RESET_STATE: /* Reset positions */ - DBUG_PRINT("info", ("HA_EXTRA_RESET_STATE")); - break; case HA_EXTRA_IGNORE_DUP_KEY: /* Dup keys don't rollback everything*/ DBUG_PRINT("info", ("HA_EXTRA_IGNORE_DUP_KEY")); if (current_thd->lex->sql_command == SQLCOM_REPLACE) @@ -3003,34 +3129,8 @@ int ha_ndbcluster::extra(enum ha_extra_function operation) m_use_write= FALSE; m_ignore_dup_key= FALSE; break; - case HA_EXTRA_RETRIEVE_ALL_COLS: /* Retrieve all columns, not just those - where field->query_id is the same as - the current query id */ - DBUG_PRINT("info", ("HA_EXTRA_RETRIEVE_ALL_COLS")); - m_retrieve_all_fields= TRUE; - break; - case HA_EXTRA_PREPARE_FOR_DELETE: - DBUG_PRINT("info", ("HA_EXTRA_PREPARE_FOR_DELETE")); - break; - case HA_EXTRA_PREPARE_FOR_UPDATE: /* Remove read cache if problems */ - DBUG_PRINT("info", ("HA_EXTRA_PREPARE_FOR_UPDATE")); - break; - case HA_EXTRA_PRELOAD_BUFFER_SIZE: - DBUG_PRINT("info", ("HA_EXTRA_PRELOAD_BUFFER_SIZE")); - break; - case HA_EXTRA_RETRIEVE_PRIMARY_KEY: - DBUG_PRINT("info", 
("HA_EXTRA_RETRIEVE_PRIMARY_KEY")); - m_retrieve_primary_key= TRUE; - break; - case HA_EXTRA_CHANGE_KEY_TO_UNIQUE: - DBUG_PRINT("info", ("HA_EXTRA_CHANGE_KEY_TO_UNIQUE")); - break; - case HA_EXTRA_CHANGE_KEY_TO_DUP: - DBUG_PRINT("info", ("HA_EXTRA_CHANGE_KEY_TO_DUP")); - case HA_EXTRA_KEYREAD_PRESERVE_FIELDS: - DBUG_PRINT("info", ("HA_EXTRA_KEYREAD_PRESERVE_FIELDS")); + default: break; - } DBUG_RETURN(0); @@ -3231,8 +3331,9 @@ int ha_ndbcluster::external_lock(THD *thd, int lock_type) Thd_ndb *thd_ndb= get_thd_ndb(thd); Ndb *ndb= thd_ndb->ndb; - DBUG_PRINT("enter", ("thd: %x, thd_ndb: %x, thd_ndb->lock_count: %d", - thd, thd_ndb, thd_ndb->lock_count)); + DBUG_PRINT("enter", ("this: %x thd: %lx thd_ndb: %lx " + "thd_ndb->lock_count: %d", + this, thd, thd_ndb, thd_ndb->lock_count)); if (lock_type != F_UNLCK) { @@ -3309,8 +3410,6 @@ int ha_ndbcluster::external_lock(THD *thd, int lock_type) DBUG_ASSERT(m_active_trans); // Start of transaction m_rows_changed= 0; - m_retrieve_all_fields= FALSE; - m_retrieve_primary_key= FALSE; m_ops_pending= 0; { NDBDICT *dict= ndb->getDictionary(); @@ -3445,8 +3544,6 @@ int ha_ndbcluster::start_stmt(THD *thd, thr_lock_type lock_type) m_active_trans= trans; // Start of statement - m_retrieve_all_fields= FALSE; - m_retrieve_primary_key= FALSE; m_ops_pending= 0; DBUG_RETURN(error); @@ -3492,7 +3589,8 @@ int ndbcluster_commit(THD *thd, bool all) while ((share= it++)) { pthread_mutex_lock(&share->mutex); - DBUG_PRINT("info", ("Invalidate commit_count for %s, share->commit_count: %d ", share->table_name, share->commit_count)); + DBUG_PRINT("info", ("Invalidate commit_count for %s, share->commit_count: %d ", + share->key, share->commit_count)); share->commit_count= 0; share->commit_count_lock++; pthread_mutex_unlock(&share->mutex); @@ -3823,53 +3921,7 @@ static int create_ndb_column(NDBCOL &col, /* Create a table in NDB Cluster - */ - -static void ndb_set_fragmentation(NDBTAB &tab, TABLE *form, uint pk_length) -{ - if (form->s->max_rows == (ha_rows) 0) /* default setting, don't set fragmentation */ - return; - /** - * get the number of fragments right - */ - uint no_fragments; - { -#if MYSQL_VERSION_ID >= 50000 - uint acc_row_size= 25 + /*safety margin*/ 2; -#else - uint acc_row_size= pk_length*4; - /* add acc overhead */ - if (pk_length <= 8) /* main page will set the limit */ - acc_row_size+= 25 + /*safety margin*/ 2; - else /* overflow page will set the limit */ - acc_row_size+= 4 + /*safety margin*/ 4; -#endif - ulonglong acc_fragment_size= 512*1024*1024; - ulonglong max_rows= form->s->max_rows; -#if MYSQL_VERSION_ID >= 50100 - no_fragments= (max_rows*acc_row_size)/acc_fragment_size+1; -#else - no_fragments= ((max_rows*acc_row_size)/acc_fragment_size+1 - +1/*correct rounding*/)/2; -#endif - } - { - uint no_nodes= g_ndb_cluster_connection->no_db_nodes(); - NDBTAB::FragmentType ftype; - if (no_fragments > 2*no_nodes) - { - ftype= NDBTAB::FragAllLarge; - if (no_fragments > 4*no_nodes) - push_warning(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN, ER_UNKNOWN_ERROR, - "Ndb might have problems storing the max amount of rows specified"); - } - else if (no_fragments > no_nodes) - ftype= NDBTAB::FragAllMedium; - else - ftype= NDBTAB::FragAllSmall; - tab.setFragmentType(ftype); - } -} +*/ int ha_ndbcluster::create(const char *name, TABLE *form, @@ -3896,7 +3948,8 @@ int ha_ndbcluster::create(const char *name, caller. 
Do Ndb specific stuff, such as create a .ndb file */ - my_errno= write_ndb_file(); + if ((my_errno= write_ndb_file())) + DBUG_RETURN(my_errno); DBUG_RETURN(my_errno); } @@ -3910,7 +3963,7 @@ int ha_ndbcluster::create(const char *name, if (packfrm(data, length, &pack_data, &pack_length)) DBUG_RETURN(2); - DBUG_PRINT("info", ("setFrm data=%x, len=%d", pack_data, pack_length)); + DBUG_PRINT("info", ("setFrm data=%lx len=%d", pack_data, pack_length)); tab.setFrm(pack_data, pack_length); my_free((char*)data, MYF(0)); my_free((char*)pack_data, MYF(0)); @@ -3923,11 +3976,40 @@ int ha_ndbcluster::create(const char *name, field->pack_length())); if ((my_errno= create_ndb_column(col, field, info))) DBUG_RETURN(my_errno); + + if ( +#ifdef NDB_DISKDATA + info->store_on_disk || +#else + getenv("NDB_DEFAULT_DISK")) +#endif + col.setStorageType(NdbDictionary::Column::StorageTypeDisk); + else + col.setStorageType(NdbDictionary::Column::StorageTypeMemory); + tab.addColumn(col); if (col.getPrimaryKey()) pk_length += (field->pack_length() + 3) / 4; } - + + KEY* key_info; + for (i= 0, key_info= form->key_info; i < form->s->keys; i++, key_info++) + { + KEY_PART_INFO *key_part= key_info->key_part; + KEY_PART_INFO *end= key_part + key_info->key_parts; + for (; key_part != end; key_part++) + tab.getColumn(key_part->fieldnr-1)->setStorageType( + NdbDictionary::Column::StorageTypeMemory); + } + +#ifdef NDB_DISKDATA + if (info->store_on_disk) + if (info->tablespace) + tab.setTablespace(info->tablespace); + else + tab.setTablespace("DEFAULT-TS"); +#endif + // No primary key, create shadow key as 64 bit, auto increment if (form->s->primary_key == MAX_KEY) { @@ -3975,7 +4057,22 @@ int ha_ndbcluster::create(const char *name, } } - ndb_set_fragmentation(tab, form, pk_length); + // Check partition info + partition_info *part_info= form->s->part_info; + if (part_info) + { + int error; + if ((error= set_up_partition_info(part_info, form, (void*)&tab))) + { + DBUG_RETURN(error); + } + } + else + { + ndb_set_fragmentation(tab, form, pk_length); + } + + if ((my_errno= check_ndb_connection())) DBUG_RETURN(my_errno); @@ -4097,10 +4194,17 @@ int ha_ndbcluster::rename_table(const char *from, const char *to) // Change current database to that of target table set_dbname(to); ndb->setDatabaseName(m_dbname); - if (!(result= alter_table_name(new_tabname))) + + if ((result= alter_table_name(new_tabname))) + { + DBUG_RETURN(result); + } + + // Rename .ndb file + if ((result= handler::rename_table(from, to))) { - // Rename .ndb file - result= handler::rename_table(from, to); + // ToDo in 4.1 should rollback alter table... 
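
The storage-type logic added to create() follows a simple rule, reading the hunk: a column goes to disk only when requested (or when NDB_DEFAULT_DISK is set in the environment), and any column that participates in an index is forced back to in-memory storage afterwards, since the key_info loop overrides it. A condensed sketch of the combined condition:

    #include <cstdlib>

    enum class Storage { Memory, Disk };

    Storage column_storage(bool store_on_disk, bool is_indexed)
    {
      if (is_indexed)
        return Storage::Memory;      // indexed columns stay in memory, always
      if (store_on_disk || std::getenv("NDB_DEFAULT_DISK"))
        return Storage::Disk;
      return Storage::Memory;
    }
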
+ DBUG_RETURN(result); } DBUG_RETURN(result); @@ -4116,7 +4220,7 @@ int ha_ndbcluster::alter_table_name(const char *to) Ndb *ndb= get_ndb(); NDBDICT *dict= ndb->getDictionary(); const NDBTAB *orig_tab= (const NDBTAB *) m_table; - DBUG_ENTER("alter_table_name_table"); + DBUG_ENTER("alter_table_name"); NdbDictionary::Table new_tab= *orig_tab; new_tab.setName(to); @@ -4135,6 +4239,38 @@ int ha_ndbcluster::alter_table_name(const char *to) */ +/* static version which does not need a handler */ + +int +ha_ndbcluster::delete_table(ha_ndbcluster *h, Ndb *ndb, + const char *path, + const char *db, + const char *table_name) +{ + DBUG_ENTER("ha_ndbcluster::ndbcluster_delete_table"); + NDBDICT *dict= ndb->getDictionary(); + + /* Drop the table from NDB */ + + int res; + if (h) + { + res= h->drop_table(); + } + else + { + ndb->setDatabaseName(db); + res= dict->dropTable(table_name); + } + + if (res) + { + DBUG_RETURN(res); + } + + DBUG_RETURN(0); +} + int ha_ndbcluster::delete_table(const char *name) { DBUG_ENTER("ha_ndbcluster::delete_table"); @@ -4147,9 +4283,8 @@ int ha_ndbcluster::delete_table(const char *name) /* Call ancestor function to delete .ndb file */ handler::delete_table(name); - - /* Drop the table from NDB */ - DBUG_RETURN(drop_table()); + + DBUG_RETURN(delete_table(this, get_ndb(),name, m_dbname, m_tabname)); } @@ -4164,7 +4299,6 @@ int ha_ndbcluster::drop_table() DBUG_ENTER("drop_table"); DBUG_PRINT("enter", ("Deleting %s", m_tabname)); - release_metadata(); if (dict->dropTable(m_tabname)) ERR_RETURN(dict->getNdbError()); @@ -4215,6 +4349,15 @@ ulonglong ha_ndbcluster::get_auto_increment() Constructor for the NDB Cluster table handler */ +#define HA_NDBCLUSTER_TABLE_FLAGS \ + HA_REC_NOT_IN_SEQ | \ + HA_NULL_IN_KEY | \ + HA_AUTO_PART_KEY | \ + HA_NO_PREFIX_CHAR_KEYS | \ + HA_NEED_READ_RANGE_BUFFER | \ + HA_CAN_GEOMETRY | \ + HA_CAN_BIT_FIELD + ha_ndbcluster::ha_ndbcluster(TABLE *table_arg): handler(&ndbcluster_hton, table_arg), m_active_trans(NULL), @@ -4222,19 +4365,15 @@ ha_ndbcluster::ha_ndbcluster(TABLE *table_arg): m_table(NULL), m_table_version(-1), m_table_info(NULL), - m_table_flags(HA_REC_NOT_IN_SEQ | - HA_NULL_IN_KEY | - HA_AUTO_PART_KEY | - HA_NO_PREFIX_CHAR_KEYS | - HA_NEED_READ_RANGE_BUFFER | - HA_CAN_GEOMETRY | - HA_CAN_BIT_FIELD), + m_table_flags(HA_NDBCLUSTER_TABLE_FLAGS), m_share(0), + m_part_info(NULL), + m_use_partition_function(FALSE), + m_sorted(FALSE), m_use_write(FALSE), m_ignore_dup_key(FALSE), m_primary_key_update(FALSE), - m_retrieve_all_fields(FALSE), - m_retrieve_primary_key(FALSE), + m_ignore_no_key(FALSE), m_rows_to_insert((ha_rows) 1), m_rows_inserted((ha_rows) 0), m_bulk_insert_rows((ha_rows) 1024), @@ -4269,6 +4408,10 @@ ha_ndbcluster::ha_ndbcluster(TABLE *table_arg): m_index[i].unique_index= NULL; m_index[i].index= NULL; m_index[i].unique_index_attrid_map= NULL; + m_index[i].index_stat=NULL; + m_index[i].index_stat_cache_entries=0; + m_index[i].index_stat_update_freq=0; + m_index[i].index_stat_query_count=0; } DBUG_VOID_RETURN; @@ -4284,7 +4427,9 @@ ha_ndbcluster::~ha_ndbcluster() DBUG_ENTER("~ha_ndbcluster"); if (m_share) - free_share(m_share); + { + free_share(&m_share); + } release_metadata(); my_free(m_blobs_buffer, MYF(MY_ALLOW_ZERO_PTR)); m_blobs_buffer= 0; @@ -4317,8 +4462,8 @@ int ha_ndbcluster::open(const char *name, int mode, uint test_if_locked) int res; KEY *key; DBUG_ENTER("open"); - DBUG_PRINT("enter", ("name: %s mode: %d test_if_locked: %d", - name, mode, test_if_locked)); + DBUG_PRINT("enter", ("this: %d name: %s mode: %d 
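
rename_table() is now a strict two-step sequence: first the rename inside the NDB dictionary, then the local .ndb file, each bailing out on failure. As the ToDo in the hunk notes, a failure in the second step currently leaves the dictionary rename in place rather than rolling it back. A sketch of the shape, with function-pointer stand-ins for the two operations:

    #include <string>

    int rename_two_step(bool (*ndb_rename)(const std::string &),
                        bool (*file_rename)(const std::string &),
                        const std::string &to)
    {
      if (!ndb_rename(to))
        return 1;            // nothing changed yet, safe to fail here
      if (!file_rename(to))
        return 2;            // ToDo per the diff: roll back the dict rename
      return 0;
    }
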
test_if_locked: %d", + this, name, mode, test_if_locked)); // Setup ref_length to make room for the whole // primary key to be written in the ref variable @@ -4338,7 +4483,8 @@ int ha_ndbcluster::open(const char *name, int mode, uint test_if_locked) set_tabname(name); if (check_ndb_connection()) { - free_share(m_share); m_share= 0; + free_share(&m_share); + m_share= 0; DBUG_RETURN(HA_ERR_NO_CONNECTION); } @@ -4346,6 +4492,15 @@ int ha_ndbcluster::open(const char *name, int mode, uint test_if_locked) if (!res) info(HA_STATUS_VARIABLE | HA_STATUS_CONST); + if (table->s->part_info) + { + m_part_info= table->s->part_info; + if (!(m_part_info->part_type == HASH_PARTITION && + m_part_info->list_of_part_fields && + !is_sub_partitioned(m_part_info))) + m_use_partition_function= TRUE; + } + DBUG_RETURN(res); } @@ -4358,7 +4513,8 @@ int ha_ndbcluster::open(const char *name, int mode, uint test_if_locked) int ha_ndbcluster::close(void) { DBUG_ENTER("close"); - free_share(m_share); m_share= 0; + free_share(&m_share); + m_share= 0; release_metadata(); DBUG_RETURN(0); } @@ -4467,7 +4623,7 @@ int ndbcluster_discover(THD* thd, const char *db, const char *name, if (!(tab= dict->getTable(name))) { const NdbError err= dict->getNdbError(); - if (err.code == 709) + if (err.code == 709 || err.code == 723) DBUG_RETURN(-1); ERR_RETURN(err); } @@ -4514,7 +4670,7 @@ int ndbcluster_table_exists_in_engine(THD* thd, const char *db, const char *name if (!(tab= dict->getTable(name))) { const NdbError err= dict->getNdbError(); - if (err.code == 709) + if (err.code == 709 || err.code == 723) DBUG_RETURN(0); ERR_RETURN(err); } @@ -4535,9 +4691,10 @@ extern "C" byte* tables_get_key(const char *entry, uint *length, /* Drop a database in NDB Cluster - */ + NOTE add a dummy void function, since stupid handlerton is returning void instead of int... 
+*/ -int ndbcluster_drop_database(const char *path) +int ndbcluster_drop_database_impl(const char *path) { DBUG_ENTER("ndbcluster_drop_database"); THD *thd= current_thd; @@ -4552,33 +4709,36 @@ int ndbcluster_drop_database(const char *path) DBUG_PRINT("enter", ("db: %s", dbname)); if (!(ndb= check_ndb_in_thd(thd))) - DBUG_RETURN(HA_ERR_NO_CONNECTION); + DBUG_RETURN(-1); // List tables in NDB NDBDICT *dict= ndb->getDictionary(); if (dict->listObjects(list, NdbDictionary::Object::UserTable) != 0) - ERR_RETURN(dict->getNdbError()); + DBUG_RETURN(-1); for (i= 0 ; i < list.count ; i++) { - NdbDictionary::Dictionary::List::Element& t= list.elements[i]; - DBUG_PRINT("info", ("Found %s/%s in NDB", t.database, t.name)); + NdbDictionary::Dictionary::List::Element& elmt= list.elements[i]; + DBUG_PRINT("info", ("Found %s/%s in NDB", elmt.database, elmt.name)); // Add only tables that belongs to db - if (my_strcasecmp(system_charset_info, t.database, dbname)) + if (my_strcasecmp(system_charset_info, elmt.database, dbname)) continue; - DBUG_PRINT("info", ("%s must be dropped", t.name)); - drop_list.push_back(thd->strdup(t.name)); + DBUG_PRINT("info", ("%s must be dropped", elmt.name)); + drop_list.push_back(thd->strdup(elmt.name)); } // Drop any tables belonging to database + char full_path[FN_REFLEN]; + char *tmp= strxnmov(full_path, FN_REFLEN, share_prefix, dbname, "/", NullS); ndb->setDatabaseName(dbname); List_iterator_fast<char> it(drop_list); while ((tabname=it++)) { - if (dict->dropTable(tabname)) + strxnmov(tmp, FN_REFLEN - (tmp - full_path), tabname, NullS); + if (ha_ndbcluster::delete_table(0, ndb, full_path, dbname, tabname)) { const NdbError err= dict->getNdbError(); - if (err.code != 709) + if (err.code != 709 && err.code != 723) { ERR_PRINT(err); ret= ndb_to_mysql_error(&err); @@ -4588,6 +4748,96 @@ int ndbcluster_drop_database(const char *path) DBUG_RETURN(ret); } +void ndbcluster_drop_database(char *path) +{ + ndbcluster_drop_database_impl(path); +} +/* + find all tables in ndb and discover those needed +*/ +static int ndbcluster_find_all_files(THD *thd) +{ + DBUG_ENTER("ndbcluster_find_all_files"); + Ndb* ndb; + char key[FN_REFLEN]; + NdbDictionary::Dictionary::List list; + + if (!(ndb= check_ndb_in_thd(thd))) + DBUG_RETURN(HA_ERR_NO_CONNECTION); + + NDBDICT *dict= ndb->getDictionary(); + + int unhandled, retries= 5; + do + { + if (dict->listObjects(list, NdbDictionary::Object::UserTable) != 0) + ERR_RETURN(dict->getNdbError()); + unhandled= 0; + for (uint i= 0 ; i < list.count ; i++) + { + NDBDICT::List::Element& elmt= list.elements[i]; + DBUG_PRINT("info", ("Found %s.%s in NDB", elmt.database, elmt.name)); + if (!(elmt.state == NDBOBJ::StateBuilding || + elmt.state == NDBOBJ::StateOnline)) + { + sql_print_information("NDB: skipping setup table %s.%s, in state %d", + elmt.database, elmt.name, elmt.state); + continue; + } + + ndb->setDatabaseName(elmt.database); + const NDBTAB *ndbtab; + + if (!(ndbtab= dict->getTable(elmt.name))) + { + sql_print_error("NDB: failed to setup table %s.%s, error: %d, %s", + elmt.database, elmt.name, + dict->getNdbError().code, + dict->getNdbError().message); + unhandled++; + continue; + } + + if (ndbtab->getFrmLength() == 0) + continue; + + strxnmov(key, FN_LEN, mysql_data_home, "/", + elmt.database, "/", elmt.name, NullS); + const void *data= 0, *pack_data= 0; + uint length, pack_length; + int discover= 0; + if (readfrm(key, &data, &length) || + packfrm(data, length, &pack_data, &pack_length)) + { + discover= 1; + sql_print_information("NDB: missing frm 
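
ndbcluster_find_all_files() discovers a table when the local .frm is missing or its packed image differs from the frm blob stored in the cluster dictionary, and it re-lists the dictionary until nothing is left unhandled or the retry budget runs out. A sketch of both pieces, with std::optional modeling a missing local frm:

    #include <optional>
    #include <string>

    // Discover when the local frm is absent or stale relative to NDB's copy.
    bool must_discover(const std::optional<std::string> &local_packed_frm,
                       const std::string &cluster_frm)
    {
      if (!local_packed_frm)
        return true;                            // missing frm: fetch from NDB
      return *local_packed_frm != cluster_frm;  // mismatch: refresh from NDB
    }

    // Matches the do/while in the hunk: setup_pass() returns the count of
    // tables that could not be set up this round.
    template <class Pass>
    int run_with_retries(Pass setup_pass, int retries = 5)
    {
      int unhandled;
      do {
        unhandled = setup_pass();
      } while (unhandled && retries-- > 0);
      return unhandled;
    }
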
for %s.%s, discovering...", + elmt.database, elmt.name); + } + else if (cmp_frm(ndbtab, pack_data, pack_length)) + { + discover= 1; + sql_print_information("NDB: mismatch in frm for %s.%s, discovering...", + elmt.database, elmt.name); + } + my_free((char*) data, MYF(MY_ALLOW_ZERO_PTR)); + my_free((char*) pack_data, MYF(MY_ALLOW_ZERO_PTR)); + + if (discover) + { + /* ToDo 4.1 database needs to be created if missing */ + pthread_mutex_lock(&LOCK_open); + if (ha_create_table_from_engine(thd, elmt.database, elmt.name)) + { + /* ToDo 4.1 handle error */ + } + pthread_mutex_unlock(&LOCK_open); + } + } + } + while (unhandled && retries--); + + DBUG_RETURN(0); +} int ndbcluster_find_files(THD *thd,const char *db,const char *path, const char *wild, bool dir, List<char> *files) @@ -4599,7 +4849,7 @@ int ndbcluster_find_files(THD *thd,const char *db,const char *path, Ndb* ndb; char name[FN_REFLEN]; HASH ndb_tables, ok_tables; - NdbDictionary::Dictionary::List list; + NDBDICT::List list; if (!(ndb= check_ndb_in_thd(thd))) DBUG_RETURN(HA_ERR_NO_CONNECTION); @@ -4630,11 +4880,11 @@ int ndbcluster_find_files(THD *thd,const char *db,const char *path, for (i= 0 ; i < list.count ; i++) { - NdbDictionary::Dictionary::List::Element& t= list.elements[i]; - DBUG_PRINT("info", ("Found %s/%s in NDB", t.database, t.name)); + NDBDICT::List::Element& elmt= list.elements[i]; + DBUG_PRINT("info", ("Found %s/%s in NDB", elmt.database, elmt.name)); // Add only tables that belongs to db - if (my_strcasecmp(system_charset_info, t.database, db)) + if (my_strcasecmp(system_charset_info, elmt.database, db)) continue; // Apply wildcard to list of tables in NDB @@ -4642,14 +4892,14 @@ int ndbcluster_find_files(THD *thd,const char *db,const char *path, { if (lower_case_table_names) { - if (wild_case_compare(files_charset_info, t.name, wild)) + if (wild_case_compare(files_charset_info, elmt.name, wild)) continue; } - else if (wild_compare(t.name,wild,0)) + else if (wild_compare(elmt.name,wild,0)) continue; } - DBUG_PRINT("info", ("Inserting %s into ndb_tables hash", t.name)); - my_hash_insert(&ndb_tables, (byte*)thd->strdup(t.name)); + DBUG_PRINT("info", ("Inserting %s into ndb_tables hash", elmt.name)); + my_hash_insert(&ndb_tables, (byte*)thd->strdup(elmt.name)); } char *file_name; @@ -4697,10 +4947,15 @@ int ndbcluster_find_files(THD *thd,const char *db,const char *path, file_name= hash_element(&ndb_tables, i); if (!hash_search(&ok_tables, file_name, strlen(file_name))) { - DBUG_PRINT("info", ("%s must be discovered", file_name)); - // File is in list of ndb tables and not in ok_tables - // This table need to be created - create_list.push_back(thd->strdup(file_name)); + strxnmov(name, sizeof(name), + mysql_data_home, "/", db, "/", file_name, reg_ext, NullS); + if (access(name, F_OK)) + { + DBUG_PRINT("info", ("%s must be discovered", file_name)); + // File is in list of ndb tables and not in ok_tables + // This table need to be created + create_list.push_back(thd->strdup(file_name)); + } } } @@ -4756,6 +5011,7 @@ int ndbcluster_find_files(THD *thd,const char *db,const char *path, static int connect_callback() { update_status_variables(g_ndb_cluster_connection); + pthread_cond_signal(&COND_ndb_util_thread); return 0; } @@ -4867,7 +5123,7 @@ ndbcluster_init_error: ndbcluster_init() */ -bool ndbcluster_end() +int ndbcluster_end(ha_panic_function type) { DBUG_ENTER("ndbcluster_end"); @@ -4906,6 +5162,7 @@ bool ndbcluster_end() pthread_mutex_destroy(&LOCK_ndb_util_thread); pthread_cond_destroy(&COND_ndb_util_thread); 
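
ndbcluster_find_files() gains an extra guard before queuing discovery: a cluster table absent from the accepted list is only discovered when no .frm for it exists locally, probed with access(2). A POSIX sketch of that check:

    #include <string>
    #include <unistd.h>

    bool queue_for_discovery(const std::string &frm_path, bool in_ok_tables)
    {
      if (in_ok_tables)
        return false;                                // already known, matches
      return access(frm_path.c_str(), F_OK) != 0;    // no local frm: discover
    }
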
ndbcluster_inited= 0; + ndbcluster_util_inited= 0; DBUG_RETURN(0); } @@ -5030,6 +5287,84 @@ ha_ndbcluster::records_in_range(uint inx, key_range *min_key, (max_key && max_key->length == key_length))) DBUG_RETURN(1); + if ((idx_type == PRIMARY_KEY_ORDERED_INDEX || + idx_type == UNIQUE_ORDERED_INDEX || + idx_type == ORDERED_INDEX) && + m_index[inx].index_stat != NULL) + { + NDB_INDEX_DATA& d=m_index[inx]; + NDBINDEX* index=(NDBINDEX*)d.index; + Ndb* ndb=get_ndb(); + NdbTransaction* trans=NULL; + NdbIndexScanOperation* op=NULL; + int res=0; + Uint64 rows; + + do + { + // We must provide approx table rows + Uint64 table_rows=0; + Ndb_local_table_statistics *info= + (Ndb_local_table_statistics *)m_table_info; + if (info->records != ~(ha_rows)0 && info->records != 0) + { + table_rows = info->records; + DBUG_PRINT("info", ("use info->records: %llu", table_rows)); + } + else + { + Ndb_statistics stat; + if ((res=ndb_get_table_statistics(ndb, m_tabname, &stat)) != 0) + break; + table_rows=stat.row_count; + DBUG_PRINT("info", ("use db row_count: %llu", table_rows)); + if (table_rows == 0) { + // Problem if autocommit=0 +#ifdef ndb_get_table_statistics_uses_active_trans + rows=0; + break; +#endif + } + } + + // Define scan op for the range + if ((trans=m_active_trans) == NULL) + { + DBUG_PRINT("info", ("no active trans")); + if (! (trans=ndb->startTransaction())) + ERR_BREAK(ndb->getNdbError(), res); + } + if (! (op=trans->getNdbIndexScanOperation(index, (NDBTAB*)m_table))) + ERR_BREAK(trans->getNdbError(), res); + if ((op->readTuples(NdbOperation::LM_CommittedRead)) == -1) + ERR_BREAK(op->getNdbError(), res); + const key_range *keys[2]={ min_key, max_key }; + if ((res=set_bounds(op, inx, true, keys)) != 0) + break; + + // Decide if db should be contacted + int flags=0; + if (d.index_stat_query_count < d.index_stat_cache_entries || + (d.index_stat_update_freq != 0 && + d.index_stat_query_count % d.index_stat_update_freq == 0)) + { + DBUG_PRINT("info", ("force stat from db")); + flags|=NdbIndexStat::RR_UseDb; + } + if (d.index_stat->records_in_range(index, op, table_rows, &rows, flags) == -1) + ERR_BREAK(d.index_stat->getNdbError(), res); + d.index_stat_query_count++; + } while (0); + + if (trans != m_active_trans && rows == 0) + rows = 1; + if (trans != m_active_trans && trans != NULL) + ndb->closeTransaction(trans); + if (res != 0) + DBUG_RETURN(HA_POS_ERROR); + DBUG_RETURN(rows); + } + DBUG_RETURN(10); /* Good guess when you don't know anything */ } @@ -5042,7 +5377,7 @@ ulong ha_ndbcluster::table_flags(void) const } const char * ha_ndbcluster::table_type() const { - return("ndbcluster"); + return("NDBCLUSTER"); } uint ha_ndbcluster::max_supported_record_length() const { @@ -5100,7 +5435,7 @@ uint ndb_get_commitcount(THD *thd, char *dbname, char *tabname, char name[FN_REFLEN]; NDB_SHARE *share; - (void)strxnmov(name, FN_REFLEN, "./",dbname,"/",tabname,NullS); + (void)strxnmov(name, FN_REFLEN, share_prefix, dbname, "/", tabname, NullS); DBUG_PRINT("enter", ("name: %s", name)); pthread_mutex_lock(&ndbcluster_mutex); if (!(share=(NDB_SHARE*) hash_search(&ndbcluster_open_tables, @@ -5108,8 +5443,7 @@ uint ndb_get_commitcount(THD *thd, char *dbname, char *tabname, strlen(name)))) { pthread_mutex_unlock(&ndbcluster_mutex); - DBUG_PRINT("info", ("Table %s not found in ndbcluster_open_tables", - name)); + DBUG_PRINT("info", ("Table %s not found in ndbcluster_open_tables", name)); DBUG_RETURN(1); } share->use_count++; @@ -5124,7 +5458,7 @@ uint ndb_get_commitcount(THD *thd, char *dbname, char *tabname, 
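
The records_in_range() rewrite consults cached index statistics and only contacts the data nodes on a schedule: the first index_stat_cache_entries queries always go to the database to warm the cache, and after that every index_stat_update_freq'th query refreshes it. A sketch of that decision:

    #include <cstdint>

    bool force_stat_from_db(uint32_t query_count,
                            uint32_t cache_entries,
                            uint32_t update_freq)
    {
      if (query_count < cache_entries)
        return true;                            // still warming the cache
      return update_freq != 0 && query_count % update_freq == 0;
    }
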
DBUG_PRINT("info", ("Getting commit_count: %llu from share", share->commit_count)); pthread_mutex_unlock(&share->mutex); - free_share(share); + free_share(&share); DBUG_RETURN(0); } } @@ -5139,7 +5473,7 @@ uint ndb_get_commitcount(THD *thd, char *dbname, char *tabname, struct Ndb_statistics stat; if (ndb_get_table_statistics(ndb, tabname, &stat)) { - free_share(share); + free_share(&share); DBUG_RETURN(1); } @@ -5156,7 +5490,7 @@ uint ndb_get_commitcount(THD *thd, char *dbname, char *tabname, *commit_count= 0; } pthread_mutex_unlock(&share->mutex); - free_share(share); + free_share(&share); DBUG_RETURN(0); } @@ -5293,6 +5627,60 @@ ha_ndbcluster::register_query_cache_table(THD *thd, } +#ifndef DBUG_OFF +static void dbug_print_table(const char *info, TABLE *table) +{ + if (table == 0) + { + DBUG_PRINT("info",("%s: (null)", info)); + return; + } + DBUG_PRINT("info", + ("%s: %s.%s s->fields: %d " + "reclength: %d rec_buff_length: %d record[0]: %lx " + "record[1]: %lx", + info, + table->s->db, + table->s->table_name, + table->s->fields, + table->s->reclength, + table->s->rec_buff_length, + table->record[0], + table->record[1])); + + for (unsigned int i= 0; i < table->s->fields; i++) + { + Field *f= table->field[i]; + DBUG_PRINT("info", + ("[%d] \"%s\"(0x%lx:%s%s%s%s%s%s) type: %d pack_length: %d " + "ptr: 0x%lx[+%d] null_bit: %u null_ptr: 0x%lx[+%d]", + i, + f->field_name, + f->flags, + (f->flags & PRI_KEY_FLAG) ? "pri" : "attr", + (f->flags & NOT_NULL_FLAG) ? "" : ",nullable", + (f->flags & UNSIGNED_FLAG) ? ",unsigned" : ",signed", + (f->flags & ZEROFILL_FLAG) ? ",zerofill" : "", + (f->flags & BLOB_FLAG) ? ",blob" : "", + (f->flags & BINARY_FLAG) ? ",binary" : "", + f->real_type(), + f->pack_length(), + f->ptr, f->ptr - table->record[0], + f->null_bit, + f->null_ptr, (byte*) f->null_ptr - table->record[0])); + if (f->type() == MYSQL_TYPE_BIT) + { + Field_bit *g= (Field_bit*) f; + DBUG_PRINT("MYSQL_TYPE_BIT",("field_length: %d bit_ptr: 0x%lx[+%d] " + "bit_ofs: %u bit_len: %u", + g->field_length, g->bit_ptr, + (byte*) g->bit_ptr-table->record[0], + g->bit_ofs, g->bit_len)); + } + } +} +#endif + /* Handling the shared NDB_SHARE structure that is needed to provide table locking. @@ -5301,68 +5689,195 @@ ha_ndbcluster::register_query_cache_table(THD *thd, data we want to or can share. */ -static byte* ndbcluster_get_key(NDB_SHARE *share,uint *length, +static byte *ndbcluster_get_key(NDB_SHARE *share,uint *length, my_bool not_used __attribute__((unused))) { - *length=share->table_name_length; - return (byte*) share->table_name; + *length= share->key_length; + return (byte*) share->key; } -static NDB_SHARE* get_share(const char *table_name) +#ifndef DBUG_OFF +static void dbug_print_open_tables() +{ + DBUG_ENTER("dbug_print_open_tables"); + for (uint i= 0; i < ndbcluster_open_tables.records; i++) + { + NDB_SHARE *share= (NDB_SHARE*) hash_element(&ndbcluster_open_tables, i); + DBUG_PRINT("share", + ("[%d] 0x%lx key: %s key_length: %d", + i, share, share->key, share->key_length)); + DBUG_PRINT("share", + ("db.tablename: %s.%s use_count: %d commit_count: %d", + share->db, share->table_name, + share->use_count, share->commit_count)); + } + DBUG_VOID_RETURN; +} +#else +#define dbug_print_open_tables() +#endif + +/* + Increase refcount on existing share. + Always returns share and cannot fail. 
+*/ +static NDB_SHARE *get_share(NDB_SHARE *share) { - NDB_SHARE *share; pthread_mutex_lock(&ndbcluster_mutex); - uint length=(uint) strlen(table_name); - if (!(share=(NDB_SHARE*) hash_search(&ndbcluster_open_tables, - (byte*) table_name, - length))) + share->use_count++; + + dbug_print_open_tables(); + + DBUG_PRINT("get_share", + ("0x%lx key: %s key_length: %d", + share, share->key, share->key_length)); + DBUG_PRINT("get_share", + ("db.tablename: %s.%s use_count: %d commit_count: %d", + share->db, share->table_name, + share->use_count, share->commit_count)); + pthread_mutex_unlock(&ndbcluster_mutex); + return share; +} + +/* + Get a share object for key + + Returns share for key, and increases the refcount on the share. + + create_if_not_exists == TRUE: + creates share if it does not alreade exist + returns 0 only due to out of memory, and then sets my_error + + create_if_not_exists == FALSE: + returns 0 if share does not exist + + have_lock == TRUE, pthread_mutex_lock(&ndbcluster_mutex) already taken +*/ +static NDB_SHARE *get_share(const char *key, bool create_if_not_exists, + bool have_lock) +{ + NDB_SHARE *share; + if (!have_lock) + pthread_mutex_lock(&ndbcluster_mutex); + uint length= (uint) strlen(key); + if (!(share= (NDB_SHARE*) hash_search(&ndbcluster_open_tables, + (byte*) key, + length))) { - if ((share=(NDB_SHARE *) my_malloc(sizeof(*share)+length+1, + if (!create_if_not_exists) + { + DBUG_PRINT("error", ("get_share: %s does not exist", key)); + if (!have_lock) + pthread_mutex_unlock(&ndbcluster_mutex); + return 0; + } + if ((share= (NDB_SHARE*) my_malloc(sizeof(*share), MYF(MY_WME | MY_ZEROFILL)))) { - share->table_name_length=length; - share->table_name=(char*) (share+1); - strmov(share->table_name,table_name); + MEM_ROOT **root_ptr= + my_pthread_getspecific_ptr(MEM_ROOT**, THR_MALLOC); + MEM_ROOT *old_root= *root_ptr; + init_sql_alloc(&share->mem_root, 1024, 0); + *root_ptr= &share->mem_root; // remember to reset before return + + /* enough space for key, db, and table_name */ + share->key= alloc_root(*root_ptr, 2 * (length + 1)); + share->key_length= length; + strmov(share->key, key); if (my_hash_insert(&ndbcluster_open_tables, (byte*) share)) { - pthread_mutex_unlock(&ndbcluster_mutex); - my_free((gptr) share,0); + free_root(&share->mem_root, MYF(0)); + my_free((gptr) share, 0); + *root_ptr= old_root; + if (!have_lock) + pthread_mutex_unlock(&ndbcluster_mutex); return 0; } thr_lock_init(&share->lock); - pthread_mutex_init(&share->mutex,MY_MUTEX_INIT_FAST); + pthread_mutex_init(&share->mutex, MY_MUTEX_INIT_FAST); share->commit_count= 0; share->commit_count_lock= 0; + share->db= share->key + length + 1; + ha_ndbcluster::set_dbname(key, share->db); + share->table_name= share->db + strlen(share->db) + 1; + ha_ndbcluster::set_tabname(key, share->table_name); + *root_ptr= old_root; } else { - DBUG_PRINT("error", ("Failed to alloc share")); - pthread_mutex_unlock(&ndbcluster_mutex); + DBUG_PRINT("error", ("get_share: failed to alloc share")); + if (!have_lock) + pthread_mutex_unlock(&ndbcluster_mutex); + my_error(ER_OUTOFMEMORY, MYF(0), sizeof(*share)); return 0; } } share->use_count++; - DBUG_PRINT("share", - ("table_name: %s, length: %d, use_count: %d, commit_count: %d", - share->table_name, share->table_name_length, share->use_count, - share->commit_count)); - pthread_mutex_unlock(&ndbcluster_mutex); + dbug_print_open_tables(); + + DBUG_PRINT("get_share", + ("0x%lx key: %s key_length: %d key: %s", + share, share->key, share->key_length, key)); + DBUG_PRINT("get_share", + 
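
get_share() packs three strings into one 2*(length+1)-byte allocation: the full "./db/table" key first, then the db name and table name back to back, so all three pointers share a single lifetime on the share's MEM_ROOT. A sketch of the layout with plain new[] standing in for the root allocator:

    #include <cstdio>
    #include <cstring>

    struct ShareKey { char *key, *db, *table_name; };

    ShareKey build_share_key(const char *key, const char *db, const char *table)
    {
      size_t length = std::strlen(key);
      char *buf = new char[2 * (length + 1)];  // key + "db\0table\0" both fit
      std::strcpy(buf, key);
      char *p = buf + length + 1;
      std::strcpy(p, db);                      // db right after the key
      char *t = p + std::strlen(p) + 1;
      std::strcpy(t, table);                   // table name after the db
      return { buf, p, t };
    }

    int main()
    {
      ShareKey s = build_share_key("./test/t1", "test", "t1");
      std::printf("%s %s %s\n", s.key, s.db, s.table_name);
      delete[] s.key;                          // one allocation, one free
    }
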
("db.tablename: %s.%s use_count: %d commit_count: %d", + share->db, share->table_name, + share->use_count, share->commit_count)); + if (!have_lock) + pthread_mutex_unlock(&ndbcluster_mutex); return share; } +static void real_free_share(NDB_SHARE **share) +{ + DBUG_PRINT("real_free_share", + ("0x%lx key: %s key_length: %d", + (*share), (*share)->key, (*share)->key_length)); + DBUG_PRINT("real_free_share", + ("db.tablename: %s.%s use_count: %d commit_count: %d", + (*share)->db, (*share)->table_name, + (*share)->use_count, (*share)->commit_count)); + + hash_delete(&ndbcluster_open_tables, (byte*) *share); + thr_lock_delete(&(*share)->lock); + pthread_mutex_destroy(&(*share)->mutex); + free_root(&(*share)->mem_root, MYF(0)); + + my_free((gptr) *share, MYF(0)); + *share= 0; + + dbug_print_open_tables(); +} + +/* + decrease refcount of share + calls real_free_share when refcount reaches 0 -static void free_share(NDB_SHARE *share) + have_lock == TRUE, pthread_mutex_lock(&ndbcluster_mutex) already taken +*/ +static void free_share(NDB_SHARE **share, bool have_lock) { - pthread_mutex_lock(&ndbcluster_mutex); - if (!--share->use_count) + if (!have_lock) + pthread_mutex_lock(&ndbcluster_mutex); + if ((*share)->util_lock == current_thd) + (*share)->util_lock= 0; + if (!--(*share)->use_count) { - hash_delete(&ndbcluster_open_tables, (byte*) share); - thr_lock_delete(&share->lock); - pthread_mutex_destroy(&share->mutex); - my_free((gptr) share, MYF(0)); + real_free_share(share); } - pthread_mutex_unlock(&ndbcluster_mutex); + else + { + dbug_print_open_tables(); + DBUG_PRINT("free_share", + ("0x%lx key: %s key_length: %d", + *share, (*share)->key, (*share)->key_length)); + DBUG_PRINT("free_share", + ("db.tablename: %s.%s use_count: %d commit_count: %d", + (*share)->db, (*share)->table_name, + (*share)->use_count, (*share)->commit_count)); + } + if (!have_lock) + pthread_mutex_unlock(&ndbcluster_mutex); } @@ -5393,14 +5908,14 @@ static int packfrm(const void *data, uint len, uint blob_len; frm_blob_struct* blob; DBUG_ENTER("packfrm"); - DBUG_PRINT("enter", ("data: %x, len: %d", data, len)); + DBUG_PRINT("enter", ("data: 0x%lx len: %d", data, len)); error= 1; org_len= len; if (my_compress((byte*)data, &org_len, &comp_len)) goto err; - DBUG_PRINT("info", ("org_len: %d, comp_len: %d", org_len, comp_len)); + DBUG_PRINT("info", ("org_len: %d comp_len: %d", org_len, comp_len)); DBUG_DUMP("compressed", (char*)data, org_len); error= 2; @@ -5420,7 +5935,8 @@ static int packfrm(const void *data, uint len, *pack_len= blob_len; error= 0; - DBUG_PRINT("exit", ("pack_data: %x, pack_len: %d", *pack_data, *pack_len)); + DBUG_PRINT("exit", + ("pack_data: 0x%lx pack_len: %d", *pack_data, *pack_len)); err: DBUG_RETURN(error); @@ -5434,7 +5950,7 @@ static int unpackfrm(const void **unpack_data, uint *unpack_len, byte *data; ulong complen, orglen, ver; DBUG_ENTER("unpackfrm"); - DBUG_PRINT("enter", ("pack_data: %x", pack_data)); + DBUG_PRINT("enter", ("pack_data: 0x%lx", pack_data)); complen= uint4korr((char*)&blob->head.complen); orglen= uint4korr((char*)&blob->head.orglen); @@ -5459,7 +5975,7 @@ static int unpackfrm(const void **unpack_data, uint *unpack_len, *unpack_data= data; *unpack_len= complen; - DBUG_PRINT("exit", ("frmdata: %x, len: %d", *unpack_data, *unpack_len)); + DBUG_PRINT("exit", ("frmdata: 0x%lx, len: %d", *unpack_data, *unpack_len)); DBUG_RETURN(0); } @@ -5472,11 +5988,10 @@ ndb_get_table_statistics(Ndb* ndb, const char * table, DBUG_ENTER("ndb_get_table_statistics"); DBUG_PRINT("enter", ("table: %s", 
table)); NdbTransaction* pTrans= ndb->startTransaction(); + if (pTrans == NULL) + ERR_RETURN(ndb->getNdbError()); do { - if (pTrans == NULL) - break; - NdbScanOperation* pOp= pTrans->getNdbScanOperation(table); if (pOp == NULL) break; @@ -5576,6 +6091,7 @@ ha_ndbcluster::read_multi_range_first(KEY_MULTI_RANGE **found_range_p, HANDLER_BUFFER *buffer) { DBUG_ENTER("ha_ndbcluster::read_multi_range_first"); + m_write_op= FALSE; int res; KEY* key_info= table->key_info + active_index; @@ -5583,7 +6099,7 @@ ha_ndbcluster::read_multi_range_first(KEY_MULTI_RANGE **found_range_p, ulong reclength= table->s->reclength; NdbOperation* op; - if (uses_blob_value(m_retrieve_all_fields)) + if (uses_blob_value()) { /** * blobs can't be batched currently @@ -5635,12 +6151,29 @@ ha_ndbcluster::read_multi_range_first(KEY_MULTI_RANGE **found_range_p, for (; multi_range_curr<multi_range_end && curr+reclength <= end_of_buffer; multi_range_curr++) { - switch (index_type){ + part_id_range part_spec; + if (m_use_partition_function) + { + get_partition_set(table, curr, active_index, + &multi_range_curr->start_key, + &part_spec); + if (part_spec.start_part > part_spec.end_part) + { + /* + We can skip this partition since the key won't fit into any + partition + */ + curr += reclength; + multi_range_curr->range_flag |= SKIP_RANGE; + continue; + } + } + switch(index_type){ case PRIMARY_KEY_ORDERED_INDEX: if (!(multi_range_curr->start_key.length == key_info->key_length && - multi_range_curr->start_key.flag == HA_READ_KEY_EXACT)) - goto range; - /* fall through */ + multi_range_curr->start_key.flag == HA_READ_KEY_EXACT)) + goto range; + // else fall through case PRIMARY_KEY_INDEX: { multi_range_curr->range_flag |= UNIQUE_RANGE; @@ -5648,7 +6181,9 @@ ha_ndbcluster::read_multi_range_first(KEY_MULTI_RANGE **found_range_p, !op->readTuple(lm) && !set_primary_key(op, multi_range_curr->start_key.key) && !define_read_attrs(curr, op) && - (op->setAbortOption(AO_IgnoreError), TRUE)) + (op->setAbortOption(AO_IgnoreError), TRUE) && + (!m_use_partition_function || + (op->setPartitionId(part_spec.start_part), true))) curr += reclength; else ERR_RETURN(op ? op->getNdbError() : m_active_trans->getNdbError()); @@ -5657,11 +6192,11 @@ ha_ndbcluster::read_multi_range_first(KEY_MULTI_RANGE **found_range_p, break; case UNIQUE_ORDERED_INDEX: if (!(multi_range_curr->start_key.length == key_info->key_length && - multi_range_curr->start_key.flag == HA_READ_KEY_EXACT && - !check_null_in_key(key_info, multi_range_curr->start_key.key, - multi_range_curr->start_key.length))) - goto range; - /* fall through */ + multi_range_curr->start_key.flag == HA_READ_KEY_EXACT && + !check_null_in_key(key_info, multi_range_curr->start_key.key, + multi_range_curr->start_key.length))) + goto range; + // else fall through case UNIQUE_INDEX: { multi_range_curr->range_flag |= UNIQUE_RANGE; @@ -5675,8 +6210,7 @@ ha_ndbcluster::read_multi_range_first(KEY_MULTI_RANGE **found_range_p, ERR_RETURN(op ? 
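
read_multi_range_first() now prunes per range: a key range whose partition span is empty is flagged SKIP_RANGE and its slot in the row buffer is stepped over, keeping the buffer and the range list in lock-step for read_multi_range_next(). A simplified sketch (flag values illustrative, buffer handling reduced to the skip case):

    #include <cstdint>
    #include <vector>

    constexpr uint16_t SKIP_RANGE_F   = 1;
    constexpr uint16_t UNIQUE_RANGE_F = 2;

    struct Range { uint16_t flags = 0; bool empty_partition_span; };

    // Returns how many ranges will actually be sent to the data nodes.
    size_t mark_skippable(std::vector<Range> &ranges, uint8_t *&curr,
                          size_t reclength)
    {
      size_t sent = 0;
      for (Range &r : ranges) {
        if (r.empty_partition_span) {
          curr += reclength;          // keep buffer and range list aligned
          r.flags |= SKIP_RANGE_F;
          continue;
        }
        sent++;
      }
      return sent;
    }
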
op->getNdbError() : m_active_trans->getNdbError()); break; } - case ORDERED_INDEX: - { + case ORDERED_INDEX: { range: multi_range_curr->range_flag &= ~(uint)UNIQUE_RANGE; if (scanOp == 0) @@ -5709,7 +6243,8 @@ ha_ndbcluster::read_multi_range_first(KEY_MULTI_RANGE **found_range_p, const key_range *keys[2]= { &multi_range_curr->start_key, &multi_range_curr->end_key }; - if ((res= set_bounds(scanOp, keys, multi_range_curr-ranges))) + if ((res= set_bounds(scanOp, active_index, false, keys, + multi_range_curr-ranges))) DBUG_RETURN(res); break; } @@ -5751,7 +6286,7 @@ ha_ndbcluster::read_multi_range_first(KEY_MULTI_RANGE **found_range_p, } #if 0 -#define DBUG_MULTI_RANGE(x) printf("read_multi_range_next: case %d\n", x); +#define DBUG_MULTI_RANGE(x) DBUG_PRINT("info", ("read_multi_range_next: case %d\n", x)); #else #define DBUG_MULTI_RANGE(x) #endif @@ -5762,6 +6297,7 @@ ha_ndbcluster::read_multi_range_next(KEY_MULTI_RANGE ** multi_range_found_p) DBUG_ENTER("ha_ndbcluster::read_multi_range_next"); if (m_disable_multi_read) { + DBUG_MULTI_RANGE(11); DBUG_RETURN(handler::read_multi_range_next(multi_range_found_p)); } @@ -5771,10 +6307,16 @@ ha_ndbcluster::read_multi_range_next(KEY_MULTI_RANGE ** multi_range_found_p) const NdbOperation* op= m_current_multi_operation; for (;multi_range_curr < m_multi_range_defined; multi_range_curr++) { + DBUG_MULTI_RANGE(12); + if (multi_range_curr->range_flag & SKIP_RANGE) + continue; if (multi_range_curr->range_flag & UNIQUE_RANGE) { if (op->getNdbError().code == 0) + { + DBUG_MULTI_RANGE(13); goto found_next; + } op= m_active_trans->getNextCompletedOperation(op); m_multi_range_result_ptr += reclength; @@ -5791,6 +6333,7 @@ ha_ndbcluster::read_multi_range_next(KEY_MULTI_RANGE ** multi_range_found_p) } else { + DBUG_MULTI_RANGE(14); goto close_scan; } } @@ -5824,6 +6367,7 @@ ha_ndbcluster::read_multi_range_next(KEY_MULTI_RANGE ** multi_range_found_p) DBUG_ASSERT(range_no == -1); if ((res= m_multi_cursor->nextResult(true))) { + DBUG_MULTI_RANGE(15); goto close_scan; } multi_range_curr--; // Will be increased in for-loop @@ -5851,12 +6395,16 @@ close_scan: } else { + DBUG_MULTI_RANGE(9); DBUG_RETURN(ndb_err(m_active_trans)); } } if (multi_range_curr == multi_range_end) + { + DBUG_MULTI_RANGE(16); DBUG_RETURN(HA_ERR_END_OF_FILE); + } /** * Read remaining ranges @@ -5965,6 +6513,7 @@ pthread_handler_t ndb_util_thread_func(void *arg __attribute__((unused))) THD *thd; /* needs to be first for thread_stack */ Ndb* ndb; struct timespec abstime; + List<NDB_SHARE> util_open_tables; my_thread_init(); DBUG_ENTER("ndb_util_thread"); @@ -5985,10 +6534,51 @@ pthread_handler_t ndb_util_thread_func(void *arg __attribute__((unused))) delete ndb; DBUG_RETURN(NULL); } + thd->init_for_queries(); + thd->version=refresh_version; + thd->set_time(); + thd->main_security_ctx.host_or_ip= ""; + thd->client_capabilities = 0; + my_net_init(&thd->net, 0); + thd->main_security_ctx.master_access= ~0; + thd->main_security_ctx.priv_user = 0; + + /* + wait for mysql server to start + */ + pthread_mutex_lock(&LOCK_server_started); + while (!mysqld_server_started) + pthread_cond_wait(&COND_server_started, &LOCK_server_started); + pthread_mutex_unlock(&LOCK_server_started); + + /* + Wait for cluster to start + */ + pthread_mutex_lock(&LOCK_ndb_util_thread); + while (!ndb_cluster_node_id) + { + /* ndb not connected yet */ + set_timespec(abstime, 1); + pthread_cond_timedwait(&COND_ndb_util_thread, + &LOCK_ndb_util_thread, + &abstime); + if (abort_loop) + { + pthread_mutex_unlock(&LOCK_ndb_util_thread); + 
goto ndb_util_thread_end; + } + } + pthread_mutex_unlock(&LOCK_ndb_util_thread); + + /* + Get all table definitions from the storage node + */ + ndbcluster_find_all_files(thd); + + ndbcluster_util_inited= 1; - List<NDB_SHARE> util_open_tables; set_timespec(abstime, 0); - for (;;) + for (;!abort_loop;) { pthread_mutex_lock(&LOCK_ndb_util_thread); @@ -5996,10 +6586,10 @@ pthread_handler_t ndb_util_thread_func(void *arg __attribute__((unused))) &LOCK_ndb_util_thread, &abstime); pthread_mutex_unlock(&LOCK_ndb_util_thread); - +#ifdef NDB_EXTRA_DEBUG_UTIL_THREAD DBUG_PRINT("ndb_util_thread", ("Started, ndb_cache_check_time: %d", ndb_cache_check_time)); - +#endif if (abort_loop) break; /* Shutting down server */ @@ -6030,20 +6620,12 @@ pthread_handler_t ndb_util_thread_func(void *arg __attribute__((unused))) List_iterator_fast<NDB_SHARE> it(util_open_tables); while ((share= it++)) { - /* Split tab- and dbname */ - char buf[FN_REFLEN]; - char *tabname, *db; - uint length= dirname_length(share->table_name); - tabname= share->table_name+length; - memcpy(buf, share->table_name, length-1); - buf[length-1]= 0; - db= buf+dirname_length(buf); DBUG_PRINT("ndb_util_thread", ("Fetching commit count for: %s", - share->table_name)); + share->key)); /* Contact NDB to get commit count for table */ - ndb->setDatabaseName(db); + ndb->setDatabaseName(share->db); struct Ndb_statistics stat; uint lock; @@ -6051,17 +6633,17 @@ pthread_handler_t ndb_util_thread_func(void *arg __attribute__((unused))) lock= share->commit_count_lock; pthread_mutex_unlock(&share->mutex); - if (ndb_get_table_statistics(ndb, tabname, &stat) == 0) + if (ndb_get_table_statistics(ndb, share->table_name, &stat) == 0) { DBUG_PRINT("ndb_util_thread", ("Table: %s, commit_count: %llu, rows: %llu", - share->table_name, stat.commit_count, stat.row_count)); + share->key, stat.commit_count, stat.row_count)); } else { DBUG_PRINT("ndb_util_thread", ("Error: Could not get commit count for table %s", - share->table_name)); + share->key)); stat.commit_count= 0; } @@ -6071,7 +6653,7 @@ pthread_handler_t ndb_util_thread_func(void *arg __attribute__((unused))) pthread_mutex_unlock(&share->mutex); /* Decrease the use count and possibly free share */ - free_share(share); + free_share(&share); } /* Clear the list of open tables */ @@ -6098,7 +6680,8 @@ pthread_handler_t ndb_util_thread_func(void *arg __attribute__((unused))) abstime.tv_nsec-= 1000000000; } } - +ndb_util_thread_end: + net_end(&thd->net); thd->cleanup(); delete thd; delete ndb; @@ -7086,6 +7669,8 @@ ha_ndbcluster::build_scan_filter_predicate(Ndb_cond * &cond, : NULL; break; default: + field= NULL; //Keep compiler happy + DBUG_ASSERT(0); break; } switch ((negated) ? @@ -7433,29 +8018,24 @@ ha_ndbcluster::generate_scan_filter(Ndb_cond_stack *ndb_cond_stack, DBUG_RETURN(0); } -int -ndbcluster_show_status(THD* thd) +/* + Implements the SHOW NDB STATUS command. 
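
The reworked utility thread runs on a fixed cadence: sleep on a condition variable with a deadline, wake early on shutdown or signal, refresh each open share's commit count, then push the deadline forward by the check interval. A sketch with std::condition_variable standing in for pthread_cond_timedwait:

    #include <atomic>
    #include <chrono>
    #include <condition_variable>
    #include <mutex>

    static std::mutex util_mutex;
    static std::condition_variable util_cond;
    static std::atomic<bool> abort_loop{false};

    template <class RefreshFn>
    void util_thread_loop(int check_time_ms, RefreshFn refresh_all_shares)
    {
      auto deadline = std::chrono::steady_clock::now();
      while (!abort_loop) {
        {
          std::unique_lock<std::mutex> lk(util_mutex);
          util_cond.wait_until(lk, deadline);  // wakes early on signal too
        }
        if (abort_loop)
          break;                               // server is shutting down
        refresh_all_shares();                  // fetch commit counts from NDB
        deadline += std::chrono::milliseconds(check_time_ms);
      }
    }
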
+*/ +bool +ndbcluster_show_status(THD* thd, stat_print_fn *stat_print, + enum ha_stat_type stat_type) { - Protocol *protocol= thd->protocol; - + char buf[IO_SIZE]; DBUG_ENTER("ndbcluster_show_status"); if (have_ndbcluster != SHOW_OPTION_YES) { - my_message(ER_NOT_SUPPORTED_YET, - "Cannot call SHOW NDBCLUSTER STATUS because skip-ndbcluster is defined", - MYF(0)); - DBUG_RETURN(TRUE); + DBUG_RETURN(FALSE); + } + if (stat_type != HA_ENGINE_STATUS) + { + DBUG_RETURN(FALSE); } - - List<Item> field_list; - field_list.push_back(new Item_empty_string("free_list", 255)); - field_list.push_back(new Item_return_int("created", 10,MYSQL_TYPE_LONG)); - field_list.push_back(new Item_return_int("free", 10,MYSQL_TYPE_LONG)); - field_list.push_back(new Item_return_int("sizeof", 10,MYSQL_TYPE_LONG)); - - if (protocol->send_fields(&field_list, Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF)) - DBUG_RETURN(TRUE); if (get_thd_ndb(thd) && get_thd_ndb(thd)->ndb) { @@ -7463,14 +8043,11 @@ ndbcluster_show_status(THD* thd) Ndb::Free_list_usage tmp; tmp.m_name= 0; while (ndb->get_free_list_usage(&tmp)) { - protocol->prepare_for_resend(); - - protocol->store(tmp.m_name, &my_charset_bin); - protocol->store((uint)tmp.m_created); - protocol->store((uint)tmp.m_free); - protocol->store((uint)tmp.m_sizeof); - if (protocol->write()) - DBUG_RETURN(TRUE); + my_snprintf(buf, sizeof(buf), + "created=%u, free=%u, sizeof=%u", + tmp.m_created, tmp.m_free, tmp.m_sizeof); + if (stat_print(thd, ndbcluster_hton.name, tmp.m_name, buf)) + DBUG_RETURN(TRUE); } } send_eof(thd); @@ -7478,4 +8055,427 @@ ndbcluster_show_status(THD* thd) DBUG_RETURN(FALSE); } -#endif /* HAVE_NDBCLUSTER_DB */ +/* + Create a table in NDB Cluster + */ +static uint get_no_fragments(ulonglong max_rows) +{ +#if MYSQL_VERSION_ID >= 50000 + uint acc_row_size= 25 + /*safety margin*/ 2; +#else + uint acc_row_size= pk_length*4; + /* add acc overhead */ + if (pk_length <= 8) /* main page will set the limit */ + acc_row_size+= 25 + /*safety margin*/ 2; + else /* overflow page will set the limit */ + acc_row_size+= 4 + /*safety margin*/ 4; +#endif + ulonglong acc_fragment_size= 512*1024*1024; +#if MYSQL_VERSION_ID >= 50100 + return (max_rows*acc_row_size)/acc_fragment_size+1; +#else + return ((max_rows*acc_row_size)/acc_fragment_size+1 + +1/*correct rounding*/)/2; +#endif +} + + +/* + Routine to adjust default number of partitions to always be a multiple + of number of nodes and never more than 4 times the number of nodes. + +*/ +static bool adjusted_frag_count(uint no_fragments, uint no_nodes, + uint &reported_frags) +{ + uint i= 0; + reported_frags= no_nodes; + while (reported_frags < no_fragments && ++i < 4 && + (reported_frags + no_nodes) < MAX_PARTITIONS) + reported_frags+= no_nodes; + return (reported_frags < no_fragments); +} + +int ha_ndbcluster::get_default_no_partitions(ulonglong max_rows) +{ + uint reported_frags; + uint no_fragments= get_no_fragments(max_rows); + uint no_nodes= g_ndb_cluster_connection->no_db_nodes(); + adjusted_frag_count(no_fragments, no_nodes, reported_frags); + return (int)reported_frags; +} + + +/* + User defined partitioning set-up. We need to check how many fragments the + user wants defined and which node groups to put those into. Later we also + want to attach those partitions to a tablespace. + + All the functionality of the partition function, partition limits and so + forth are entirely handled by the MySQL Server. 
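
A worked example of the fragment-count arithmetic just added: estimate how many 512 MB ACC fragments the projected row count needs, then round up to a multiple of the node count, capped at four rounds (the MAX_PARTITIONS guard from the source is omitted here for brevity; the 27-byte row cost matches the 5.0 branch of the #if):

    #include <cstdint>
    #include <cstdio>

    uint32_t get_no_fragments(uint64_t max_rows)
    {
      const uint64_t acc_row_size = 25 + 2;            // bytes + safety margin
      const uint64_t acc_fragment_size = 512ULL * 1024 * 1024;
      return (uint32_t)((max_rows * acc_row_size) / acc_fragment_size + 1);
    }

    bool adjusted_frag_count(uint32_t no_fragments, uint32_t no_nodes,
                             uint32_t &reported)
    {
      uint32_t i = 0;
      reported = no_nodes;
      while (reported < no_fragments && ++i < 4)
        reported += no_nodes;                          // multiples of nodes
      return reported < no_fragments;                  // true => warn user
    }

    int main()
    {
      uint32_t frags = get_no_fragments(100000000ULL); // 100M rows -> 6 frags
      uint32_t reported;
      bool warn = adjusted_frag_count(frags, 4, reported);
      std::printf("fragments=%u reported=%u warn=%d\n", frags, reported, warn);
    }
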
There is one exception to + this rule for PARTITION BY KEY where NDB handles the hash function and + this type can thus be handled transparently also by NDB API program. + For RANGE, HASH and LIST and subpartitioning the NDB API programs must + implement the function to map to a partition. +*/ + +uint ha_ndbcluster::set_up_partition_info(partition_info *part_info, + TABLE *table, + void *tab_par) +{ + DBUG_ENTER("ha_ndbcluster::set_up_partition_info"); + ushort node_group[MAX_PARTITIONS]; + ulong ng_index= 0, i, j; + NDBTAB *tab= (NDBTAB*)tab_par; + NDBTAB::FragmentType ftype= NDBTAB::UserDefined; + partition_element *part_elem; + + if (part_info->part_type == HASH_PARTITION && + part_info->list_of_part_fields == TRUE) + { + Field **fields= part_info->part_field_array; + + if (part_info->linear_hash_ind) + ftype= NDBTAB::DistrKeyLin; + else + ftype= NDBTAB::DistrKeyHash; + + for (i= 0; i < part_info->part_field_list.elements; i++) + { + NDBCOL *col= tab->getColumn(fields[i]->fieldnr - 1); + DBUG_PRINT("info",("setting dist key on %s", col->getName())); + col->setPartitionKey(TRUE); + } + } + List_iterator<partition_element> part_it(part_info->partitions); + for (i= 0; i < part_info->no_parts; i++) + { + part_elem= part_it++; + if (!is_sub_partitioned(part_info)) + { + node_group[ng_index++]= part_elem->nodegroup_id; + //Here we should insert tablespace id based on tablespace name + } + else + { + List_iterator<partition_element> sub_it(part_elem->subpartitions); + for (j= 0; j < part_info->no_subparts; j++) + { + part_elem= sub_it++; + node_group[ng_index++]= part_elem->nodegroup_id; + //Here we should insert tablespace id based on tablespace name + } + } + } + { + uint no_nodes= g_ndb_cluster_connection->no_db_nodes(); + if (ng_index > 4 * no_nodes) + { + DBUG_RETURN(1300); + } + } + tab->setNodeGroupIds(&node_group, ng_index); + tab->setFragmentType(ftype); + DBUG_RETURN(0); +} + + +/* + This routine is used to set-up fragmentation when the user has only specified + ENGINE = NDB and no user defined partitioning what so ever. Thus all values + will be based on default values. We will choose Linear Hash or Hash with + perfect spread dependent on a session variable defined in MySQL. +*/ + +static void ndb_set_fragmentation(NDBTAB &tab, TABLE *form, uint pk_length) +{ + NDBTAB::FragmentType ftype= NDBTAB::DistrKeyHash; + ushort node_group[MAX_PARTITIONS]; + uint no_nodes= g_ndb_cluster_connection->no_db_nodes(), no_fragments, i; + DBUG_ENTER("ndb_set_fragmentation"); + + if (form->s->max_rows == (ha_rows) 0) + { + no_fragments= no_nodes; + } + else + { + /* + Ensure that we get enough fragments to handle all rows and ensure that + the table is fully distributed by keeping the number of fragments a + multiple of the number of nodes. 
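
set_up_partition_info() flattens the partition tree into one nodegroup id per leaf: top-level partitions directly, or each subpartition when present, and errors out when the total exceeds four per data node. A sketch of that walk:

    #include <cstdint>
    #include <vector>

    struct PartElem { uint16_t nodegroup_id; std::vector<PartElem> subparts; };

    int collect_nodegroups(const std::vector<PartElem> &parts,
                           uint32_t no_nodes, std::vector<uint16_t> &out)
    {
      for (const PartElem &p : parts) {
        if (p.subparts.empty())
          out.push_back(p.nodegroup_id);           // leaf partition
        else
          for (const PartElem &s : p.subparts)
            out.push_back(s.nodegroup_id);         // one id per subpartition
      }
      return out.size() > 4 * no_nodes ? 1300 : 0; // same limit as the hunk
    }
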
+ */ + uint fragments= get_no_fragments(form->s->max_rows); + if (adjusted_frag_count(fragments, no_nodes, no_fragments)) + { + push_warning(current_thd, + MYSQL_ERROR::WARN_LEVEL_WARN, ER_UNKNOWN_ERROR, + "Ndb might have problems storing the max amount of rows specified"); + } + } + /* + Always start with node group 0 and continue with next node group from + there + */ + node_group[0]= 0; + for (i= 1; i < no_fragments; i++) + node_group[i]= UNDEF_NODEGROUP; + switch (opt_ndb_distribution_id) + { + case ND_KEYHASH: + ftype= NDBTAB::DistrKeyHash; + break; + case ND_LINHASH: + ftype= NDBTAB::DistrKeyLin; + break; + } + tab.setFragmentType(ftype); + tab.setNodeGroupIds(&node_group, no_fragments); + DBUG_VOID_RETURN; +} + +bool ha_ndbcluster::check_if_incompatible_data(HA_CREATE_INFO *info, + uint table_changes) +{ + /* + TODO: Remove the dummy return below, when cluster gets + signal from alter table when only .frm is changed. Cluster + needs it to manage the copies. + */ + return COMPATIBLE_DATA_NO; + + if (table_changes != IS_EQUAL_YES) + return COMPATIBLE_DATA_NO; + + /* Check that auto_increment value was not changed */ + if ((info->used_fields & HA_CREATE_USED_AUTO) && + info->auto_increment_value != 0) + return COMPATIBLE_DATA_NO; + + /* Check that row format didn't change */ + if ((info->used_fields & HA_CREATE_USED_AUTO) && + get_row_type() != info->row_type) + return COMPATIBLE_DATA_NO; + + return COMPATIBLE_DATA_YES; +} + +#ifdef NDB_DISKDATA +bool set_up_tablespace(st_alter_tablespace *info, + NdbDictionary::Tablespace *ndb_ts) +{ + ndb_ts->setName(info->tablespace_name); + ndb_ts->setExtentSize(info->extent_size); + ndb_ts->setDefaultLogfileGroup(info->logfile_group_name); + return false; +} + +bool set_up_datafile(st_alter_tablespace *info, + NdbDictionary::Datafile *ndb_df) +{ + if (info->max_size > 0) + { + my_error(ER_TABLESPACE_AUTO_EXTEND_ERROR, MYF(0)); + return true; + } + ndb_df->setPath(info->data_file_name); + ndb_df->setSize(info->initial_size); + ndb_df->setTablespace(info->tablespace_name); + return false; +} + +bool set_up_logfile_group(st_alter_tablespace *info, + NdbDictionary::LogfileGroup *ndb_lg) +{ + ndb_lg->setName(info->logfile_group_name); + ndb_lg->setUndoBufferSize(info->undo_buffer_size); + return false; +} + +bool set_up_undofile(st_alter_tablespace *info, + NdbDictionary::Undofile *ndb_uf) +{ + ndb_uf->setPath(info->undo_file_name); + ndb_uf->setSize(info->initial_size); + ndb_uf->setLogfileGroup(info->logfile_group_name); + return false; +} + +int ha_ndbcluster::alter_tablespace(st_alter_tablespace *info) +{ + Ndb *ndb; + NDBDICT *dict; + int error; + DBUG_ENTER("ha_ndbcluster::alter_tablespace"); + if (check_ndb_connection()) + { + DBUG_RETURN(my_errno= HA_ERR_NO_CONNECTION); + } + ndb= get_ndb(); + dict= ndb->getDictionary(); + switch (info->ts_cmd_type){ + case (CREATE_TABLESPACE): + { + NdbDictionary::Tablespace ndb_ts; + NdbDictionary::Datafile ndb_df; + if (set_up_tablespace(info, &ndb_ts)) + { + DBUG_RETURN(1); + } + if (set_up_datafile(info, &ndb_df)) + { + DBUG_RETURN(1); + } + if (error= dict->createTablespace(ndb_ts)) + { + DBUG_PRINT("error", ("createTablespace returned %d", error)); + my_error(ER_CREATE_TABLESPACE_FAILED, MYF(0), "TABLESPACE"); + DBUG_RETURN(1); + } + DBUG_PRINT("info", ("Successfully created Tablespace")); + if (error= dict->createDatafile(ndb_df)) + { + DBUG_PRINT("error", ("createDatafile returned %d", error)); + my_error(ER_CREATE_TABLESPACE_FAILED, MYF(0), "DATAFILE"); + DBUG_RETURN(1); + } + break; + } + case 
(ALTER_TABLESPACE): + { + if (info->ts_alter_tablespace_type == ALTER_TABLESPACE_ADD_FILE) + { + NdbDictionary::Datafile ndb_df; + if (set_up_datafile(info, &ndb_df)) + { + DBUG_RETURN(1); + } + if (error= dict->createDatafile(ndb_df)) + { + DBUG_PRINT("error", ("createDatafile returned %d", error)); + my_error(ER_ALTER_TABLESPACE_FAILED, MYF(0), "CREATE DATAFILE"); + DBUG_RETURN(1); + } + } + else if(info->ts_alter_tablespace_type == ALTER_TABLESPACE_DROP_FILE) + { + NdbDictionary::Datafile df = dict->getDatafile(0, + info->data_file_name); + if (strcmp(df.getPath(), info->data_file_name) == 0) + { + if (error= dict->dropDatafile(df)) + { + DBUG_PRINT("error", ("createDatafile returned %d", error)); + my_error(ER_ALTER_TABLESPACE_FAILED, MYF(0), " DROP DATAFILE"); + DBUG_RETURN(1); + } + } + else + { + DBUG_PRINT("error", ("No such datafile")); + my_error(ER_ALTER_TABLESPACE_FAILED, MYF(0), " NO SUCH FILE"); + DBUG_RETURN(1); + } + } + else + { + DBUG_PRINT("error", ("Unsupported alter tablespace: %d", + info->ts_alter_tablespace_type)); + DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED); + } + break; + } + case (CREATE_LOGFILE_GROUP): + { + NdbDictionary::LogfileGroup ndb_lg; + NdbDictionary::Undofile ndb_uf; + if (info->undo_file_name == NULL) + { + /* + REDO files in LOGFILE GROUP not supported yet + */ + DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED); + } + if (set_up_logfile_group(info, &ndb_lg)) + { + DBUG_RETURN(1); + } + if (error= dict->createLogfileGroup(ndb_lg)) + { + DBUG_PRINT("error", ("createLogfileGroup returned %d", error)); + my_error(ER_CREATE_TABLESPACE_FAILED, MYF(0), "LOGFILE GROUP"); + DBUG_RETURN(1); + } + DBUG_PRINT("info", ("Successfully created Logfile Group")); + if (set_up_undofile(info, &ndb_uf)) + { + DBUG_RETURN(1); + } + if (error= dict->createUndofile(ndb_uf)) + { + DBUG_PRINT("error", ("createUndofile returned %d", error)); + my_error(ER_CREATE_TABLESPACE_FAILED, MYF(0), "UNDOFILE"); + DBUG_RETURN(1); + } + break; + } + case (ALTER_LOGFILE_GROUP): + { + if (info->undo_file_name == NULL) + { + /* + REDO files in LOGFILE GROUP not supported yet + */ + DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED); + } + NdbDictionary::Undofile ndb_uf; + if (set_up_undofile(info, &ndb_uf)) + { + DBUG_RETURN(1); + } + if (error= dict->createUndofile(ndb_uf)) + { + DBUG_PRINT("error", ("createUndofile returned %d", error)); + my_error(ER_ALTER_TABLESPACE_FAILED, MYF(0), "CREATE UNDOFILE"); + DBUG_RETURN(1); + } + break; + } + case (DROP_TABLESPACE): + { + if (error= dict->dropTablespace( + dict->getTablespace(info->tablespace_name))) + { + DBUG_PRINT("error", ("dropTablespace returned %d", error)); + my_error(ER_DROP_TABLESPACE_FAILED, MYF(0), "TABLESPACE"); + DBUG_RETURN(1); + } + break; + } + case (DROP_LOGFILE_GROUP): + { + if (error= dict->dropLogfileGroup(dict->getLogfileGroup(info->logfile_group_name))) + { + DBUG_PRINT("error", ("dropLogfileGroup returned %d", error)); + my_error(ER_DROP_TABLESPACE_FAILED, MYF(0), "LOGFILE GROUP"); + DBUG_RETURN(1); + } + break; + } + case (CHANGE_FILE_TABLESPACE): + { + DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED); + } + case (ALTER_ACCESS_MODE_TABLESPACE): + { + DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED); + } + default: + { + DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED); + } + } + DBUG_RETURN(FALSE); +} + +#endif /* NDB_DISKDATA */ diff --git a/sql/ha_ndbcluster.h b/sql/ha_ndbcluster.h index 466d7b7044b..40c9450809c 100644 --- a/sql/ha_ndbcluster.h +++ b/sql/ha_ndbcluster.h @@ -25,6 +25,7 @@ #pragma interface /* gcc class implementation */ #endif +#include <NdbApi.hpp> #include 
<ndbapi_limits.h> class Ndb; // Forward declaration @@ -35,10 +36,14 @@ class NdbScanOperation; class NdbScanFilter; class NdbIndexScanOperation; class NdbBlob; +class NdbIndexStat; +class NdbEventOperation; // connectstring to cluster if given by mysqld extern const char *ndbcluster_connectstring; extern ulong ndb_cache_check_time; +extern ulong ndb_report_thresh_binlog_epoch_slip; +extern ulong ndb_report_thresh_binlog_mem_usage; typedef enum ndb_index_type { UNDEFINED_INDEX = 0, @@ -54,15 +59,33 @@ typedef struct ndb_index_data { void *index; void *unique_index; unsigned char *unique_index_attrid_map; + // In this version stats are not shared between threads + NdbIndexStat* index_stat; + uint index_stat_cache_entries; + // Simple counter mechanism to decide when to connect to db + uint index_stat_update_freq; + uint index_stat_query_count; } NDB_INDEX_DATA; +typedef union { const NdbRecAttr *rec; NdbBlob *blob; void *ptr; } NdbValue; + +typedef enum { + NSS_INITIAL= 0, + NSS_DROPPED +} NDB_SHARE_STATE; + typedef struct st_ndbcluster_share { + MEM_ROOT mem_root; THR_LOCK lock; pthread_mutex_t mutex; - char *table_name; - uint table_name_length,use_count; + char *key; + uint key_length; + THD *util_lock; + uint use_count; uint commit_count_lock; ulonglong commit_count; + char *db; + char *table_name; } NDB_SHARE; typedef enum ndb_item_type { @@ -113,6 +136,7 @@ struct negated_function_mapping NDB_FUNC_TYPE neg_fun; }; + /* Define what functions can be negated in condition pushdown. Note, these HAVE to be in the same order as in definition enum @@ -463,7 +487,7 @@ class ha_ndbcluster: public handler int write_row(byte *buf); int update_row(const byte *old_data, byte *new_data); int delete_row(const byte *buf); - int index_init(uint index); + int index_init(uint index, bool sorted); int index_end(); int index_read(byte *buf, const byte *key, uint key_len, enum ha_rkey_function find_flag); @@ -505,6 +529,11 @@ class ha_ndbcluster: public handler const char * table_type() const; const char ** bas_ext() const; ulong table_flags(void) const; + ulong partition_flags(void) const + { + return (HA_CAN_PARTITION | HA_CAN_UPDATE_PARTITION_KEY | + HA_CAN_PARTITION_UNIQUE); + } ulong index_flags(uint idx, uint part, bool all_parts) const; uint max_supported_record_length() const; uint max_supported_keys() const; @@ -514,6 +543,7 @@ class ha_ndbcluster: public handler int rename_table(const char *from, const char *to); int delete_table(const char *name); int create(const char *name, TABLE *form, HA_CREATE_INFO *info); + int get_default_no_partitions(ulonglong max_rows); THR_LOCK_DATA **store_lock(THD *thd, THR_LOCK_DATA **to, enum thr_lock_type lock_type); @@ -577,8 +607,19 @@ static void set_tabname(const char *pathname, char *tabname); uint key_length, qc_engine_callback *engine_callback, ulonglong *engine_data); + + bool check_if_incompatible_data(HA_CREATE_INFO *info, + uint table_changes); + static void invalidate_dictionary_cache(TABLE *table, Ndb *ndb, + const char *tabname, bool global); + private: + friend int ndbcluster_drop_database_impl(const char *path); int alter_table_name(const char *to); + static int delete_table(ha_ndbcluster *h, Ndb *ndb, + const char *path, + const char *db, + const char *table_name); int drop_table(); int create_index(const char *name, KEY *key_info, bool unique); int create_ordered_index(const char *name, KEY *key_info); @@ -592,15 +633,21 @@ private: NDB_INDEX_TYPE get_index_type_from_table(uint index_no) const; int check_index_fields_not_null(uint index_no); - 
int pk_read(const byte *key, uint key_len, byte *buf); - int complemented_pk_read(const byte *old_data, byte *new_data); - int peek_row(const byte *record); - int unique_index_read(const byte *key, uint key_len, - byte *buf); + uint set_up_partition_info(partition_info *part_info, + TABLE *table, + void *tab); + int complemented_pk_read(const byte *old_data, byte *new_data, + uint32 old_part_id); + int pk_read(const byte *key, uint key_len, byte *buf, uint32 part_id); int ordered_index_scan(const key_range *start_key, const key_range *end_key, - bool sorted, bool descending, byte* buf); + bool sorted, bool descending, byte* buf, + part_id_range *part_spec); int full_table_scan(byte * buf); + + int peek_row(const byte *record); + int unique_index_read(const byte *key, uint key_len, + byte *buf); int fetch_next(NdbScanOperation* op); int next_result(byte *buf); int define_read_attrs(byte* buf, NdbOperation* op); @@ -618,13 +665,15 @@ private: uint fieldnr, const byte* field_ptr); int set_ndb_key(NdbOperation*, Field *field, uint fieldnr, const byte* field_ptr); - int set_ndb_value(NdbOperation*, Field *field, uint fieldnr, bool *set_blob_value= 0); + int set_ndb_value(NdbOperation*, Field *field, uint fieldnr, + int row_offset= 0, bool *set_blob_value= 0); int get_ndb_value(NdbOperation*, Field *field, uint fieldnr, byte*); friend int g_get_ndb_blobs_value(NdbBlob *ndb_blob, void *arg); int get_ndb_blobs_value(NdbBlob *last_ndb_blob); int set_primary_key(NdbOperation *op, const byte *key); int set_primary_key_from_record(NdbOperation *op, const byte *record); - int set_bounds(NdbIndexScanOperation*, const key_range *keys[2], uint= 0); + int set_bounds(NdbIndexScanOperation*, uint inx, bool rir, + const key_range *keys[2], uint= 0); int key_cmp(uint keynr, const byte * old_row, const byte * new_row); int set_index_key(NdbOperation *, const KEY *key_info, const byte *key_ptr); void print_results(); @@ -632,7 +681,7 @@ private: ulonglong get_auto_increment(); void invalidate_dictionary_cache(bool global); int ndb_err(NdbTransaction*); - bool uses_blob_value(bool all_fields); + bool uses_blob_value(); char *update_table_comment(const char * comment); @@ -662,6 +711,7 @@ private: NdbScanOperation* op); friend int execute_commit(ha_ndbcluster*, NdbTransaction*); + friend int execute_no_commit_ignore_no_key(ha_ndbcluster*, NdbTransaction*); friend int execute_no_commit(ha_ndbcluster*, NdbTransaction*); friend int execute_no_commit_ie(ha_ndbcluster*, NdbTransaction*); @@ -678,13 +728,17 @@ private: NDB_SHARE *m_share; NDB_INDEX_DATA m_index[MAX_KEY]; // NdbRecAttr has no reference to blob - typedef union { const NdbRecAttr *rec; NdbBlob *blob; void *ptr; } NdbValue; NdbValue m_value[NDB_MAX_ATTRIBUTES_IN_TABLE]; + partition_info *m_part_info; + byte *m_rec0; + Field **m_part_field_array; + bool m_use_partition_function; + bool m_sorted; bool m_use_write; bool m_ignore_dup_key; bool m_primary_key_update; - bool m_retrieve_all_fields; - bool m_retrieve_primary_key; + bool m_write_op; + bool m_ignore_no_key; ha_rows m_rows_to_insert; ha_rows m_rows_inserted; ha_rows m_bulk_insert_rows; @@ -717,7 +771,7 @@ private: extern struct show_var_st ndb_status_variables[]; bool ndbcluster_init(void); -bool ndbcluster_end(void); +int ndbcluster_end(ha_panic_function flag); int ndbcluster_discover(THD* thd, const char* dbname, const char* name, const void** frmblob, uint* frmlen); @@ -725,8 +779,9 @@ int ndbcluster_find_files(THD *thd,const char *db,const char *path, const char *wild, bool dir, List<char> *files); 
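The header change just below replaces the old Protocol-based SHOW status
entry point with one driven by a server-supplied stat_print callback. A
minimal standalone sketch of that calling convention follows; the callback
type is inferred from the call site stat_print(thd, ndbcluster_hton.name,
tmp.m_name, buf) earlier in this patch, and all names and values here are
invented.

#include <cstdio>

struct THD;                                     // opaque in this sketch
typedef bool (*stat_print_fn)(THD *thd, const char *engine,
                              const char *name, const char *status);

// Walk some per-engine resources and hand one status row per resource to
// the printer, stopping if the printer reports failure.
static bool show_status_rows(THD *thd, stat_print_fn stat_print)
{
  const char *names[]=  { "NdbTransaction", "NdbOperation" };
  const char *status[]= { "created=4, free=2, sizeof=368",
                          "created=16, free=10, sizeof=944" };
  for (unsigned i= 0; i < 2; i++)
    if (stat_print(thd, "ndbcluster", names[i], status[i]))
      return true;                              // propagate printer error
  return false;
}

static bool print_row(THD *, const char *engine, const char *name,
                      const char *status)
{
  return printf("%s\t%s\t%s\n", engine, name, status) < 0;
}

int main()
{
  return show_status_rows(NULL, print_row) ? 1 : 0;
}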
 int ndbcluster_table_exists_in_engine(THD* thd, const char *db,
                                       const char *name);
-int ndbcluster_drop_database(const char* path);
+void ndbcluster_drop_database(char* path);
 void ndbcluster_print_error(int error, const NdbOperation *error_op);
-int ndbcluster_show_status(THD*);
+bool ndbcluster_show_status(THD*,stat_print_fn *,enum ha_stat_type);
+
diff --git a/sql/ha_partition.cc b/sql/ha_partition.cc
new file mode 100644
index 00000000000..14dab7f1248
--- /dev/null
+++ b/sql/ha_partition.cc
@@ -0,0 +1,3276 @@
+/* Copyright (C) 2005 MySQL AB
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation; either version 2 of the License, or
+  (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, write to the Free Software
+  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/*
+  This handler was developed by Mikael Ronstrom for version 5.1 of MySQL.
+  It is an abstraction layer on top of other handlers such as MyISAM,
+  InnoDB, Federated, Berkeley DB and so forth. Partitioned tables can also
+  be handled by a storage engine. The current example of this is NDB
+  Cluster that has internally handled partitioning. This has benefits in
+  that many loops needed in the partition handler can be avoided.
+
+  Partitioning has an inherent feature which in some cases is positive and
+  in some cases is negative. It splits the data into chunks. This makes
+  the data more manageable, queries can easily be parallelised towards the
+  parts and indexes are split such that there are fewer levels in the
+  index trees. The inherent disadvantage is that to use a split index
+  one has to scan all index parts, which is ok for large queries but for
+  small queries it can be a disadvantage.
+
+  Partitioning lays the foundation for more manageable databases that are
+  extremely large. It also lays the foundation for more parallelism in the
+  execution of queries. This functionality will grow with later versions
+  of MySQL.
+
+  You can enable it in your build by doing the following during the build
+  process:
+  ./configure --with-partition
+
+  The partition handler is set up to use table locks. It implements a
+  partition "SHARE" that is inserted into a hash by table name. You can
+  use this to store state information that any partition handler object
+  will be able to see if it is using the same table.
+
+  Please read the object definition in ha_partition.h before reading the
+  rest of this file.
+*/ + +#ifdef __GNUC__ +#pragma implementation // gcc: Class implementation +#endif + +#include <mysql_priv.h> + +#include "ha_partition.h" + +static const char *ha_par_ext= ".par"; +#ifdef NOT_USED +static int free_share(PARTITION_SHARE * share); +static PARTITION_SHARE *get_share(const char *table_name, TABLE * table); +#endif + +/**************************************************************************** + MODULE create/delete handler object +****************************************************************************/ + +static handler* partition_create_handler(TABLE *table); + +handlerton partition_hton = { + "partition", + SHOW_OPTION_YES, + "Partition Storage Engine Helper", /* A comment used by SHOW to describe an engine */ + DB_TYPE_PARTITION_DB, + 0, /* Method that initializes a storage engine */ + 0, /* slot */ + 0, /* savepoint size */ + NULL /*ndbcluster_close_connection*/, + NULL, /* savepoint_set */ + NULL, /* savepoint_rollback */ + NULL, /* savepoint_release */ + NULL /*ndbcluster_commit*/, + NULL /*ndbcluster_rollback*/, + NULL, /* prepare */ + NULL, /* recover */ + NULL, /* commit_by_xid */ + NULL, /* rollback_by_xid */ + NULL, + NULL, + NULL, + partition_create_handler, /* Create a new handler */ + NULL, /* Drop a database */ + NULL, /* Panic call */ + NULL, /* Release temporary latches */ + NULL, /* Update Statistics */ + NULL, /* Start Consistent Snapshot */ + NULL, /* Flush logs */ + NULL, /* Show status */ + NULL, /* Replication Report Sent Binlog */ + HTON_NOT_USER_SELECTABLE +}; + +static handler* partition_create_handler(TABLE *table) +{ + return new ha_partition(table); +} + +ha_partition::ha_partition(TABLE *table) + :handler(&partition_hton, table), m_part_info(NULL), m_create_handler(FALSE), + m_is_sub_partitioned(0) +{ + DBUG_ENTER("ha_partition::ha_partition(table)"); + init_handler_variables(); + if (table) + { + if (table->s->part_info) + { + m_part_info= table->s->part_info; + m_is_sub_partitioned= is_sub_partitioned(m_part_info); + } + } + DBUG_VOID_RETURN; +} + + +ha_partition::ha_partition(partition_info *part_info) + :handler(&partition_hton, NULL), m_part_info(part_info), m_create_handler(TRUE), + m_is_sub_partitioned(is_sub_partitioned(m_part_info)) + +{ + DBUG_ENTER("ha_partition::ha_partition(part_info)"); + init_handler_variables(); + DBUG_ASSERT(m_part_info); + DBUG_VOID_RETURN; +} + + +void ha_partition::init_handler_variables() +{ + active_index= MAX_KEY; + m_file_buffer= NULL; + m_name_buffer_ptr= NULL; + m_engine_array= NULL; + m_file= NULL; + m_tot_parts= 0; + m_has_transactions= 0; + m_pkey_is_clustered= 0; + m_lock_type= F_UNLCK; + m_part_spec.start_part= NO_CURRENT_PART_ID; + m_scan_value= 2; + m_ref_length= 0; + m_part_spec.end_part= NO_CURRENT_PART_ID; + m_index_scan_type= partition_no_index_scan; + m_start_key.key= NULL; + m_start_key.length= 0; + m_myisam= FALSE; + m_innodb= FALSE; + m_extra_cache= FALSE; + m_extra_cache_size= 0; + m_table_flags= HA_FILE_BASED | HA_REC_NOT_IN_SEQ; + m_low_byte_first= 1; + m_part_field_array= NULL; + m_ordered_rec_buffer= NULL; + m_top_entry= NO_CURRENT_PART_ID; + m_rec_length= 0; + m_last_part= 0; + m_rec0= 0; + m_curr_key_info= 0; + /* + this allows blackhole to work properly + */ + m_no_locks= 0; + +#ifdef DONT_HAVE_TO_BE_INITALIZED + m_start_key.flag= 0; + m_ordered= TRUE; +#endif +} + + +ha_partition::~ha_partition() +{ + DBUG_ENTER("ha_partition::~ha_partition()"); + if (m_file != NULL) + { + uint i; + for (i= 0; i < m_tot_parts; i++) + delete m_file[i]; + } + my_free((char*) 
m_ordered_rec_buffer, MYF(MY_ALLOW_ZERO_PTR));
+
+  clear_handler_file();
+  DBUG_VOID_RETURN;
+}
+
+
+/*
+  The partition handler is only a layer on top of other engines, so it
+  cannot really perform anything without the underlying handlers. We
+  therefore add this method as part of the allocation of a handler object.
+
+  1) Allocation of underlying handlers
+     If we have access to the partition info we will allocate one handler
+     instance for each partition.
+  2) Allocation without partition info
+     The cases where we don't have access to this information are when we
+     are called in preparation for delete_table and rename_table, and in
+     that case we only need to set HA_FILE_BASED. In that case we will use
+     the .par file that contains information about the partitions and
+     their engines and the names of each partition.
+  3) Table flags initialisation
+     We also need to set table flags for the partition handler. This is
+     not static since it depends on what storage engines are used as
+     underlying handlers.
+     The table flags are set in this routine to simulate the behaviour of
+     a normal storage engine.
+     The flag HA_FILE_BASED will be set independently of the underlying
+     handlers.
+  4) Index flags initialisation
+     When information about the indexes is available it is also possible
+     to initialise the index flags. Again the index flags must be
+     initialised by using the underlying handlers since this is storage
+     engine dependent.
+     The flag HA_READ_ORDER will be reset for the time being to indicate
+     no ordered output is available from partition handler indexes. Later
+     a merge sort will be performed using the underlying handlers.
+  5) primary_key_is_clustered, has_transactions and low_byte_first are
+     calculated here.
+*/
+
+int ha_partition::ha_initialise()
+{
+  handler **file_array, *file;
+  DBUG_ENTER("ha_partition::ha_initialise");
+
+  if (m_part_info)
+  {
+    m_tot_parts= get_tot_partitions(m_part_info);
+    DBUG_ASSERT(m_tot_parts > 0);
+    if (m_create_handler)
+    {
+      if (new_handlers_from_part_info())
+        DBUG_RETURN(1);
+    }
+    else if (get_from_handler_file(table->s->path))
+    {
+      my_error(ER_OUTOFMEMORY, MYF(0), 129); //Temporary fix TODO print_error
+      DBUG_RETURN(1);
+    }
+    /*
+      We create all underlying table handlers here. We only do it if we
+      have access to the partition info. We do it in this special method
+      to be able to report allocation errors.
+    */
+    /*
+      Set up table_flags, low_byte_first, primary_key_is_clustered and
+      has_transactions since they are called often in all kinds of places,
+      other parameters are calculated on demand.
+      HA_FILE_BASED is always set for the partition handler since we use a
+      special file for handling names of partitions and engine types.
+      HA_CAN_GEOMETRY, HA_CAN_FULLTEXT, HA_CAN_SQL_HANDLER and
+      HA_CAN_INSERT_DELAYED are disabled until further investigated.
+    */
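The table_flags handling just below boils down to intersecting the
capability bits of every underlying handler and then forcing the few bits
the partition layer itself requires. A reduced standalone sketch, with
invented flag values standing in for the real handler.h constants:

#include <cstdio>

enum {                                          // illustrative bits only
  HA_FILE_BASED=     1 << 0,
  HA_REC_NOT_IN_SEQ= 1 << 1,
  HA_CAN_GEOMETRY=   1 << 2,
  HA_CAN_FULLTEXT=   1 << 3
};

int main()
{
  // Capability masks of three hypothetical underlying handlers.
  unsigned long parts[]= { 0x0fUL, 0x0bUL, 0x0fUL };
  unsigned long flags= parts[0];
  for (int i= 1; i < 3; i++)
    flags&= parts[i];                           // keep common bits only
  flags&= ~(HA_CAN_GEOMETRY | HA_CAN_FULLTEXT); // disabled by the layer
  flags|= HA_FILE_BASED | HA_REC_NOT_IN_SEQ;    // always set by the layer
  printf("table_flags=0x%lx\n", flags);
  return 0;
}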
+    m_table_flags= m_file[0]->table_flags();
+    m_low_byte_first= m_file[0]->low_byte_first();
+    m_has_transactions= TRUE;
+    m_pkey_is_clustered= TRUE;
+    file_array= m_file;
+    do
+    {
+      file= *file_array;
+      if (m_low_byte_first != file->low_byte_first())
+      {
+        // Cannot have handlers with different endian
+        my_error(ER_MIX_HANDLER_ERROR, MYF(0));
+        DBUG_RETURN(1);
+      }
+      if (!file->has_transactions())
+        m_has_transactions= FALSE;
+      if (!file->primary_key_is_clustered())
+        m_pkey_is_clustered= FALSE;
+      m_table_flags&= file->table_flags();
+    } while (*(++file_array));
+    /* The flag bits must be OR:ed here; AND of distinct bits is zero */
+    m_table_flags&= ~(HA_CAN_GEOMETRY | HA_CAN_FULLTEXT |
+                      HA_CAN_SQL_HANDLER | HA_CAN_INSERT_DELAYED);
+    /*
+      TODO RONM:
+      Make sure that the tree works without partition defined, compiles
+      and goes through mysql-test-run.
+    */
+  }
+  m_table_flags|= HA_FILE_BASED | HA_REC_NOT_IN_SEQ;
+  DBUG_RETURN(0);
+}
+
+/****************************************************************************
+                MODULE meta data changes
+****************************************************************************/
+/*
+  This method is used to calculate the partition name, service routine to
+  the del_ren_cre_table method.
+*/
+
+static void create_partition_name(char *out, const char *in1, const char *in2)
+{
+  strxmov(out, in1, "_", in2, NullS);
+}
+
+/*
+  This method is used to calculate the subpartition name, service routine
+  to the del_ren_cre_table method.
+*/
+
+static void create_subpartition_name(char *out, const char *in1,
+                                     const char *in2, const char *in3)
+{
+  strxmov(out, in1, "_", in2, "_", in3, NullS);
+}
+
+
+/*
+  Used to delete a table. By the time delete_table() has been called all
+  opened references to this table will have been closed (and your globally
+  shared references released). The variable name will just be the name of
+  the table. You will need to remove any files you have created at this
+  point.
+
+  If you do not implement this, the default delete_table() is called from
+  handler.cc and it will delete all files with the file extensions returned
+  by bas_ext().
+
+  Called from handler.cc by delete_table and ha_create_table(). Only used
+  during create if the table_flag HA_DROP_BEFORE_CREATE was specified for
+  the storage engine.
+*/
+
+int ha_partition::delete_table(const char *name)
+{
+  int error;
+  DBUG_ENTER("ha_partition::delete_table");
+  if ((error= del_ren_cre_table(name, NULL, NULL, NULL)))
+    DBUG_RETURN(error);
+  DBUG_RETURN(handler::delete_table(name));
+}
+
+
+/*
+  Renames a table from one name to another from alter table call.
+
+  If you do not implement this, the default rename_table() is called from
+  handler.cc and it will delete all files with the file extensions returned
+  by bas_ext().
+
+  Called from sql_table.cc by mysql_rename_table().
+*/
+
+int ha_partition::rename_table(const char *from, const char *to)
+{
+  int error;
+  DBUG_ENTER("ha_partition::rename_table");
+  if ((error= del_ren_cre_table(from, to, NULL, NULL)))
+    DBUG_RETURN(error);
+  DBUG_RETURN(handler::rename_table(from, to));
+}
+
+
+/*
+  create_handler_files is called to create any handler specific files
+  before opening the file with openfrm to later call ::create on the
+  file object.
+  In the partition handler this is used to store the names of partitions
+  and types of engines in the partitions.
+*/ + +int ha_partition::create_handler_files(const char *name) +{ + DBUG_ENTER("ha_partition::create_handler_files()"); + + /* + We need to update total number of parts since we might write the handler + file as part of a partition management command + */ + m_tot_parts= get_tot_partitions(m_part_info); + if (create_handler_file(name)) + { + my_error(ER_CANT_CREATE_HANDLER_FILE, MYF(0)); + DBUG_RETURN(1); + } + DBUG_RETURN(0); +} + + +/* + create() is called to create a table. The variable name will have the name + of the table. When create() is called you do not need to worry about + opening the table. Also, the FRM file will have already been created so + adjusting create_info will not do you any good. You can overwrite the frm + file at this point if you wish to change the table definition, but there + are no methods currently provided for doing that. + + Called from handle.cc by ha_create_table(). +*/ + +int ha_partition::create(const char *name, TABLE *table_arg, + HA_CREATE_INFO *create_info) +{ + char t_name[FN_REFLEN]; + DBUG_ENTER("ha_partition::create"); + + strmov(t_name, name); + *fn_ext(t_name)= 0; + if (del_ren_cre_table(t_name, NULL, table_arg, create_info)) + { + handler::delete_table(t_name); + DBUG_RETURN(1); + } + DBUG_RETURN(0); +} + +int ha_partition::drop_partitions(const char *path) +{ + List_iterator<partition_element> part_it(m_part_info->partitions); + char part_name_buff[FN_REFLEN]; + uint no_parts= m_part_info->no_parts; + uint no_subparts= m_part_info->no_subparts, i= 0; + int error= 1; + DBUG_ENTER("ha_partition::drop_partitions()"); + + do + { + partition_element *part_elem= part_it++; + if (part_elem->part_state == PART_IS_DROPPED) + { + /* + This part is to be dropped, meaning the part or all its subparts. + */ + if (is_sub_partitioned(m_part_info)) + { + List_iterator<partition_element> sub_it(part_elem->subpartitions); + uint j= 0, part; + do + { + partition_element *sub_elem= sub_it++; + create_subpartition_name(part_name_buff, path, + part_elem->partition_name, + sub_elem->partition_name); + part= i * no_subparts + j; + DBUG_PRINT("info", ("Drop subpartition %s", part_name_buff)); + error= m_file[part]->delete_table((const char *) part_name_buff); + } while (++j < no_subparts); + } + else + { + create_partition_name(part_name_buff, path, + part_elem->partition_name); + DBUG_PRINT("info", ("Drop partition %s", part_name_buff)); + error= m_file[i]->delete_table((const char *) part_name_buff); + } + } + } while (++i < no_parts); + DBUG_RETURN(error); +} + +void ha_partition::update_create_info(HA_CREATE_INFO *create_info) +{ + return; +} + + +char *ha_partition::update_table_comment(const char *comment) +{ + return (char*) comment; // Nothing to change +} + + + +/* + Common routine to handle delete_table and rename_table. + The routine uses the partition handler file to get the + names of the partition instances. Both these routines + are called after creating the handler without table + object and thus the file is needed to discover the + names of the partitions and the underlying storage engines. 
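del_ren_cre_table() below glues the table path supplied by the server to
each partition name read from the .par file using the helpers above. A
self-contained illustration of the resulting names (the path and partition
names are invented; the real code builds them with strxmov from mysys):

#include <cstdio>

// Stand-ins for create_partition_name()/create_subpartition_name().
static void part_name(char *out, const char *path, const char *part)
{
  sprintf(out, "%s_%s", path, part);
}

static void subpart_name(char *out, const char *path,
                         const char *part, const char *sub)
{
  sprintf(out, "%s_%s_%s", path, part, sub);
}

int main()
{
  char buf[512];
  part_name(buf, "./test/t1", "p0");            // "./test/t1_p0"
  printf("%s\n", buf);
  subpart_name(buf, "./test/t1", "p0", "sp0");  // "./test/t1_p0_sp0"
  printf("%s\n", buf);
  return 0;
}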
+*/ + +uint ha_partition::del_ren_cre_table(const char *from, + const char *to, + TABLE *table_arg, + HA_CREATE_INFO *create_info) +{ + int save_error= 0, error; + char from_buff[FN_REFLEN], to_buff[FN_REFLEN]; + char *name_buffer_ptr; + uint i; + handler **file; + DBUG_ENTER("del_ren_cre_table()"); + + if (get_from_handler_file(from)) + DBUG_RETURN(TRUE); + DBUG_ASSERT(m_file_buffer); + name_buffer_ptr= m_name_buffer_ptr; + file= m_file; + i= 0; + do + { + create_partition_name(from_buff, from, name_buffer_ptr); + if (to != NULL) + { // Rename branch + create_partition_name(to_buff, to, name_buffer_ptr); + error= (*file)->rename_table((const char*) from_buff, + (const char*) to_buff); + } + else if (table_arg == NULL) // delete branch + error= (*file)->delete_table((const char*) from_buff); + else + { + set_up_table_before_create(table_arg, create_info, i); + error= (*file)->create(from_buff, table_arg, create_info); + } + name_buffer_ptr= strend(name_buffer_ptr) + 1; + if (error) + save_error= error; + i++; + } while (*(++file)); + DBUG_RETURN(save_error); +} + + +partition_element *ha_partition::find_partition_element(uint part_id) +{ + uint i; + uint curr_part_id= 0; + List_iterator_fast < partition_element > part_it(m_part_info->partitions); + + for (i= 0; i < m_part_info->no_parts; i++) + { + partition_element *part_elem; + part_elem= part_it++; + if (m_is_sub_partitioned) + { + uint j; + List_iterator_fast <partition_element> sub_it(part_elem->subpartitions); + for (j= 0; j < m_part_info->no_subparts; j++) + { + part_elem= sub_it++; + if (part_id == curr_part_id++) + return part_elem; + } + } + else if (part_id == curr_part_id++) + return part_elem; + } + DBUG_ASSERT(0); + current_thd->fatal_error(); // Abort + return NULL; +} + + +void ha_partition::set_up_table_before_create(TABLE *table, + HA_CREATE_INFO *info, + uint part_id) +{ + /* + Set up + 1) Comment on partition + 2) MAX_ROWS, MIN_ROWS on partition + 3) Index file name on partition + 4) Data file name on partition + */ + partition_element *part_elem= find_partition_element(part_id); + if (!part_elem) + return; // Fatal error + table->s->max_rows= part_elem->part_max_rows; + table->s->min_rows= part_elem->part_min_rows; + info->index_file_name= part_elem->index_file_name; + info->data_file_name= part_elem->data_file_name; +} + + +/* + Routine used to add two names with '_' in between then. Service routine + to create_handler_file + Include the NULL in the count of characters since it is needed as separator + between the partition names. +*/ + +static uint name_add(char *dest, const char *first_name, const char *sec_name) +{ + return (uint) (strxmov(dest, first_name, "_", sec_name, NullS) -dest) + 1; +} + + +/* + Method used to create handler file with names of partitions, their + engine types and the number of partitions. 
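The function below documents the .par file format and rounds both the
engine-type array and the name block up to whole 4-byte words. A worked
sketch of the size arithmetic, using an invented table with 2 partitions
of 2 subpartitions each (names "p0_sp0" ... "p1_sp1"):

#include <cstdio>

int main()
{
  unsigned tot_parts= 4;                        // 2 parts x 2 subparts
  unsigned tot_name_len= 4 * (6 + 1);           // 4 names, 6 chars + NUL
  unsigned tot_partition_words= (tot_parts + 3) / 4;    // engine bytes -> 1
  unsigned tot_name_words= (tot_name_len + 3) / 4;      // name block  -> 7
  // 4 leading words: total length, checksum, #partitions, name-block length
  unsigned tot_len_words= 4 + tot_partition_words + tot_name_words;
  printf("%u words = %u bytes\n", tot_len_words, 4 * tot_len_words); // 48
  return 0;
}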
+*/ + +bool ha_partition::create_handler_file(const char *name) +{ + partition_element *part_elem, *subpart_elem; + uint i, j, part_name_len, subpart_name_len; + uint tot_partition_words, tot_name_len; + uint tot_len_words, tot_len_byte, chksum, tot_name_words; + char *name_buffer_ptr; + uchar *file_buffer, *engine_array; + bool result= TRUE; + char file_name[FN_REFLEN]; + File file; + List_iterator_fast < partition_element > part_it(m_part_info->partitions); + DBUG_ENTER("create_handler_file"); + + DBUG_PRINT("info", ("table name = %s", name)); + tot_name_len= 0; + for (i= 0; i < m_part_info->no_parts; i++) + { + part_elem= part_it++; + part_name_len= strlen(part_elem->partition_name); + if (!m_is_sub_partitioned) + tot_name_len+= part_name_len + 1; + else + { + List_iterator_fast<partition_element> sub_it(part_elem->subpartitions); + for (j= 0; j < m_part_info->no_subparts; j++) + { + subpart_elem= sub_it++; + subpart_name_len= strlen(subpart_elem->partition_name); + tot_name_len+= part_name_len + subpart_name_len + 2; + } + } + } + /* + File format: + Length in words 4 byte + Checksum 4 byte + Total number of partitions 4 byte + Array of engine types n * 4 bytes where + n = (m_tot_parts + 3)/4 + Length of name part in bytes 4 bytes + Name part m * 4 bytes where + m = ((length_name_part + 3)/4)*4 + + All padding bytes are zeroed + */ + tot_partition_words= (m_tot_parts + 3) / 4; + tot_name_words= (tot_name_len + 3) / 4; + tot_len_words= 4 + tot_partition_words + tot_name_words; + tot_len_byte= 4 * tot_len_words; + if (!(file_buffer= (uchar *) my_malloc(tot_len_byte, MYF(MY_ZEROFILL)))) + DBUG_RETURN(TRUE); + engine_array= (file_buffer + 12); + name_buffer_ptr= (char*) (file_buffer + ((4 + tot_partition_words) * 4)); + part_it.rewind(); + for (i= 0; i < m_part_info->no_parts; i++) + { + part_elem= part_it++; + if (!m_is_sub_partitioned) + { + name_buffer_ptr= strmov(name_buffer_ptr, part_elem->partition_name)+1; + *engine_array= (uchar) part_elem->engine_type; + DBUG_PRINT("info", ("engine: %u", *engine_array)); + engine_array++; + } + else + { + List_iterator_fast<partition_element> sub_it(part_elem->subpartitions); + for (j= 0; j < m_part_info->no_subparts; j++) + { + subpart_elem= sub_it++; + name_buffer_ptr+= name_add(name_buffer_ptr, + part_elem->partition_name, + subpart_elem->partition_name); + *engine_array= (uchar) part_elem->engine_type; + engine_array++; + } + } + } + chksum= 0; + int4store(file_buffer, tot_len_words); + int4store(file_buffer + 8, m_tot_parts); + int4store(file_buffer + 12 + (tot_partition_words * 4), tot_name_len); + for (i= 0; i < tot_len_words; i++) + chksum^= uint4korr(file_buffer + 4 * i); + int4store(file_buffer + 4, chksum); + /* + Remove .frm extension and replace with .par + Create and write and close file + to be used at open, delete_table and rename_table + */ + fn_format(file_name, name, "", ".par", MYF(MY_REPLACE_EXT)); + if ((file= my_create(file_name, CREATE_MODE, O_RDWR | O_TRUNC, + MYF(MY_WME))) >= 0) + { + result= my_write(file, (byte *) file_buffer, tot_len_byte, + MYF(MY_WME | MY_NABP)); + VOID(my_close(file, MYF(0))); + } + else + result= TRUE; + my_free((char*) file_buffer, MYF(0)); + DBUG_RETURN(result); +} + + +void ha_partition::clear_handler_file() +{ + my_free((char*) m_file_buffer, MYF(MY_ALLOW_ZERO_PTR)); + m_file_buffer= NULL; + m_name_buffer_ptr= NULL; + m_engine_array= NULL; +} + + +bool ha_partition::create_handlers() +{ + uint i; + uint alloc_len= (m_tot_parts + 1) * sizeof(handler*); + DBUG_ENTER("create_handlers"); + + if 
(!(m_file= (handler **) sql_alloc(alloc_len))) + DBUG_RETURN(TRUE); + bzero(m_file, alloc_len); + for (i= 0; i < m_tot_parts; i++) + { + if (!(m_file[i]= get_new_handler(table, current_thd->mem_root, + (enum db_type) m_engine_array[i]))) + DBUG_RETURN(TRUE); + DBUG_PRINT("info", ("engine_type: %u", m_engine_array[i])); + } + m_file[m_tot_parts]= 0; + /* For the moment we only support partition over the same table engine */ + if (m_engine_array[0] == (uchar) DB_TYPE_MYISAM) + { + DBUG_PRINT("info", ("MyISAM")); + m_myisam= TRUE; + } + else if (m_engine_array[0] == (uchar) DB_TYPE_INNODB) + { + DBUG_PRINT("info", ("InnoDB")); + m_innodb= TRUE; + } + DBUG_RETURN(FALSE); +} + + +bool ha_partition::new_handlers_from_part_info() +{ + uint i, j; + partition_element *part_elem; + uint alloc_len= (m_tot_parts + 1) * sizeof(handler*); + List_iterator_fast <partition_element> part_it(m_part_info->partitions); + THD *thd= current_thd; + DBUG_ENTER("ha_partition::new_handlers_from_part_info"); + + if (!(m_file= (handler **) sql_alloc(alloc_len))) + goto error; + bzero(m_file, alloc_len); + DBUG_ASSERT(m_part_info->no_parts > 0); + + i= 0; + /* + Don't know the size of the underlying storage engine, invent a number of + bytes allocated for error message if allocation fails + */ + alloc_len= 128; + do + { + part_elem= part_it++; + if (!(m_file[i]= get_new_handler(table, thd->mem_root, + part_elem->engine_type))) + goto error; + DBUG_PRINT("info", ("engine_type: %u", (uint) part_elem->engine_type)); + if (m_is_sub_partitioned) + { + for (j= 0; j < m_part_info->no_subparts; j++) + { + if (!(m_file[i]= get_new_handler(table, thd->mem_root, + part_elem->engine_type))) + goto error; + DBUG_PRINT("info", ("engine_type: %u", (uint) part_elem->engine_type)); + } + } + } while (++i < m_part_info->no_parts); + if (part_elem->engine_type == DB_TYPE_MYISAM) + { + DBUG_PRINT("info", ("MyISAM")); + m_myisam= TRUE; + } + DBUG_RETURN(FALSE); +error: + my_error(ER_OUTOFMEMORY, MYF(0), alloc_len); + DBUG_RETURN(TRUE); +} + + +/* + Open handler file to get partition names, engine types and number of + partitions. 
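get_from_handler_file() below validates the .par file by XOR-ing all
32-bit words of the buffer, stored checksum included, and requiring the
result to be zero; the writer stores the XOR computed while the checksum
word is still zero. A minimal demonstration of the same scheme with
invented data:

#include <cstdio>

// XOR of all 32-bit words in the buffer.
static unsigned xor_words(const unsigned *w, unsigned n)
{
  unsigned x= 0;
  for (unsigned i= 0; i < n; i++)
    x^= w[i];
  return x;
}

int main()
{
  // Word 1 is the checksum slot, zero while the checksum is computed.
  unsigned buf[6]= { 6, 0, 4, 0x01020304, 28, 0xabcd };
  buf[1]= xor_words(buf, 6);        // writer side: store the checksum
  // Reader side: XOR over everything, checksum included, must be zero.
  printf("valid=%d\n", xor_words(buf, 6) == 0);
  return 0;
}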
+*/ + +bool ha_partition::get_from_handler_file(const char *name) +{ + char buff[FN_REFLEN], *address_tot_name_len; + File file; + char *file_buffer, *name_buffer_ptr; + uchar *engine_array; + uint i, len_bytes, len_words, tot_partition_words, tot_name_words, chksum; + DBUG_ENTER("ha_partition::get_from_handler_file"); + DBUG_PRINT("enter", ("table name: '%s'", name)); + + if (m_file_buffer) + DBUG_RETURN(FALSE); + fn_format(buff, name, "", ha_par_ext, MYF(0)); + + /* Following could be done with my_stat to read in whole file */ + if ((file= my_open(buff, O_RDONLY | O_SHARE, MYF(0))) < 0) + DBUG_RETURN(TRUE); + if (my_read(file, (byte *) & buff[0], 8, MYF(MY_NABP))) + goto err1; + len_words= uint4korr(buff); + len_bytes= 4 * len_words; + if (!(file_buffer= my_malloc(len_bytes, MYF(0)))) + goto err1; + VOID(my_seek(file, 0, MY_SEEK_SET, MYF(0))); + if (my_read(file, (byte *) file_buffer, len_bytes, MYF(MY_NABP))) + goto err2; + + chksum= 0; + for (i= 0; i < len_words; i++) + chksum ^= uint4korr((file_buffer) + 4 * i); + if (chksum) + goto err2; + m_tot_parts= uint4korr((file_buffer) + 8); + tot_partition_words= (m_tot_parts + 3) / 4; + engine_array= (uchar *) ((file_buffer) + 12); + address_tot_name_len= file_buffer + 12 + 4 * tot_partition_words; + tot_name_words= (uint4korr(address_tot_name_len) + 3) / 4; + if (len_words != (tot_partition_words + tot_name_words + 4)) + goto err2; + name_buffer_ptr= file_buffer + 16 + 4 * tot_partition_words; + VOID(my_close(file, MYF(0))); + m_file_buffer= file_buffer; // Will be freed in clear_handler_file() + m_name_buffer_ptr= name_buffer_ptr; + m_engine_array= engine_array; + if (!m_file && create_handlers()) + { + clear_handler_file(); + DBUG_RETURN(TRUE); + } + DBUG_RETURN(FALSE); + +err2: + my_free(file_buffer, MYF(0)); +err1: + VOID(my_close(file, MYF(0))); + DBUG_RETURN(TRUE); +} + +/**************************************************************************** + MODULE open/close object +****************************************************************************/ +/* + Used for opening tables. The name will be the name of the file. + A table is opened when it needs to be opened. For instance + when a request comes in for a select on the table (tables are not + open and closed for each request, they are cached). + + Called from handler.cc by handler::ha_open(). The server opens all tables + by calling ha_open() which then calls the handler specific open(). +*/ + +int ha_partition::open(const char *name, int mode, uint test_if_locked) +{ + int error; + char name_buff[FN_REFLEN]; + char *name_buffer_ptr= m_name_buffer_ptr; + handler **file; + uint alloc_len; + DBUG_ENTER("ha_partition::open"); + + ref_length= 0; + m_part_field_array= m_part_info->full_part_field_array; + if (get_from_handler_file(name)) + DBUG_RETURN(1); + m_start_key.length= 0; + m_rec0= table->record[0]; + m_rec_length= table->s->reclength; + alloc_len= m_tot_parts * (m_rec_length + PARTITION_BYTES_IN_POS); + alloc_len+= table->s->max_key_length; + if (!m_ordered_rec_buffer) + { + if (!(m_ordered_rec_buffer= (byte*)my_malloc(alloc_len, MYF(MY_WME)))) + { + DBUG_RETURN(1); + } + { + /* + We set-up one record per partition and each record has 2 bytes in + front where the partition id is written. This is used by ordered + index_read. + We also set-up a reference to the first record for temporary use in + setting up the scan. 
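The buffer described above consists of one fixed-size slot per partition,
a 2-byte partition id followed by a full record image, with the key buffer
placed after the last slot. A small sketch of the offset arithmetic
(partition count and record length invented):

#include <cstdio>

int main()
{
  const unsigned PARTITION_BYTES_IN_POS= 2;
  unsigned tot_parts= 4, rec_length= 100;       // invented sizes
  unsigned slot= rec_length + PARTITION_BYTES_IN_POS;
  for (unsigned i= 0; i < tot_parts; i++)
    printf("partition %u: id at offset %u, record at offset %u\n",
           i, i * slot, i * slot + PARTITION_BYTES_IN_POS);
  printf("key buffer starts at offset %u\n", tot_parts * slot);
  return 0;
}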
+    */
+      char *ptr= (char*)m_ordered_rec_buffer;
+      uint i= 0;
+      do
+      {
+        int2store(ptr, i);
+        ptr+= m_rec_length + PARTITION_BYTES_IN_POS;
+      } while (++i < m_tot_parts);
+      m_start_key.key= (const byte*)ptr;
+    }
+  }
+  file= m_file;
+  do
+  {
+    create_partition_name(name_buff, name, name_buffer_ptr);
+    if ((error= (*file)->ha_open((const char*) name_buff, mode,
+                                 test_if_locked)))
+      goto err_handler;
+    m_no_locks+= (*file)->lock_count();
+    name_buffer_ptr+= strlen(name_buffer_ptr) + 1;
+    set_if_bigger(ref_length, ((*file)->ref_length));
+  } while (*(++file));
+  /*
+    Add 2 bytes for partition id in position ref length.
+    ref_length= max_in_all_partitions(ref_length) + PARTITION_BYTES_IN_POS
+  */
+  ref_length+= PARTITION_BYTES_IN_POS;
+  m_ref_length= ref_length;
+  /*
+    Release buffer read from .par file. It will not be reused again after
+    being opened once.
+  */
+  clear_handler_file();
+  /*
+    Initialise priority queue, initialised to reading forward.
+  */
+  if ((error= init_queue(&queue, m_tot_parts, (uint) PARTITION_BYTES_IN_POS,
+                         0, key_rec_cmp, (void*)this)))
+    goto err_handler;
+  /*
+    Some handlers update statistics as part of the open call. This will in
+    some cases corrupt the statistics of the partition handler and thus
+    to ensure we have correct statistics we call info from open after
+    calling open on all individual handlers.
+  */
+  info(HA_STATUS_VARIABLE | HA_STATUS_CONST);
+  DBUG_RETURN(0);
+
+err_handler:
+  while (file-- != m_file)
+    (*file)->close();
+  DBUG_RETURN(error);
+}
+
+/*
+  Closes a table. We call the free_share() function to free any resources
+  that we have allocated in the "shared" structure.
+
+  Called from sql_base.cc, sql_select.cc, and table.cc.
+  In sql_select.cc it is only used to close up temporary tables or during
+  the process where a temporary table is converted over to being a
+  MyISAM table.
+  For sql_base.cc look at close_data_tables().
+*/
+
+int ha_partition::close(void)
+{
+  handler **file;
+  DBUG_ENTER("ha_partition::close");
+
+  delete_queue(&queue);
+  file= m_file;
+  do
+  {
+    (*file)->close();
+  } while (*(++file));
+  DBUG_RETURN(0);
+}
+
+
+/****************************************************************************
+                MODULE start/end statement
+****************************************************************************/
+/*
+  A number of methods to define various constants for the handler. In
+  the case of the partition handler we need to use some max and min
+  of the underlying handlers in most cases.
+*/
+
+/*
+  First you should go read the section "locking functions for mysql" in
+  lock.cc to understand this.
+  This creates a lock on the table. If you are implementing a storage
+  engine that can handle transactions, look at ha_berkeley.cc to see how
+  you will want to go about doing this. Otherwise you should consider
+  calling flock() here.
+  Originally this method was used to set locks on file level to enable
+  several MySQL Servers to work on the same data. For transactional
+  engines it has been "abused" to also mean start and end of statements
+  to enable proper rollback of statements and transactions. When LOCK
+  TABLES has been issued the start_stmt method takes over the role of
+  indicating start of statement but in this case there is no end of
+  statement indicator(?).
+
+  Called from lock.cc by lock_external() and unlock_external(). Also called
+  from sql_table.cc by copy_data_between_tables().
+*/
+
+int ha_partition::external_lock(THD *thd, int lock_type)
+{
+  uint error;
+  handler **file;
+  DBUG_ENTER("ha_partition::external_lock");
+  file= m_file;
+  do
+  {
+    if ((error= (*file)->external_lock(thd, lock_type)))
+    {
+      if (lock_type != F_UNLCK)
+        goto err_handler;
+    }
+  } while (*(++file));
+  m_lock_type= lock_type;                       // For the future (2009?)
+  DBUG_RETURN(0);
+
+err_handler:
+  while (file-- != m_file)
+    (*file)->external_lock(thd, F_UNLCK);
+  DBUG_RETURN(error);
+}
+
+
+/*
+  The idea with handler::store_lock() is the following:
+
+  The statement decides which locks we should need for the table;
+  for updates/deletes/inserts we get WRITE locks, for SELECT... we get
+  read locks.
+
+  Before adding the lock into the table lock handler (see thr_lock.c)
+  mysqld calls store lock with the requested locks. Store lock can now
+  modify a write lock to a read lock (or some other lock), ignore the
+  lock (if we don't want to use MySQL table locks at all) or add locks
+  for many tables (like we do when we are using a MERGE handler).
+
+  Berkeley DB, for example, changes all WRITE locks to TL_WRITE_ALLOW_WRITE
+  (which signals that we are doing WRITES, but we are still allowing other
+  readers and writers).
+
+  When releasing locks, store_lock() is also called. In this case one
+  usually doesn't have to do anything.
+
+  store_lock is called when holding a global mutex to ensure that only
+  one thread at a time changes the locking information of tables.
+
+  In some exceptional cases MySQL may send a request for a TL_IGNORE;
+  this means that we are requesting the same lock as last time and this
+  should also be ignored. (This may happen when someone does a flush
+  table when we have opened a part of the tables, in which case mysqld
+  closes and reopens the tables and tries to get the same locks as last
+  time). In the future we will probably try to remove this.
+
+  Called from lock.cc by get_lock_data().
+*/
+
+THR_LOCK_DATA **ha_partition::store_lock(THD *thd,
+                                         THR_LOCK_DATA **to,
+                                         enum thr_lock_type lock_type)
+{
+  handler **file;
+  DBUG_ENTER("ha_partition::store_lock");
+  file= m_file;
+  do
+  {
+    to= (*file)->store_lock(thd, to, lock_type);
+  } while (*(++file));
+  DBUG_RETURN(to);
+}
+
+
+int ha_partition::start_stmt(THD *thd, thr_lock_type lock_type)
+{
+  int error= 0;
+  handler **file;
+  DBUG_ENTER("ha_partition::start_stmt");
+  file= m_file;
+  do
+  {
+    if ((error= (*file)->start_stmt(thd, lock_type)))
+      break;
+  } while (*(++file));
+  DBUG_RETURN(error);
+}
+
+
+/*
+  Returns the number of store locks needed in call to store lock.
+  We return number of partitions since we call store_lock on each
+  underlying handler. Assists the above functions in allocating
+  sufficient space for lock structures.
+*/
+
+uint ha_partition::lock_count() const
+{
+  DBUG_ENTER("ha_partition::lock_count");
+  DBUG_RETURN(m_no_locks);
+}
+
+
+/*
+  Record currently processed was not in the result set of the statement
+  and is thus unlocked. Used for UPDATE and DELETE queries.
+*/
+
+void ha_partition::unlock_row()
+{
+  m_file[m_last_part]->unlock_row();
+  return;
+}
+
+
+/****************************************************************************
+                MODULE change record
+****************************************************************************/
+
+/*
+  write_row() inserts a row. buf is a byte array of data, normally
+  record[0].
+
+  You can use the field information to extract the data from the native
+  byte array type.
+
+  Example of this would be:
+  for (Field **field=table->field ; *field ; field++)
+  {
+    ...
+  }
+
+  See ha_tina.cc for an example of extracting all of the data as strings.
+  ha_berkeley.cc has an example of how to store it intact by "packing" it
+  for ha_berkeley's own native storage type.
+
+  See the note for update_row() on auto_increments and timestamps. This
+  case also applies to write_row().
+
+  Called from item_sum.cc, sql_acl.cc, sql_insert.cc, sql_select.cc,
+  sql_table.cc, sql_udf.cc, and sql_update.cc.
+
+  ADDITIONAL INFO:
+
+  Most handlers set the timestamp when calling write row if any such
+  fields exist. Since we are calling an underlying handler we assume the
+  underlying handler will assume this responsibility.
+
+  Underlying handlers will also call update_auto_increment to calculate
+  the new auto increment value. We will catch the call to
+  get_auto_increment and ensure this increment value is maintained by
+  only one of the underlying handlers.
+*/
+
+int ha_partition::write_row(byte * buf)
+{
+  uint32 part_id;
+  int error;
+#ifdef NOT_NEEDED
+  byte *rec0= m_rec0;
+#endif
+  DBUG_ENTER("ha_partition::write_row");
+  DBUG_ASSERT(buf == m_rec0);
+
+#ifdef NOT_NEEDED
+  if (likely(buf == rec0))
+#endif
+    error= m_part_info->get_partition_id(m_part_info, &part_id);
+#ifdef NOT_NEEDED
+  else
+  {
+    set_field_ptr(m_part_field_array, buf, rec0);
+    error= m_part_info->get_partition_id(m_part_info, &part_id);
+    set_field_ptr(m_part_field_array, rec0, buf);
+  }
+#endif
+  if (unlikely(error))
+    DBUG_RETURN(error);
+  m_last_part= part_id;
+  DBUG_PRINT("info", ("Insert in partition %d", part_id));
+  DBUG_RETURN(m_file[part_id]->write_row(buf));
+}
+
+
+/*
+  Yes, update_row() does what you expect, it updates a row. old_data will
+  have the previous row record in it, while new_data will have the newest
+  data in it.
+  Keep in mind that the server can do updates based on ordering if an
+  ORDER BY clause was used. Consecutive ordering is not guaranteed.
+
+  Currently new_data will not have an updated auto_increment value, or
+  an updated timestamp field. You can do these, for example, by doing:
+  if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_UPDATE)
+    table->timestamp_field->set_time();
+  if (table->next_number_field && record == table->record[0])
+    update_auto_increment();
+
+  Called from sql_select.cc, sql_acl.cc, sql_update.cc, and sql_insert.cc.
+  new_data is always record[0]
+  old_data is normally record[1] but may be anything
+*/
+
+int ha_partition::update_row(const byte *old_data, byte *new_data)
+{
+  uint32 new_part_id, old_part_id;
+  int error;
+  DBUG_ENTER("ha_partition::update_row");
+
+  if ((error= get_parts_for_update(old_data, new_data, table->record[0],
+                                   m_part_info, &old_part_id, &new_part_id)))
+  {
+    DBUG_RETURN(error);
+  }
+
+  /*
+    TODO:
+    set_internal_auto_increment=
+    max(set_internal_auto_increment, new_data->auto_increment)
+  */
+  m_last_part= new_part_id;
+  if (new_part_id == old_part_id)
+  {
+    DBUG_PRINT("info", ("Update in partition %d", new_part_id));
+    DBUG_RETURN(m_file[new_part_id]->update_row(old_data, new_data));
+  }
+  else
+  {
+    DBUG_PRINT("info", ("Update from partition %d to partition %d",
+                        old_part_id, new_part_id));
+    if ((error= m_file[new_part_id]->write_row(new_data)))
+      DBUG_RETURN(error);
+    if ((error= m_file[old_part_id]->delete_row(old_data)))
+    {
+#ifdef IN_THE_FUTURE
+      (void) m_file[new_part_id]->delete_last_inserted_row(new_data);
+#endif
+      DBUG_RETURN(error);
+    }
+  }
+  DBUG_RETURN(0);
+}
+
+
+/*
+  This will delete a row. buf will contain a copy of the row to be deleted.
+  The server will call this right after the current row has been read
+  (from either a previous rnd_xxx() or index_xxx() call).
+  If you keep a pointer to the last row or can access a primary key it will
+  make doing the deletion quite a bit easier.
+  Keep in mind that the server does not guarantee consecutive deletions.
+  ORDER BY clauses can be used.
+
+  Called in sql_acl.cc and sql_udf.cc to manage internal table information.
+  Called in sql_delete.cc, sql_insert.cc, and sql_select.cc. In sql_select
+  it is used for removing duplicates while in insert it is used for REPLACE
+  calls.
+
+  buf is either record[0] or record[1]
+*/
+
+int ha_partition::delete_row(const byte *buf)
+{
+  uint32 part_id;
+  int error;
+  DBUG_ENTER("ha_partition::delete_row");
+
+  if ((error= get_part_for_delete(buf, m_rec0, m_part_info, &part_id)))
+  {
+    DBUG_RETURN(error);
+  }
+  m_last_part= part_id;
+  DBUG_RETURN(m_file[part_id]->delete_row(buf));
+}
+
+
+/*
+  Used to delete all rows in a table. Both for cases of truncate and
+  for cases where the optimizer realizes that all rows will be
+  removed as a result of a SQL statement.
+
+  Called from item_sum.cc by Item_func_group_concat::clear() and
+  Item_sum_count_distinct::clear().
+  Called from sql_delete.cc by mysql_delete().
+  Called from sql_select.cc by JOIN::reinit().
+  Called from sql_union.cc by st_select_lex_unit::exec().
+*/
+
+int ha_partition::delete_all_rows()
+{
+  int error;
+  handler **file;
+  DBUG_ENTER("ha_partition::delete_all_rows");
+  file= m_file;
+  do
+  {
+    if ((error= (*file)->delete_all_rows()))
+      DBUG_RETURN(error);
+  } while (*(++file));
+  DBUG_RETURN(0);
+}
+
+/*
+  rows == 0 means we will probably insert many rows
+*/
+
+void ha_partition::start_bulk_insert(ha_rows rows)
+{
+  handler **file;
+  DBUG_ENTER("ha_partition::start_bulk_insert");
+  if (!rows)
+  {
+    /* Avoid allocating big caches in all underlying handlers */
+    DBUG_VOID_RETURN;
+  }
+  rows= rows/m_tot_parts + 1;
+  file= m_file;
+  do
+  {
+    (*file)->start_bulk_insert(rows);
+  } while (*(++file));
+  DBUG_VOID_RETURN;
+}
+
+
+int ha_partition::end_bulk_insert()
+{
+  int error= 0;
+  handler **file;
+  DBUG_ENTER("ha_partition::end_bulk_insert");
+
+  file= m_file;
+  do
+  {
+    int tmp;
+    /* We want to execute end_bulk_insert() on all handlers */
+    if ((tmp= (*file)->end_bulk_insert()))
+      error= tmp;
+  } while (*(++file));
+  DBUG_RETURN(error);
+}
+
+/****************************************************************************
+                MODULE full table scan
+****************************************************************************/
+/*
+  Initialize engine for random reads
+
+  SYNOPSIS
+    ha_partition::rnd_init()
+    scan    0  Initialize for random reads through rnd_pos()
+            1  Initialize for random scan through rnd_next()
+
+  NOTES
+    rnd_init() is called when the server wants the storage engine to do a
+    table scan or when the server wants to access data through rnd_pos.
+
+    When scan is used we will scan one handler partition at a time.
+    When preparing for rnd_pos we will init all handler partitions.
+    No extra cache handling is needed when scanning is not performed.
+
+    Before initialising we will call rnd_end to ensure that we clean up
+    from any previous incarnation of a table scan.
+    Called from filesort.cc, records.cc, sql_handler.cc, sql_select.cc,
+    sql_table.cc, and sql_update.cc.
+*/
+
+int ha_partition::rnd_init(bool scan)
+{
+  int error;
+  handler **file;
+  DBUG_ENTER("ha_partition::rnd_init");
+
+  include_partition_fields_in_used_fields();
+  if (scan)
+  {
+    /*
+      rnd_end() is needed for partitioning to reset internal data if scan
+      is already in use
+    */
+    rnd_end();
+    if (partition_scan_set_up(rec_buf(0), FALSE))
+    {
+      /*
+        The set of partitions to scan is empty. We return success and
+        return end of file on first rnd_next.
+      */
+      DBUG_RETURN(0);
+    }
+    /*
+      We will use the partition set in our scan, using the start and stop
+      partition and checking each scan before start depending on the
+      bitfields.
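The m_scan_value member set in this function acts as a small state flag;
its three values, read together with rnd_end() and rnd_next() below, can
be summarised as follows (the enumerator names here are invented, only
the numeric values come from the handler code):

// Invented names for the three m_scan_value states:
enum partition_scan_state {
  SCAN_POS_INIT= 0,  // ha_rnd_init(0) done on all partitions (for rnd_pos)
  SCAN_ACTIVE=   1,  // sequential scan open on partition start_part
  SCAN_NONE=     2   // no scan active: initial state, error, or after end
};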
+ */ + late_extra_cache(m_part_spec.start_part); + DBUG_PRINT("info", ("rnd_init on partition %d",m_part_spec.start_part)); + error= m_file[m_part_spec.start_part]->ha_rnd_init(1); + m_scan_value= 1; // Scan active + if (error) + m_scan_value= 2; // No scan active + DBUG_RETURN(error); + } + file= m_file; + do + { + if ((error= (*file)->ha_rnd_init(0))) + goto err; + } while (*(++file)); + m_scan_value= 0; + DBUG_RETURN(0); + +err: + while (file--) + (*file)->ha_rnd_end(); + DBUG_RETURN(error); +} + + +int ha_partition::rnd_end() +{ + handler **file; + DBUG_ENTER("ha_partition::rnd_end"); + switch (m_scan_value) { + case 2: // Error + break; + case 1: // Table scan + if (m_part_spec.start_part != NO_CURRENT_PART_ID) + { + late_extra_no_cache(m_part_spec.start_part); + m_file[m_part_spec.start_part]->ha_rnd_end(); + } + break; + case 0: + file= m_file; + do + { + (*file)->ha_rnd_end(); + } while (*(++file)); + break; + } + m_part_spec.start_part= NO_CURRENT_PART_ID; + m_scan_value= 2; + DBUG_RETURN(0); +} + + +/* + read next row during full table scan (scan in random row order) + + SYNOPSIS + rnd_next() + buf buffer that should be filled with data + + This is called for each row of the table scan. When you run out of records + you should return HA_ERR_END_OF_FILE. + The Field structure for the table is the key to getting data into buf + in a manner that will allow the server to understand it. + + Called from filesort.cc, records.cc, sql_handler.cc, sql_select.cc, + sql_table.cc, and sql_update.cc. +*/ + +int ha_partition::rnd_next(byte *buf) +{ + DBUG_ASSERT(m_scan_value); + uint part_id= m_part_spec.start_part; // Cache of this variable + handler *file= m_file[part_id]; + int result= HA_ERR_END_OF_FILE; + DBUG_ENTER("ha_partition::rnd_next"); + + DBUG_ASSERT(m_scan_value == 1); + + if (part_id > m_part_spec.end_part) + { + /* + The original set of partitions to scan was empty and thus we report + the result here. + */ + goto end; + } + while (TRUE) + { + if ((result= file->rnd_next(buf))) + { + if (result == HA_ERR_RECORD_DELETED) + continue; // Probably MyISAM + + if (result != HA_ERR_END_OF_FILE) + break; // Return error + + /* End current partition */ + late_extra_no_cache(part_id); + DBUG_PRINT("info", ("rnd_end on partition %d", part_id)); + if ((result= file->ha_rnd_end())) + break; + /* Shift to next partition */ + if (++part_id > m_part_spec.end_part) + { + result= HA_ERR_END_OF_FILE; + break; + } + file= m_file[part_id]; + DBUG_PRINT("info", ("rnd_init on partition %d", part_id)); + if ((result= file->ha_rnd_init(1))) + break; + late_extra_cache(part_id); + } + else + { + m_part_spec.start_part= part_id; + m_last_part= part_id; + table->status= 0; + DBUG_RETURN(0); + } + } + +end: + m_part_spec.start_part= NO_CURRENT_PART_ID; + table->status= STATUS_NOT_FOUND; + DBUG_RETURN(result); +} + + +inline void store_part_id_in_pos(byte *pos, uint part_id) +{ + int2store(pos, part_id); +} + +inline uint get_part_id_from_pos(const byte *pos) +{ + return uint2korr(pos); +} + +/* + position() is called after each call to rnd_next() if the data needs + to be ordered. You can do something like the following to store + the position: + ha_store_ptr(ref, ref_length, current_position); + + The server uses ref to store data. ref_length in the above case is + the size needed to store current_position. ref is just a byte array + that the server will maintain. If you are using offsets to mark rows, then + current_position should be the offset. 
If it is a primary key like in + BDB, then it needs to be a primary key. + + Called from filesort.cc, sql_select.cc, sql_delete.cc and sql_update.cc. +*/ + +void ha_partition::position(const byte *record) +{ + handler *file= m_file[m_last_part]; + DBUG_ENTER("ha_partition::position"); + file->position(record); + store_part_id_in_pos(ref, m_last_part); + memcpy((ref + PARTITION_BYTES_IN_POS), file->ref, + (ref_length - PARTITION_BYTES_IN_POS)); + +#ifdef SUPPORTING_PARTITION_OVER_DIFFERENT_ENGINES +#ifdef HAVE_purify + bzero(ref + PARTITION_BYTES_IN_POS + ref_length, max_ref_length-ref_length); +#endif /* HAVE_purify */ +#endif + DBUG_VOID_RETURN; +} + +/* + This is like rnd_next, but you are given a position to use + to determine the row. The position will be of the type that you stored in + ref. You can use ha_get_ptr(pos,ref_length) to retrieve whatever key + or position you saved when position() was called. + Called from filesort.cc records.cc sql_insert.cc sql_select.cc + sql_update.cc. +*/ + +int ha_partition::rnd_pos(byte * buf, byte *pos) +{ + uint part_id; + handler *file; + DBUG_ENTER("ha_partition::rnd_pos"); + + part_id= get_part_id_from_pos((const byte *) pos); + DBUG_ASSERT(part_id < m_tot_parts); + file= m_file[part_id]; + m_last_part= part_id; + DBUG_RETURN(file->rnd_pos(buf, (pos + PARTITION_BYTES_IN_POS))); +} + + +/**************************************************************************** + MODULE index scan +****************************************************************************/ +/* + Positions an index cursor to the index specified in the handle. Fetches the + row if available. If the key value is null, begin at the first key of the + index. + + There are loads of optimisations possible here for the partition handler. + The same optimisations can also be checked for full table scan although + only through conditions and not from index ranges. + Phase one optimisations: + Check if the fields of the partition function are bound. If so only use + the single partition it becomes bound to. + Phase two optimisations: + If it can be deduced through range or list partitioning that only a + subset of the partitions are used, then only use those partitions. +*/ + +/* + index_init is always called before starting index scans (except when + starting through index_read_idx and using read_range variants). +*/ + +int ha_partition::index_init(uint inx, bool sorted) +{ + int error= 0; + handler **file; + DBUG_ENTER("ha_partition::index_init"); + + active_index= inx; + m_part_spec.start_part= NO_CURRENT_PART_ID; + m_start_key.length= 0; + m_ordered= sorted; + m_curr_key_info= table->key_info+inx; + include_partition_fields_in_used_fields(); + + file= m_file; + do + { + /* TODO RONM: Change to index_init() when code is stable */ + if ((error= (*file)->ha_index_init(inx, sorted))) + { + DBUG_ASSERT(0); // Should never happen + break; + } + } while (*(++file)); + DBUG_RETURN(error); +} + + +/* + index_end is called at the end of an index scan to clean up + anything that needs cleaning up. +*/ + +int ha_partition::index_end() +{ + int error= 0; + handler **file; + DBUG_ENTER("ha_partition::index_end"); + + active_index= MAX_KEY; + m_part_spec.start_part= NO_CURRENT_PART_ID; + file= m_file; + do + { + int tmp; + /* We want to execute index_end() on all handlers */ + /* TODO RONM: Change to index_end() when code is stable */ + if ((tmp= (*file)->ha_index_end())) + error= tmp; + } while (*(++file)); + DBUG_RETURN(error); +} + + +/* + index_read starts a new index scan using a start key.
The MySQL Server + will check the end key on its own. Thus to function properly the + partitioned handler needs to ensure that it delivers records in the sort + order of the MySQL Server. + index_read can be restarted without calling index_end on the previous + index scan and without calling index_init. In this case the index_read + is on the same index as the previous index_scan. This is particularly + used in conjunction with multi read ranges. +*/ + +int ha_partition::index_read(byte * buf, const byte * key, + uint key_len, enum ha_rkey_function find_flag) +{ + DBUG_ENTER("ha_partition::index_read"); + end_range= 0; + DBUG_RETURN(common_index_read(buf, key, key_len, find_flag)); +} + + +int ha_partition::common_index_read(byte *buf, const byte *key, uint key_len, + enum ha_rkey_function find_flag) +{ + int error; + DBUG_ENTER("ha_partition::common_index_read"); + + memcpy((void*)m_start_key.key, key, key_len); + m_start_key.length= key_len; + m_start_key.flag= find_flag; + m_index_scan_type= partition_index_read; + + if ((error= partition_scan_set_up(buf, TRUE))) + { + DBUG_RETURN(error); + } + + if (!m_ordered_scan_ongoing || + (find_flag == HA_READ_KEY_EXACT && + (key_len >= m_curr_key_info->key_length || + key_len == 0))) + { + /* + We use unordered index scan either when read_range is used and flag + is set to not use ordered or when an exact key is used and in this + case all records will sort as equal and thus the sort order of the + resulting records doesn't matter. + We also use an unordered index scan when the number of partitions to + scan is only one. + The unordered index scan will use the partition set created. + Need to set unordered scan ongoing since we can come here even when + it isn't set. + */ + m_ordered_scan_ongoing= FALSE; + error= handle_unordered_scan_next_partition(buf); + } + else + { + /* + In all other cases we will use the ordered index scan. This will use + the partition set created by the get_partition_set method. + */ + error= handle_ordered_index_scan(buf); + } + DBUG_RETURN(error); +} + + +/* + index_first() asks for the first key in the index. + This is similar to index_read except that there is no start key since + the scan starts from the leftmost entry and proceeds forward with + index_next. + + Called from opt_range.cc, opt_sum.cc, sql_handler.cc, + and sql_select.cc. +*/ + +int ha_partition::index_first(byte * buf) +{ + DBUG_ENTER("ha_partition::index_first"); + end_range= 0; + m_index_scan_type= partition_index_first; + DBUG_RETURN(common_first_last(buf)); +} + + +/* + index_last() asks for the last key in the index. + This is similar to index_read except that there is no start key since + the scan starts from the rightmost entry and proceeds forward with + index_prev. + + Called from opt_range.cc, opt_sum.cc, sql_handler.cc, + and sql_select.cc. +*/ + +int ha_partition::index_last(byte * buf) +{ + DBUG_ENTER("ha_partition::index_last"); + m_index_scan_type= partition_index_last; + DBUG_RETURN(common_first_last(buf)); +} + +int ha_partition::common_first_last(byte *buf) +{ + int error; + if ((error= partition_scan_set_up(buf, FALSE))) + return error; + if (!m_ordered_scan_ongoing) + return handle_unordered_scan_next_partition(buf); + return handle_ordered_index_scan(buf); +} + +/* + Positions an index cursor to the index specified in key. Fetches the + row if any. This is only used to read whole keys.
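The ordered/unordered decision made in common_index_read() above boils down to a small predicate. A rough sketch of just that predicate, with an illustrative stand-in constant instead of the real ha_rkey_function enum value:

// Sketch of the ordered/unordered choice in common_index_read(): an
// exact match on the full key can only yield rows that compare equal,
// so merging partitions in key order buys nothing.
bool need_ordered_scan(bool ordered_requested, int find_flag,
                       unsigned key_len, unsigned full_key_len)
{
  const int HA_READ_KEY_EXACT_SKETCH= 0;    // stand-in for the real enum
  if (!ordered_requested)
    return false;                           // caller didn't ask for order
  if (find_flag == HA_READ_KEY_EXACT_SKETCH &&
      (key_len >= full_key_len || key_len == 0))
    return false;                           // all matches sort as equal
  return true;
}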
+ TODO: Optimise this code to avoid index_init and index_end +*/ + +int ha_partition::index_read_idx(byte * buf, uint index, const byte * key, + uint key_len, + enum ha_rkey_function find_flag) +{ + int res; + DBUG_ENTER("ha_partition::index_read_idx"); + index_init(index, 0); + res= index_read(buf, key, key_len, find_flag); + index_end(); + DBUG_RETURN(res); +} + +/* + This is used in join_read_last_key to optimise away an ORDER BY. + Can only be used on indexes supporting HA_READ_ORDER +*/ + +int ha_partition::index_read_last(byte *buf, const byte *key, uint keylen) +{ + DBUG_ENTER("ha_partition::index_read_last"); + m_ordered= TRUE; // Safety measure + DBUG_RETURN(index_read(buf, key, keylen, HA_READ_PREFIX_LAST)); +} + + +/* + Used to read forward through the index. +*/ + +int ha_partition::index_next(byte * buf) +{ + DBUG_ENTER("ha_partition::index_next"); + /* + TODO(low priority): + If we want partition to work with the HANDLER commands, we + must be able to do index_last() -> index_prev() -> index_next() + */ + DBUG_ASSERT(m_index_scan_type != partition_index_last); + if (!m_ordered_scan_ongoing) + { + DBUG_RETURN(handle_unordered_next(buf, FALSE)); + } + DBUG_RETURN(handle_ordered_next(buf, FALSE)); +} + + +/* + This routine is used to read the next but only if the key is the same + as supplied in the call. +*/ + +int ha_partition::index_next_same(byte *buf, const byte *key, uint keylen) +{ + DBUG_ENTER("ha_partition::index_next_same"); + DBUG_ASSERT(keylen == m_start_key.length); + DBUG_ASSERT(m_index_scan_type != partition_index_last); + if (!m_ordered_scan_ongoing) + DBUG_RETURN(handle_unordered_next(buf, TRUE)); + DBUG_RETURN(handle_ordered_next(buf, TRUE)); +} + +/* + Used to read backwards through the index. +*/ + +int ha_partition::index_prev(byte * buf) +{ + DBUG_ENTER("ha_partition::index_prev"); + /* TODO: read comment in index_next */ + DBUG_ASSERT(m_index_scan_type != partition_index_first); + DBUG_RETURN(handle_ordered_prev(buf)); +} + + +/* + We reimplement read_range_first since we don't want the compare_key + check at the end. This is already performed in the partition handler. + read_range_next is very different since we need to scan + all underlying handlers. +*/ + +int ha_partition::read_range_first(const key_range *start_key, + const key_range *end_key, + bool eq_range_arg, bool sorted) +{ + int error; + DBUG_ENTER("ha_partition::read_range_first"); + m_ordered= sorted; + eq_range= eq_range_arg; + end_range= 0; + if (end_key) + { + end_range= &save_end_range; + save_end_range= *end_key; + key_compare_result_on_equal= + ((end_key->flag == HA_READ_BEFORE_KEY) ? 1 : + (end_key->flag == HA_READ_AFTER_KEY) ?
-1 : 0); + } + range_key_part= m_curr_key_info->key_part; + + if (!start_key) // Read first record + { + m_index_scan_type= partition_index_first; + error= common_first_last(m_rec0); + } + else + { + error= common_index_read(m_rec0, + start_key->key, + start_key->length, start_key->flag); + } + DBUG_RETURN(error); +} + + +int ha_partition::read_range_next() +{ + DBUG_ENTER("ha_partition::read_range_next"); + if (m_ordered) + { + DBUG_RETURN(handler::read_range_next()); + } + DBUG_RETURN(handle_unordered_next(m_rec0, eq_range)); +} + + +int ha_partition::partition_scan_set_up(byte * buf, bool idx_read_flag) +{ + DBUG_ENTER("ha_partition::partition_scan_set_up"); + + if (idx_read_flag) + get_partition_set(table,buf,active_index,&m_start_key,&m_part_spec); + else + get_partition_set(table, buf, MAX_KEY, 0, &m_part_spec); + if (m_part_spec.start_part > m_part_spec.end_part) + { + /* + We discovered a partition set but the set was empty so we report + key not found. + */ + DBUG_PRINT("info", ("scan with no partition to scan")); + DBUG_RETURN(HA_ERR_END_OF_FILE); + } + if (m_part_spec.start_part == m_part_spec.end_part) + { + /* + We discovered a single partition to scan, this never needs to be + performed using the ordered index scan. + */ + DBUG_PRINT("info", ("index scan using the single partition %d", + m_part_spec.start_part)); + m_ordered_scan_ongoing= FALSE; + } + else + { + /* + Set m_ordered_scan_ongoing according to how the scan should be done + */ + m_ordered_scan_ongoing= m_ordered; + } + DBUG_ASSERT(m_part_spec.start_part < m_tot_parts && + m_part_spec.end_part < m_tot_parts); + DBUG_RETURN(0); +} + + +/**************************************************************************** + Unordered Index Scan Routines +****************************************************************************/ +/* + These routines are used to scan partitions without considering order. + This is performed in two situations. + 1) In read_multi_range this is the normal case + 2) When performing any type of index_read, index_first, index_last where + all fields in the partition function are bound. In this case the index + scan is performed on only one partition and thus it isn't necessary to + perform any sort. +*/ + +int ha_partition::handle_unordered_next(byte *buf, bool next_same) +{ + handler *file= m_file[m_part_spec.start_part]; + int error; + DBUG_ENTER("ha_partition::handle_unordered_next"); + + /* + We should consider if this should be split into two functions as + next_same is always a local constant + */ + if (next_same) + { + if (!(error= file->index_next_same(buf, m_start_key.key, + m_start_key.length))) + { + m_last_part= m_part_spec.start_part; + DBUG_RETURN(0); + } + } + else if (!(error= file->index_next(buf))) + { + if (compare_key(end_range) <= 0) + { + m_last_part= m_part_spec.start_part; + DBUG_RETURN(0); // Row was in range + } + error= HA_ERR_END_OF_FILE; + } + + if (error == HA_ERR_END_OF_FILE) + { + m_part_spec.start_part++; // Start using next part + error= handle_unordered_scan_next_partition(buf); + } + DBUG_RETURN(error); +} + + +/* + This routine is used to start the index scan on the next partition. + Both initial start and after completing scan on one partition.
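In outline, the unordered scan above walks partitions left to right and hands over on end-of-file. A toy model of that control flow, with plain vectors standing in for partitions (not the handler API), prints 1 2 3:

#include <cstdio>
#include <vector>

static const int EOF_ERR= -1;               // stand-in for HA_ERR_END_OF_FILE

// Each "partition" here is just a vector of ints with a read cursor.
struct ToyPart { std::vector<int> rows; size_t cur= 0; };

// Sketch of handle_unordered_next()/..._scan_next_partition(): read from
// the current partition until EOF, then move start_part to the next one.
int unordered_next(std::vector<ToyPart> &parts, size_t &start_part, int &out)
{
  while (start_part < parts.size())
  {
    ToyPart &p= parts[start_part];
    if (p.cur < p.rows.size())
    {
      out= p.rows[p.cur++];
      return 0;
    }
    start_part++;                           // hand over to next partition
  }
  return EOF_ERR;
}

int main()
{
  std::vector<ToyPart> parts(3);
  parts[0].rows= {1, 2};
  parts[2].rows= {3};                       // middle partition is empty
  size_t start= 0;
  int row;
  while (unordered_next(parts, start, row) == 0)
    std::printf("%d\n", row);
}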
+*/ + +int ha_partition::handle_unordered_scan_next_partition(byte * buf) +{ + uint i; + DBUG_ENTER("ha_partition::handle_unordered_scan_next_partition"); + + for (i= m_part_spec.start_part; i <= m_part_spec.end_part; i++) + { + int error; + handler *file= m_file[i]; + + m_part_spec.start_part= i; + switch (m_index_scan_type) { + case partition_index_read: + DBUG_PRINT("info", ("index_read on partition %d", i)); + error= file->index_read(buf, m_start_key.key, + m_start_key.length, + m_start_key.flag); + break; + case partition_index_first: + DBUG_PRINT("info", ("index_first on partition %d", i)); + error= file->index_first(buf); + break; + default: + DBUG_ASSERT(FALSE); + DBUG_RETURN(1); + } + if (!error) + { + if (compare_key(end_range) <= 0) + { + m_last_part= i; + DBUG_RETURN(0); + } + error= HA_ERR_END_OF_FILE; + } + if ((error != HA_ERR_END_OF_FILE) && (error != HA_ERR_KEY_NOT_FOUND)) + DBUG_RETURN(error); + DBUG_PRINT("info", ("HA_ERR_END_OF_FILE on partition %d", i)); + } + m_part_spec.start_part= NO_CURRENT_PART_ID; + DBUG_RETURN(HA_ERR_END_OF_FILE); +} + + +/* + This part contains the logic to handle index scans that require ordered + output. This includes all except those started by read_range_first with + the flag ordered set to FALSE. Thus most direct index_read and all + index_first and index_last. + + We implement ordering by keeping one record plus a key buffer for each + partition. Every time a new entry is requested we will fetch a new + entry from the partition that is currently not filled with an entry. + Then the entry is put into its proper sort position. + + Returning a record is done by getting the top record, copying the + record to the request buffer and setting the partition as empty on + entries. +*/ + +int ha_partition::handle_ordered_index_scan(byte *buf) +{ + uint i, j= 0; + bool found= FALSE; + bool reverse_order= FALSE; + DBUG_ENTER("ha_partition::handle_ordered_index_scan"); + + m_top_entry= NO_CURRENT_PART_ID; + queue_remove_all(&queue); + for (i= m_part_spec.start_part; i <= m_part_spec.end_part; i++) + { + int error; + byte *rec_buf_ptr= rec_buf(i); + handler *file= m_file[i]; + + switch (m_index_scan_type) { + case partition_index_read: + error= file->index_read(rec_buf_ptr, + m_start_key.key, + m_start_key.length, + m_start_key.flag); + reverse_order= FALSE; + break; + case partition_index_first: + error= file->index_first(rec_buf_ptr); + reverse_order= FALSE; + break; + case partition_index_last: + error= file->index_last(rec_buf_ptr); + reverse_order= TRUE; + break; + default: + DBUG_ASSERT(FALSE); + DBUG_RETURN(HA_ERR_END_OF_FILE); + } + if (!error) + { + found= TRUE; + /* + Initialise queue without order first, simply insert + */ + queue_element(&queue, j++)= (byte*)queue_buf(i); + } + else if (error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE) + { + DBUG_RETURN(error); + } + } + if (found) + { + /* + We found at least one partition with data, now sort all entries and + after that read the first entry and copy it to the buffer to return in. 
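The merge described above is the classic k-way merge over one buffered row per partition. A compact sketch with std::priority_queue over (key, partition) pairs, simplified so that an int key stands in for the record and KEY comparison:

#include <cstdio>
#include <queue>
#include <vector>

// One buffered row per partition, merged in key order. The real code
// keeps a QUEUE of (partition id, record buffer) entries compared with
// the KEY definition; here an int key stands in for the record.
struct Entry { int key; size_t part, pos; };
struct Cmp
{
  bool operator()(const Entry &a, const Entry &b) const
  { return a.key > b.key; }                 // min-heap on key
};

void ordered_scan(const std::vector<std::vector<int>> &parts)
{
  std::priority_queue<Entry, std::vector<Entry>, Cmp> q;
  for (size_t i= 0; i < parts.size(); i++)  // prime one entry per partition
    if (!parts[i].empty())
      q.push({parts[i][0], i, 0});
  while (!q.empty())
  {
    Entry top= q.top(); q.pop();
    std::printf("row %d from partition %zu\n", top.key, top.part);
    if (top.pos + 1 < parts[top.part].size())   // refill from same part
      q.push({parts[top.part][top.pos + 1], top.part, top.pos + 1});
  }
}

int main()
{
  ordered_scan({{1, 4}, {2, 3}, {5}});      // rows come out 1 2 3 4 5
}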
+ */ + queue_set_max_at_top(&queue, reverse_order); + queue_set_cmp_arg(&queue, (void*)m_curr_key_info); + queue.elements= j; + queue_fix(&queue); + return_top_record(buf); + DBUG_PRINT("info", ("Record returned from partition %d", m_top_entry)); + DBUG_RETURN(0); + } + DBUG_RETURN(HA_ERR_END_OF_FILE); +} + + +void ha_partition::return_top_record(byte *buf) +{ + uint part_id; + byte *key_buffer= queue_top(&queue); + byte *rec_buffer= key_buffer + PARTITION_BYTES_IN_POS; + part_id= uint2korr(key_buffer); + memcpy(buf, rec_buffer, m_rec_length); + m_last_part= part_id; + m_top_entry= part_id; +} + + +int ha_partition::handle_ordered_next(byte *buf, bool next_same) +{ + int error; + uint part_id= m_top_entry; + handler *file= m_file[part_id]; + DBUG_ENTER("ha_partition::handle_ordered_next"); + + if (!next_same) + error= file->index_next(rec_buf(part_id)); + else + error= file->index_next_same(rec_buf(part_id), m_start_key.key, + m_start_key.length); + if (error) + { + if (error == HA_ERR_END_OF_FILE) + { + /* Return next buffered row */ + queue_remove(&queue, (uint) 0); + if (queue.elements) + { + DBUG_PRINT("info", ("Record returned from partition %u (2)", + m_top_entry)); + return_top_record(buf); + error= 0; + } + } + DBUG_RETURN(error); + } + queue_replaced(&queue); + return_top_record(buf); + DBUG_PRINT("info", ("Record returned from partition %u", m_top_entry)); + DBUG_RETURN(0); +} + + +int ha_partition::handle_ordered_prev(byte *buf) +{ + int error; + uint part_id= m_top_entry; + handler *file= m_file[part_id]; + DBUG_ENTER("ha_partition::handle_ordered_prev"); + if ((error= file->index_prev(rec_buf(part_id)))) + { + if (error == HA_ERR_END_OF_FILE) + { + queue_remove(&queue, (uint) 0); + if (queue.elements) + { + return_top_record(buf); + DBUG_PRINT("info", ("Record returned from partition %d (2)", + m_top_entry)); + error= 0; + } + } + DBUG_RETURN(error); + } + queue_replaced(&queue); + return_top_record(buf); + DBUG_PRINT("info", ("Record returned from partition %d", m_top_entry)); + DBUG_RETURN(0); +} + + +void ha_partition::include_partition_fields_in_used_fields() +{ + DBUG_ENTER("ha_partition::include_partition_fields_in_used_fields"); + Field **ptr= m_part_field_array; + do + { + ha_set_bit_in_read_set((*ptr)->fieldnr); + } while (*(++ptr)); + DBUG_VOID_RETURN; +} + + +/**************************************************************************** + MODULE information calls +****************************************************************************/ + +/* + These are all first approximations of the extra, info, scan_time + and read_time calls +*/ + +/* + ::info() is used to return information to the optimizer. + Currently this table handler doesn't implement most of the fields + really needed. SHOW also makes use of this data. + Another note, if your handler doesn't provide an exact record count, + you will probably want to have the following in your code: + if (records < 2) + records = 2; + The reason is that the server will optimize for cases of only a single + record. If in a table scan you don't know the number of records + it will probably be better to set records to two so you can return + as many records as you need. + + Along with records a few more variables you may wish to set are: + records + deleted + data_file_length + index_file_length + delete_length + check_time + Take a look at the public variables in handler.h for more information.
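The record-count advice above turns into straightforward summation in the partition handler. Schematically, with a hypothetical PartStats struct standing in for the real handler statistics fields aggregated by info(HA_STATUS_VARIABLE):

#include <algorithm>
#include <vector>

// Hypothetical per-partition statistics, mirroring the handler fields
// summed by info(HA_STATUS_VARIABLE) in the implementation below.
struct PartStats { unsigned long long records, data_len, index_len; };

PartStats aggregate(const std::vector<PartStats> &parts, bool exact_count)
{
  PartStats sum= {0, 0, 0};
  for (const PartStats &p : parts)
  {
    sum.records+= p.records;
    sum.data_len+= p.data_len;
    sum.index_len+= p.index_len;
  }
  if (!exact_count)                 // optimizer treats <2 as "one row"
    sum.records= std::max(sum.records, 2ULL);
  return sum;
}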
+ + Called in: + filesort.cc + ha_heap.cc + item_sum.cc + opt_sum.cc + sql_delete.cc + sql_delete.cc + sql_derived.cc + sql_select.cc + sql_select.cc + sql_select.cc + sql_select.cc + sql_select.cc + sql_show.cc + sql_show.cc + sql_show.cc + sql_show.cc + sql_table.cc + sql_union.cc + sql_update.cc + + Some flags that are not implemented + HA_STATUS_POS: + This parameter is never used from the MySQL Server. It is checked in a + place in MyISAM so could potentially be used by MyISAM specific programs. + HA_STATUS_NO_LOCK: + This is declared and often used. It's only used by MyISAM. + It means that MySQL doesn't need the absolute latest statistics + information. This may save the handler from doing internal locks while + retrieving statistics data. +*/ + +void ha_partition::info(uint flag) +{ + handler *file, **file_array; + DBUG_ENTER("ha_partition::info"); + + if (flag & HA_STATUS_AUTO) + { + DBUG_PRINT("info", ("HA_STATUS_AUTO")); + /* + The auto increment value is only maintained by the first handler + so we will only call this. + */ + m_file[0]->info(HA_STATUS_AUTO); + } + if (flag & HA_STATUS_VARIABLE) + { + DBUG_PRINT("info", ("HA_STATUS_VARIABLE")); + /* + Calculates statistical variables + records: Estimate of number of records in table + We report sum (always at least 2) + deleted: Estimate of number of holes in the table due to + deletes + We report sum + data_file_length: Length of data file, in principle bytes in table + We report sum + index_file_length: Length of index file, in principle bytes in + indexes in the table + We report sum + mean_record_length:Mean record length in the table + We calculate this + check_time: Time of last check (only applicable to MyISAM) + We report last time of all underlying handlers + */ + records= 0; + deleted= 0; + data_file_length= 0; + index_file_length= 0; + check_time= 0; + file_array= m_file; + do + { + file= *file_array; + file->info(HA_STATUS_VARIABLE); + records+= file->records; + deleted+= file->deleted; + data_file_length+= file->data_file_length; + index_file_length+= file->index_file_length; + if (file->check_time > check_time) + check_time= file->check_time; + } while (*(++file_array)); + if (records < 2 && + m_table_flags & HA_NOT_EXACT_COUNT) + records= 2; + if (records > 0) + mean_rec_length= (ulong) (data_file_length / records); + else + mean_rec_length= 1; //? What should we set here + } + if (flag & HA_STATUS_CONST) + { + DBUG_PRINT("info", ("HA_STATUS_CONST")); + /* + Recalculate loads of constant variables. MyISAM also sets things + directly on the table share object. + + Check whether this should be fixed since handlers should not + change things directly on the table object. + + Monty comment: This should NOT be changed! It's the handler's + responsibility to correct table->s->keys_xxxx information if keys + have been disabled. + + The most important parameters set here are records per key on + all indexes, block_size and primary key ref_length. + + For each index there is an array of rec_per_key. + As an example if we have an index with three attributes a,b and c + we will have an array of 3 rec_per_key. + rec_per_key[0] is an estimate of number of records divided by + number of unique values of the field a. + rec_per_key[1] is an estimate of the number of records divided + by the number of unique combinations of the fields a and b. + rec_per_key[2] is an estimate of the number of records divided + by the number of unique combinations of the fields a,b and c.
+ + Many handlers only set the value of rec_per_key when all fields + are bound (rec_per_key[2] in the example above). + + If the handler doesn't support statistics, it should set all of the + above to 0. + + We will allow the first handler to set the rec_per_key and use + this as an estimate on the total table. + + max_data_file_length: Maximum data file length + We ignore it, it is only used in + SHOW TABLE STATUS + max_index_file_length: Maximum index file length + We ignore it since it is never used + block_size: Block size used + We set it to the value of the first handler + sortkey: Never used at any place so ignored + ref_length: We set this to the value calculated + and stored in local object + raid_type: Set by first handler (MyISAM) + raid_chunks: Set by first handler (MyISAM) + raid_chunksize: Set by first handler (MyISAM) + create_time: Creation time of table + Set by first handler + + So we calculate these constants by using the variables on the first + handler. + */ + + file= m_file[0]; + file->info(HA_STATUS_CONST); + create_time= file->create_time; + raid_type= file->raid_type; + raid_chunks= file->raid_chunks; + raid_chunksize= file->raid_chunksize; + ref_length= m_ref_length; + } + if (flag & HA_STATUS_ERRKEY) + { + handler *file= m_file[m_last_part]; + DBUG_PRINT("info", ("info: HA_STATUS_ERRKEY")); + /* + This flag is used to get the index number of the unique index that + reported a duplicate key. + We will report the errkey on the last handler used and ignore the rest. + */ + file->info(HA_STATUS_ERRKEY); + if (file->errkey != (uint) -1) + errkey= file->errkey; + } + if (flag & HA_STATUS_TIME) + { + DBUG_PRINT("info", ("info: HA_STATUS_TIME")); + /* + This flag is used to set the latest update time of the table. + Used by SHOW commands. + We will report the maximum of these times. + */ + update_time= 0; + file_array= m_file; + do + { + file= *file_array; + file->info(HA_STATUS_TIME); + if (file->update_time > update_time) + update_time= file->update_time; + } while (*(++file_array)); + } + DBUG_VOID_RETURN; +} + + +/* + extra() is called whenever the server wishes to send a hint to + the storage engine. The MyISAM engine implements the most hints. + + We divide the parameters into the following categories: + 1) Parameters used by most handlers + 2) Parameters used by some non-MyISAM handlers + 3) Parameters used only by MyISAM + 4) Parameters only used by temporary tables for query processing + 5) Parameters only used by MyISAM internally + 6) Parameters not used at all + + The partition handler needs to handle category 1), 2) and 3). + + 1) Parameters used by most handlers + ----------------------------------- + HA_EXTRA_RESET: + This option is used by most handlers and it resets the handler state + to the same state as after an open call. This includes releasing + any READ CACHE or WRITE CACHE or other internal buffer used. + + It is called from the reset method in the handler interface. There are + three instances where this is called. + 1) After completing an INSERT ... SELECT ... query the handler for the + table inserted into is reset + 2) It is called from close_thread_table which in turn is called from + close_thread_tables except in the case where the tables are locked + in which case ha_commit_stmt is called instead. + It is only called from here if flush_version hasn't changed and the + table is not an old table when calling close_thread_table. + close_thread_tables is called from many places as a general clean up + function after completing a query.
+ 3) It is called when deleting the QUICK_RANGE_SELECT object if the + QUICK_RANGE_SELECT object had its own handler object. It is called + immediately before close of this local handler object. + HA_EXTRA_KEYREAD: + HA_EXTRA_NO_KEYREAD: + These parameters are used to provide an optimisation hint to the handler. + If HA_EXTRA_KEYREAD is set it is enough to read the index fields, for + many handlers this means that the index-only scans can be used and it + is not necessary to use the real records to satisfy this part of the + query. Index-only scans are a very important optimisation for disk-based + indexes. For main-memory indexes most indexes contain a reference to the + record and thus KEYREAD only says that it is enough to read key fields. + HA_EXTRA_NO_KEYREAD disables this for the handler, also HA_EXTRA_RESET + will disable this option. + The handler will set HA_KEYREAD_ONLY in its table flags to indicate this + feature is supported. + HA_EXTRA_FLUSH: + Indication to flush tables to disk, called at close_thread_table to + ensure disk based tables are flushed at end of query execution. + + 2) Parameters used by some non-MyISAM handlers + ---------------------------------------------- + HA_EXTRA_RETRIEVE_ALL_COLS: + Many handlers have implemented optimisations to avoid fetching all + fields when retrieving data. In certain situations all fields need + to be retrieved even though the query_id is not set on all field + objects. + + It is called from copy_data_between_tables where all fields are + copied without setting query_id before calling the handlers. + It is called from UPDATE statements when the fields of the index + used are updated or ORDER BY is used with UPDATE. + And finally when calculating checksum of a table using the CHECKSUM + command. + HA_EXTRA_RETRIEVE_PRIMARY_KEY: + In some situations it is mandatory to retrieve primary key fields + independent of the query ids. This extra flag specifies that fetch + of primary key fields is mandatory. + HA_EXTRA_KEYREAD_PRESERVE_FIELDS: + This is a strictly InnoDB feature that is more or less undocumented. + When it is activated InnoDB copies field by field from its fetch + cache instead of all fields in one memcpy. Have no idea what the + purpose of this is. + Cut from include/my_base.h: + When using HA_EXTRA_KEYREAD, overwrite only key member fields and keep + other fields intact. When this is off (by default) InnoDB will use memcpy + to overwrite entire row. + HA_EXTRA_IGNORE_DUP_KEY: + HA_EXTRA_NO_IGNORE_DUP_KEY: + Informs the handler that we will not stop the transaction if we get + duplicate key errors during insert/update. + Always called in pairs, triggered by INSERT IGNORE and other similar + SQL constructs. + Not used by MyISAM. + + 3) Parameters used only by MyISAM + --------------------------------- + HA_EXTRA_NORMAL: + Only used in MyISAM to reset quick mode, not implemented by any other + handler. Quick mode is also reset in MyISAM by HA_EXTRA_RESET. + + It is called after completing a successful DELETE query if the QUICK + option is set. + + HA_EXTRA_QUICK: + When the user does DELETE QUICK FROM table where-clause; this extra + option is called before the delete query is performed and + HA_EXTRA_NORMAL is called after the delete query is completed. + Temporary tables used internally in MySQL always set this option. + + The meaning of quick mode is that when deleting in a B-tree no merging + of leafs is performed.
This is a common method and many large DBMSs + actually only support this quick mode since it is very difficult to + merge leaves in a tree used by many threads concurrently. + + HA_EXTRA_CACHE: + This flag is usually set with extra_opt along with a cache size. + The size of this buffer is set by the user variable + record_buffer_size. The value of this cache size is the amount of + data read from disk in each fetch when performing a table scan. + This means that before scanning a table it is normal to call + extra with HA_EXTRA_CACHE and when the scan is completed to call + HA_EXTRA_NO_CACHE to release the cache memory. + + Some special care is taken when using this extra parameter since there + could be a write ongoing on the table in the same statement. In this + case one has to take special care since there might be a WRITE CACHE as + well. HA_EXTRA_CACHE specifies using a READ CACHE and using + READ CACHE and WRITE CACHE at the same time is not possible. + + Only MyISAM currently uses this option. + + It is set when doing full table scans using rr_sequential and + reset when completing such a scan with end_read_record + (resetting means calling extra with HA_EXTRA_NO_CACHE). + + It is set in filesort.cc for MyISAM internal tables and it is set in + a multi-update where HA_EXTRA_CACHE is called on a temporary result + table and after that ha_rnd_init(0) on table to be updated + and immediately after that HA_EXTRA_NO_CACHE on table to be updated. + + Apart from that it is always used from init_read_record but not when + used from UPDATE statements. It is not used from DELETE statements + with ORDER BY and LIMIT but it is used in normal scan loop in DELETE + statements. The reason here is that DELETEs in MyISAM don't move + existing data rows. + + It is also set in copy_data_between_tables when scanning the old table + to copy over to the new table. + And it is set in join_init_read_record where quick objects are used + to perform a scan on the table. In this case the full table scan can + even be performed multiple times as part of the nested loop join. + + For purposes of the partition handler it is obviously necessary to have + special treatment of this extra call. If we simply passed this + extra call down to each handler we would allocate + cache size * no of partitions amount of memory and this is not + necessary since we will only scan one partition at a time when doing + full table scans. + + Thus we handle it by first checking whether we have MyISAM handlers in + the table; if not we simply ignore the call, and if we have we will + record the call but will not call any underlying handler yet. Then + when performing the sequential scan we will check this recorded value + and call extra_opt whenever we start scanning a new partition. + + monty: Needs to be fixed so that it's passed to all handlers when we + move to another partition during table scan. + + HA_EXTRA_NO_CACHE: + When performing a UNION SELECT HA_EXTRA_NO_CACHE is called from the + flush method in the select_union class. + It is used to some extent with INSERT DELAYED. + See HA_EXTRA_RESET_STATE for use in conjunction with delete_all_rows(). + + It should be ok to call HA_EXTRA_NO_CACHE on all underlying handlers + if they are MyISAM handlers. For other handlers we can ignore the + call. If no cache is in use they will quickly return after finding + this out. And we also ensure that all caches are disabled and none + is left by mistake.
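The deferred-cache treatment described above amounts to remembering the request and replaying it per partition. A minimal model of that idea (the member and method names here are illustrative, not the real member set; the real code is prepare_extra_cache()/late_extra_cache() further down):

// Sketch of the "record now, apply at scan start" handling of
// HA_EXTRA_CACHE: nothing is sent to the underlying handlers at the
// time of the call; instead the request is replayed for one partition
// at a time when the sequential scan reaches it.
struct CacheState
{
  bool cache_requested= false;
  unsigned long cache_size= 0;      // 0 means "engine default size"
};

void on_extra_cache(CacheState &st, unsigned long size)  // extra()/extra_opt()
{
  st.cache_requested= true;
  st.cache_size= size;
}

// Called when the sequential scan enters a partition. Handler is a
// stand-in type assumed to expose hypothetical enable_read_cache()
// overloads, roughly extra(HA_EXTRA_CACHE) / extra_opt(HA_EXTRA_CACHE, n).
template <class Handler>
void on_partition_scan_start(const CacheState &st, Handler &h)
{
  if (!st.cache_requested)
    return;
  if (st.cache_size == 0)
    h.enable_read_cache();
  else
    h.enable_read_cache(st.cache_size);
}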
+ In the future this call will probably be deleted and we will instead call + ::reset(); + + HA_EXTRA_WRITE_CACHE: + See above, called from various places. It is mostly used when we + do INSERT ... SELECT + No special handling to save cache space has been developed yet. + + HA_EXTRA_PREPARE_FOR_UPDATE: + This is called as part of a multi-table update. When the table to be + updated is also scanned then this informs MyISAM handler to drop any + caches if dynamic records are used (fixed size records do not care + about this call). We pass this along to all underlying MyISAM handlers + and ignore it for the rest. + + HA_EXTRA_PREPARE_FOR_DELETE: + Only used by MyISAM, called in preparation for a DROP TABLE. + It is mostly needed on Windows, which cannot handle dropping an open file. + On other platforms it has the same effect as HA_EXTRA_FORCE_REOPEN. + + HA_EXTRA_READCHECK: + HA_EXTRA_NO_READCHECK: + Only one call to HA_EXTRA_NO_READCHECK from ha_open where it says that + this is not needed in SQL. The reason for this call is that MyISAM sets + the READ_CHECK_USED in the open call so the call is needed for MyISAM + to reset this feature. + The idea with this parameter was to inform of doing/not doing a read + check before applying an update. Since SQL always performs a read before + applying the update, no read check is needed in MyISAM as well. + + This is a cut from Docs/myisam.txt + Sometimes you might want to force an update without checking whether + another user has changed the record since you last read it. This is + somewhat dangerous, so it should ideally not be used. That can be + accomplished by wrapping the mi_update() call in two calls to mi_extra(), + using these functions: + HA_EXTRA_NO_READCHECK=5 No readcheck on update + HA_EXTRA_READCHECK=6 Use readcheck (def) + + HA_EXTRA_FORCE_REOPEN: + Only used by MyISAM, called when altering table, closing tables to + enforce a reopen of the table files. + + 4) Parameters only used by temporary tables for query processing + ---------------------------------------------------------------- + HA_EXTRA_RESET_STATE: + Same as HA_EXTRA_RESET except that buffers are not released. If there is + a READ CACHE it is reinit'ed. A cache is reinit'ed to restart reading + or to change type of cache between READ CACHE and WRITE CACHE. + + This extra function is always called immediately before calling + delete_all_rows on the handler for temporary tables. + There are cases however where HA_EXTRA_RESET_STATE isn't called in + a similar situation for a temporary table in sql_union.cc, and in two + other cases HA_EXTRA_NO_CACHE is called before and HA_EXTRA_WRITE_CACHE + is called afterwards. + The case with HA_EXTRA_NO_CACHE and HA_EXTRA_WRITE_CACHE means + disable caching, delete all rows and enable WRITE CACHE. This is + used for temporary tables containing distinct sums and a + functional group. + + The only case where delete_all_rows is called on non-temporary tables + is in sql_delete.cc when DELETE FROM table; is called by a user. + In this case no special extra calls are performed before or after this + call. + + The partition handler should not need to bother about this one. It + should never be called. + + HA_EXTRA_NO_ROWS: + Don't insert rows indication to HEAP and MyISAM, only used by temporary + tables used in query processing. + Not handled by partition handler.
+ + 5) Parameters only used by MyISAM internally + -------------------------------------------- + HA_EXTRA_REINIT_CACHE: + This call reinitialises the READ CACHE described above if there is one + and otherwise the call is ignored. + + We can thus safely call it on all underlying handlers if they are + MyISAM handlers. It is however never called so we don't handle it at all. + HA_EXTRA_FLUSH_CACHE: + Flush WRITE CACHE in MyISAM. It is only called from one place in the code. + This is in sql_insert.cc where it is called if the table_flags doesn't + contain HA_DUPP_POS. The only handler having the HA_DUPP_POS set is the + MyISAM handler and so the only handler not receiving this call is MyISAM. + Thus in effect this call is made but never used. Could be removed + from sql_insert.cc + HA_EXTRA_NO_USER_CHANGE: + Only used by MyISAM, never called. + Simulates lock_type as locked. + HA_EXTRA_WAIT_LOCK: + HA_EXTRA_WAIT_NOLOCK: + Only used by MyISAM, called from MyISAM handler but never from server + code on top of the handler. + Sets lock_wait on/off + HA_EXTRA_NO_KEYS: + Only used by MyISAM, only used internally in MyISAM handler, never called + from server level. + HA_EXTRA_KEYREAD_CHANGE_POS: + HA_EXTRA_REMEMBER_POS: + HA_EXTRA_RESTORE_POS: + HA_EXTRA_PRELOAD_BUFFER_SIZE: + HA_EXTRA_CHANGE_KEY_TO_DUP: + HA_EXTRA_CHANGE_KEY_TO_UNIQUE: + Only used by MyISAM, never called. + + 6) Parameters not used at all + ----------------------------- + HA_EXTRA_KEY_CACHE: + HA_EXTRA_NO_KEY_CACHE: + These parameters are no longer used and could be removed. +*/ + +int ha_partition::extra(enum ha_extra_function operation) +{ + DBUG_ENTER("ha_partition::extra"); + DBUG_PRINT("info", ("operation: %d", (int) operation)); + + switch (operation) { + /* Category 1), used by most handlers */ + case HA_EXTRA_KEYREAD: + case HA_EXTRA_NO_KEYREAD: + case HA_EXTRA_FLUSH: + DBUG_RETURN(loop_extra(operation)); + + /* Category 2), used by non-MyISAM handlers */ + case HA_EXTRA_IGNORE_DUP_KEY: + case HA_EXTRA_NO_IGNORE_DUP_KEY: + case HA_EXTRA_RETRIEVE_ALL_COLS: + case HA_EXTRA_RETRIEVE_PRIMARY_KEY: + case HA_EXTRA_KEYREAD_PRESERVE_FIELDS: + { + if (!m_myisam) + DBUG_RETURN(loop_extra(operation)); + break; + } + + /* Category 3), used by MyISAM handlers */ + case HA_EXTRA_NORMAL: + case HA_EXTRA_QUICK: + case HA_EXTRA_NO_READCHECK: + case HA_EXTRA_PREPARE_FOR_UPDATE: + case HA_EXTRA_PREPARE_FOR_DELETE: + case HA_EXTRA_FORCE_REOPEN: + { + if (m_myisam) + DBUG_RETURN(loop_extra(operation)); + break; + } + case HA_EXTRA_CACHE: + { + prepare_extra_cache(0); + break; + } + case HA_EXTRA_NO_CACHE: + { + m_extra_cache= FALSE; + m_extra_cache_size= 0; + DBUG_RETURN(loop_extra(operation)); + } + default: + { + /* Temporary crash to discover what is wrong */ + DBUG_ASSERT(0); + break; + } + } + DBUG_RETURN(0); +} + + +/* + This will in the future be called instead of extra(HA_EXTRA_RESET) as this + is such a common call +*/ + +int ha_partition::reset(void) +{ + int result= 0, tmp; + handler **file; + DBUG_ENTER("ha_partition::reset"); + file= m_file; + do + { + if ((tmp= (*file)->reset())) + result= tmp; + } while (*(++file)); + DBUG_RETURN(result); +} + + +int ha_partition::extra_opt(enum ha_extra_function operation, ulong cachesize) +{ + DBUG_ENTER("ha_partition::extra_opt()"); + DBUG_ASSERT(HA_EXTRA_CACHE == operation); + prepare_extra_cache(cachesize); + DBUG_RETURN(0); +} + + +void ha_partition::prepare_extra_cache(uint cachesize) +{ + DBUG_ENTER("ha_partition::prepare_extra_cache()"); + + m_extra_cache= TRUE; + m_extra_cache_size= cachesize;
+ if (m_part_spec.start_part != NO_CURRENT_PART_ID) + { + DBUG_ASSERT(m_part_spec.start_part == 0); + late_extra_cache(0); + } + DBUG_VOID_RETURN; +} + + +int ha_partition::loop_extra(enum ha_extra_function operation) +{ + int result= 0, tmp; + handler **file; + DBUG_ENTER("ha_partition::loop_extra()"); + for (file= m_file; *file; file++) + { + if ((tmp= (*file)->extra(operation))) + result= tmp; + } + DBUG_RETURN(result); +} + + +void ha_partition::late_extra_cache(uint partition_id) +{ + handler *file; + DBUG_ENTER("ha_partition::late_extra_cache"); + if (!m_extra_cache) + DBUG_VOID_RETURN; + file= m_file[partition_id]; + if (m_extra_cache_size == 0) + VOID(file->extra(HA_EXTRA_CACHE)); + else + VOID(file->extra_opt(HA_EXTRA_CACHE, m_extra_cache_size)); + DBUG_VOID_RETURN; +} + + +void ha_partition::late_extra_no_cache(uint partition_id) +{ + handler *file; + DBUG_ENTER("ha_partition::late_extra_no_cache"); + if (!m_extra_cache) + DBUG_VOID_RETURN; + file= m_file[partition_id]; + VOID(file->extra(HA_EXTRA_NO_CACHE)); + DBUG_VOID_RETURN; +} + + +/**************************************************************************** + MODULE optimiser support +****************************************************************************/ + +const key_map *ha_partition::keys_to_use_for_scanning() +{ + DBUG_ENTER("ha_partition::keys_to_use_for_scanning"); + DBUG_RETURN(m_file[0]->keys_to_use_for_scanning()); +} + +double ha_partition::scan_time() +{ + double scan_time= 0; + handler **file; + DBUG_ENTER("ha_partition::scan_time"); + + for (file= m_file; *file; file++) + scan_time+= (*file)->scan_time(); + DBUG_RETURN(scan_time); +} + + +/* + This will be optimised later to include whether or not the index can + be used with partitioning. To achieve this we need to add another parameter + that specifies how many of the index fields are bound in the ranges. + Possibly added as a new call to handlers. +*/ + +double ha_partition::read_time(uint index, uint ranges, ha_rows rows) +{ + DBUG_ENTER("ha_partition::read_time"); + DBUG_RETURN(m_file[0]->read_time(index, ranges, rows)); +} + +/* + Given a starting key and an ending key, estimate the number of rows that + will exist between the two. end_key may be empty, in which case we + determine whether start_key matches any rows. + + Called from opt_range.cc by check_quick_keys(). + + monty: MUST be called for each range and added. + Note that MySQL will assume that if this returns 0 there are no + matching rows for the range!
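Given the note above, the aggregation in the implementation that follows is a plain sum; the result is 0 only when every partition reports 0. A toy version of that invariant:

#include <vector>

// Toy per-partition estimator: each entry is that partition's estimate
// of rows in [min_key, max_key]. MySQL treats a result of 0 as "no
// matching rows", so the total may only be 0 if all parts report 0.
unsigned long long records_in_range_sum(
    const std::vector<unsigned long long> &per_part_estimates)
{
  unsigned long long total= 0;
  for (unsigned long long e : per_part_estimates)
    total+= e;                       // added per range, as the note says
  return total;
}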
+*/ + +ha_rows ha_partition::records_in_range(uint inx, key_range *min_key, + key_range *max_key) +{ + ha_rows in_range= 0; + handler **file; + DBUG_ENTER("ha_partition::records_in_range"); + + file= m_file; + do + { + in_range+= (*file)->records_in_range(inx, min_key, max_key); + } while (*(++file)); + DBUG_RETURN(in_range); +} + + +ha_rows ha_partition::estimate_rows_upper_bound() +{ + ha_rows rows, tot_rows= 0; + handler **file; + DBUG_ENTER("ha_partition::estimate_rows_upper_bound"); + + file= m_file; + do + { + rows= (*file)->estimate_rows_upper_bound(); + if (rows == HA_POS_ERROR) + DBUG_RETURN(HA_POS_ERROR); + tot_rows+= rows; + } while (*(++file)); + DBUG_RETURN(tot_rows); +} + + +uint8 ha_partition::table_cache_type() +{ + DBUG_ENTER("ha_partition::table_cache_type"); + DBUG_RETURN(m_file[0]->table_cache_type()); +} + + +/**************************************************************************** + MODULE print messages +****************************************************************************/ + +const char *ha_partition::index_type(uint inx) +{ + DBUG_ENTER("ha_partition::index_type"); + DBUG_RETURN(m_file[0]->index_type(inx)); +} + + +void ha_partition::print_error(int error, myf errflag) +{ + DBUG_ENTER("ha_partition::print_error"); + /* Should probably look for my own errors first */ + /* monty: needs to be called for the last used partition ! */ + m_file[0]->print_error(error, errflag); + DBUG_VOID_RETURN; +} + + +bool ha_partition::get_error_message(int error, String *buf) +{ + DBUG_ENTER("ha_partition::get_error_message"); + /* Should probably look for my own errors first */ + /* monty: needs to be called for the last used partition ! */ + DBUG_RETURN(m_file[0]->get_error_message(error, buf)); +} + + +/**************************************************************************** + MODULE handler characteristics +****************************************************************************/ +/* + If frm_error() is called then we will use this to find out what file + extensions exist for the storage engine. This is also used by the default + rename_table and delete_table methods in handler.cc.
+*/ + +static const char *ha_partition_ext[]= +{ + ha_par_ext, NullS +}; + +const char **ha_partition::bas_ext() const +{ return ha_partition_ext; } + + +uint ha_partition::min_of_the_max_uint(uint (handler::*operator_func)(void) const) const +{ + handler **file; + uint min_of_the_max= ((*m_file)->*operator_func)(); + + for (file= m_file+1; *file; file++) + { + uint tmp= ((*file)->*operator_func)(); + set_if_smaller(min_of_the_max, tmp); + } + return min_of_the_max; +} + + +uint ha_partition::max_supported_key_parts() const +{ + return min_of_the_max_uint(&handler::max_supported_key_parts); +} + + +uint ha_partition::max_supported_key_length() const +{ + return min_of_the_max_uint(&handler::max_supported_key_length); +} + + +uint ha_partition::max_supported_key_part_length() const +{ + return min_of_the_max_uint(&handler::max_supported_key_part_length); +} + + +uint ha_partition::max_supported_record_length() const +{ + return min_of_the_max_uint(&handler::max_supported_record_length); +} + + +uint ha_partition::max_supported_keys() const +{ + return min_of_the_max_uint(&handler::max_supported_keys); +} + + +uint ha_partition::extra_rec_buf_length() const +{ + handler **file; + uint max= (*m_file)->extra_rec_buf_length(); + for (file= m_file, file++; *file; file++) + if (max < (*file)->extra_rec_buf_length()) + max= (*file)->extra_rec_buf_length(); + return max; +} + + +uint ha_partition::min_record_length(uint options) const +{ + handler **file; + uint max= (*m_file)->min_record_length(options); + for (file= m_file, file++; *file; file++) + if (max < (*file)->min_record_length(options)) + max= (*file)->min_record_length(options); + return max; +} + + +/**************************************************************************** + MODULE compare records +****************************************************************************/ +/* + We get two references and need to check if those records are the same. + If they belong to different partitions we decide that they are not + the same record. Otherwise we use the particular handler to decide if + they are the same. Sort in partition id order if not equal. +*/ + +int ha_partition::cmp_ref(const byte *ref1, const byte *ref2) +{ + uint part_id; + my_ptrdiff_t diff1, diff2; + handler *file; + DBUG_ENTER("ha_partition::cmp_ref"); + if ((ref1[0] == ref2[0]) && (ref1[1] == ref2[1])) + { + part_id= get_part_id_from_pos(ref1); + file= m_file[part_id]; + DBUG_ASSERT(part_id < m_tot_parts); + DBUG_RETURN(file->cmp_ref((ref1 + PARTITION_BYTES_IN_POS), + (ref2 + PARTITION_BYTES_IN_POS))); + } + diff1= ref2[1] - ref1[1]; + diff2= ref2[0] - ref1[0]; + if (diff1 > 0) + { + DBUG_RETURN(-1); + } + if (diff1 < 0) + { + DBUG_RETURN(+1); + } + if (diff2 > 0) + { + DBUG_RETURN(-1); + } + DBUG_RETURN(+1); +} + + +/**************************************************************************** + MODULE auto increment +****************************************************************************/ + +void ha_partition::restore_auto_increment() +{ + DBUG_ENTER("ha_partition::restore_auto_increment"); + DBUG_VOID_RETURN; +} + + +/* + This method is called by update_auto_increment which in turn is called + by the individual handlers as part of write_row. We will always let + the first handler keep track of the auto increment value for all + partitions. 
+*/ + +ulonglong ha_partition::get_auto_increment() +{ + DBUG_ENTER("ha_partition::get_auto_increment"); + DBUG_RETURN(m_file[0]->get_auto_increment()); +} + + +/**************************************************************************** + MODULE initialise handler for HANDLER call +****************************************************************************/ + +void ha_partition::init_table_handle_for_HANDLER() +{ + return; +} + + +/**************************************************************************** + MODULE Partition Share +****************************************************************************/ +/* + Service routines for ... methods. +------------------------------------------------------------------------- + Variables for partition share methods. A hash used to track open tables. + A mutex for the hash table and an init variable to check if hash table + is initialised. + There is also a constant ending of the partition handler file name. +*/ + +#ifdef NOT_USED +static HASH partition_open_tables; +static pthread_mutex_t partition_mutex; +static int partition_init= 0; + + +/* + Function we use in the creation of our hash to get the key. +*/ +static byte *partition_get_key(PARTITION_SHARE *share, uint *length, + my_bool not_used __attribute__ ((unused))) +{ + *length= share->table_name_length; + return (byte *) share->table_name; +} + +/* + Example of simple lock controls. The "share" it creates is a structure we + will pass to each partition handler. Do you have to have one of these? + Well, you have pieces that are used for locking, and they are needed to + function. +*/ + + +static PARTITION_SHARE *get_share(const char *table_name, TABLE *table) +{ + PARTITION_SHARE *share; + uint length; + char *tmp_name; + + /* + So why does this exist? There is currently no way to init a storage + engine. + InnoDB and BDB both have modifications to the server to allow them to + do this. Since you will not want to do this, this is probably the next + best method. + */ + if (!partition_init) + { + /* Hijack a mutex for init'ing the storage engine */ + pthread_mutex_lock(&LOCK_mysql_create_db); + if (!partition_init) + { + partition_init++; + VOID(pthread_mutex_init(&partition_mutex, MY_MUTEX_INIT_FAST)); + (void) hash_init(&partition_open_tables, system_charset_info, 32, 0, 0, + (hash_get_key) partition_get_key, 0, 0); + } + pthread_mutex_unlock(&LOCK_mysql_create_db); + } + pthread_mutex_lock(&partition_mutex); + length= (uint) strlen(table_name); + + if (!(share= (PARTITION_SHARE *) hash_search(&partition_open_tables, + (byte *) table_name, length))) + { + if (!(share= (PARTITION_SHARE *) + my_multi_malloc(MYF(MY_WME | MY_ZEROFILL), + &share, sizeof(*share), + &tmp_name, length + 1, NullS))) + { + pthread_mutex_unlock(&partition_mutex); + return NULL; + } + + share->use_count= 0; + share->table_name_length= length; + share->table_name= tmp_name; + strmov(share->table_name, table_name); + if (my_hash_insert(&partition_open_tables, (byte *) share)) + goto error; + thr_lock_init(&share->lock); + pthread_mutex_init(&share->mutex, MY_MUTEX_INIT_FAST); + } + share->use_count++; + pthread_mutex_unlock(&partition_mutex); + + return share; + +error: + pthread_mutex_unlock(&partition_mutex); + my_free((gptr) share, MYF(0)); + + return NULL; +} + + +/* + Free lock controls. We call this whenever we close a table. If the table + had the last reference to the share then we free memory associated with + it.
+*/ + +static int free_share(PARTITION_SHARE *share) +{ + pthread_mutex_lock(&partition_mutex); + if (!--share->use_count) + { + hash_delete(&partition_open_tables, (byte *) share); + thr_lock_delete(&share->lock); + pthread_mutex_destroy(&share->mutex); + my_free((gptr) share, MYF(0)); + } + pthread_mutex_unlock(&partition_mutex); + + return 0; +} +#endif /* NOT_USED */ diff --git a/sql/ha_partition.h b/sql/ha_partition.h new file mode 100644 index 00000000000..fb180fdf666 --- /dev/null +++ b/sql/ha_partition.h @@ -0,0 +1,923 @@ +/* Copyright (C) 2005 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#ifdef __GNUC__ +#pragma interface /* gcc class implementation */ +#endif + +/* + PARTITION_SHARE is a structure that will be shared among all open handlers + The partition handler implements the minimum of what you will probably need. +*/ + +typedef struct st_partition_share +{ + char *table_name; + uint table_name_length, use_count; + pthread_mutex_t mutex; + THR_LOCK lock; +} PARTITION_SHARE; + + +#define PARTITION_BYTES_IN_POS 2 +class ha_partition :public handler +{ +private: + enum partition_index_scan_type + { + partition_index_read= 0, + partition_index_first= 1, + partition_index_last= 2, + partition_no_index_scan= 3 + }; + /* Data for the partition handler */ + char *m_file_buffer; // Buffer with names + char *m_name_buffer_ptr; // Pointer to first partition name + uchar *m_engine_array; // Array of types of the handlers + handler **m_file; // Array of references to handler inst. + partition_info *m_part_info; // local reference to partition + byte *m_start_key_ref; // Reference of start key in current + // index scan info + Field **m_part_field_array; // Part field array locally to save acc + byte *m_ordered_rec_buffer; // Row and key buffer for ord. idx scan + KEY *m_curr_key_info; // Current index + byte *m_rec0; // table->record[0] + QUEUE queue; // Prio queue used by sorted read + /* + Since the partition handler is a handler on top of other handlers, it + is necessary to keep information about what the underlying handler + characteristics are. It is not possible to keep any handler instances + for this since the MySQL Server sometimes allocates handler objects + without freeing them.
+ */ + u_long m_table_flags; + u_long m_low_byte_first; + + uint m_tot_parts; // Total number of partitions + uint m_no_locks; // For engines like ha_blackhole, which needs no locks + uint m_last_part; // Last file that we update/write + int m_lock_type; // Remembers type of last + // external_lock + part_id_range m_part_spec; // Which parts to scan + uint m_scan_value; // Value passed in rnd_init + // call + uint m_ref_length; // Length of position in this + // handler object + key_range m_start_key; // index read key range + enum partition_index_scan_type m_index_scan_type;// What type of index + // scan + uint m_top_entry; // Which partition is to + // deliver next result + uint m_rec_length; // Local copy of record length + + bool m_ordered; // Ordered/Unordered index scan + bool m_has_transactions; // Can we support transactions + bool m_pkey_is_clustered; // Is primary key clustered + bool m_create_handler; // Handler used to create table + bool m_is_sub_partitioned; // Is subpartitioned + bool m_ordered_scan_ongoing; + bool m_use_bit_array; + + /* + We keep track if all underlying handlers are MyISAM since MyISAM has a + great number of extra flags not needed by other handlers. + */ + bool m_myisam; // Are all underlying handlers + // MyISAM + /* + We keep track of InnoDB handlers below since it requires proper setting + of query_id in fields at index_init and index_read calls. + */ + bool m_innodb; // Are all underlying handlers + // InnoDB + /* + When calling extra(HA_EXTRA_CACHE) we do not pass this to the underlying + handlers immediately. Instead we cache it and call the underlying + immediately before starting the scan on the partition. This is to + prevent allocating a READ CACHE for each partition in parallel when + performing a full table scan on MyISAM partitioned table. + This state is cleared by extra(HA_EXTRA_NO_CACHE). + */ + bool m_extra_cache; + uint m_extra_cache_size; + + void init_handler_variables(); + /* + Variables for lock structures. + */ + THR_LOCK_DATA lock; /* MySQL lock */ + PARTITION_SHARE *share; /* Shared lock info */ + +public: + /* + ------------------------------------------------------------------------- + MODULE create/delete handler object + ------------------------------------------------------------------------- + Object create/delete methods. The normal one is called when a table object + exists. There is also a method to create the handler object with only + partition information. This is used from mysql_create_table when the + table is to be created and the engine type is deduced to be the + partition handler. + ------------------------------------------------------------------------- + */ + ha_partition(TABLE * table); + ha_partition(partition_info * part_info); + ~ha_partition(); + /* + A partition handler has no characteristics in itself. It only inherits + those from the underlying handlers. Here we set-up those constants to + enable later calls of the methods to retrieve constants from the under- + lying handlers. Returns false if not successful. + */ + int ha_initialise(); + + /* + ------------------------------------------------------------------------- + MODULE meta data changes + ------------------------------------------------------------------------- + Meta data routines to CREATE, DROP, RENAME table and often used at + ALTER TABLE (update_create_info used from ALTER TABLE and SHOW ..).
+
+    update_table_comment is used in SHOW TABLE commands to provide a
+    chance for the handler to add any interesting comments to the table
+    comments not provided by the user's comment.
+
+    create_handler_files is called before opening a new handler object
+    with openfrm to call create. It is used to create any local handler
+    object needed in opening the object in openfrm.
+    -------------------------------------------------------------------------
+  */
+  virtual int delete_table(const char *from);
+  virtual int rename_table(const char *from, const char *to);
+  virtual int create(const char *name, TABLE * form,
+                     HA_CREATE_INFO * create_info);
+  virtual int create_handler_files(const char *name);
+  virtual void update_create_info(HA_CREATE_INFO * create_info);
+  virtual char *update_table_comment(const char *comment);
+  virtual int drop_partitions(const char *path);
+private:
+  /*
+    delete_table, rename_table and create use very similar logic which
+    is packed into this routine.
+  */
+  uint del_ren_cre_table(const char *from,
+                         const char *to= NULL,
+                         TABLE * table_arg= NULL,
+                         HA_CREATE_INFO * create_info= NULL);
+  /*
+    One method to create the table_name.par file containing the names of the
+    underlying partitions, their engines and the number of partitions.
+    And one method to read it in.
+  */
+  bool create_handler_file(const char *name);
+  bool get_from_handler_file(const char *name);
+  bool new_handlers_from_part_info();
+  bool create_handlers();
+  void clear_handler_file();
+  void set_up_table_before_create(TABLE * table_arg, HA_CREATE_INFO * info,
+                                  uint part_id);
+  partition_element *find_partition_element(uint part_id);
+public:
+
+  /*
+    -------------------------------------------------------------------------
+    MODULE open/close object
+    -------------------------------------------------------------------------
+    Open and close handler object to ensure that all underlying files and
+    objects allocated and deallocated for query handling are handled
+    properly.
+    -------------------------------------------------------------------------
+
+    A handler object is opened as part of its initialisation and before
+    being used for normal queries (though not always before meta-data
+    changes). If the object was opened it will also be closed before being
+    deleted.
+  */
+  virtual int open(const char *name, int mode, uint test_if_locked);
+  virtual int close(void);
+
+  /*
+    -------------------------------------------------------------------------
+    MODULE start/end statement
+    -------------------------------------------------------------------------
+    This module contains methods that are used to understand start/end of
+    statements, transaction boundaries, and aid for proper concurrency
+    control.
+    The partition handler need not implement abort and commit since this
+    will be handled by any underlying handlers implementing transactions.
+    There is only one call to each handler type involved per transaction
+    and these go directly to the handlers supporting transactions
+    (currently InnoDB, BDB and NDB).
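
Returning to create_handler_file()/get_from_handler_file() above: the real .par layout is private to the handler, but the round-trip it performs is easy to picture. The sketch below uses an invented plain-text layout (partition count, then one "engine name" pair per line) purely for illustration; it is not the on-disk format the server writes.

    #include <fstream>
    #include <string>
    #include <vector>

    struct PartDef { std::string engine, name; };

    // Persist the number of partitions plus each partition's engine and name.
    bool write_par(const std::string &path, const std::vector<PartDef> &parts) {
      std::ofstream out(path.c_str());
      if (!out) return false;
      out << parts.size() << '\n';
      for (size_t i= 0; i < parts.size(); i++)
        out << parts[i].engine << ' ' << parts[i].name << '\n';
      return bool(out);
    }

    // Read it back so open() can instantiate one handler per partition.
    bool read_par(const std::string &path, std::vector<PartDef> &parts) {
      std::ifstream in(path.c_str());
      size_t n= 0;
      if (!(in >> n)) return false;
      parts.resize(n);
      for (size_t i= 0; i < n; i++)
        if (!(in >> parts[i].engine >> parts[i].name)) return false;
      return true;
    }
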
+    -------------------------------------------------------------------------
+  */
+  virtual THR_LOCK_DATA **store_lock(THD * thd, THR_LOCK_DATA ** to,
+                                     enum thr_lock_type lock_type);
+  virtual int external_lock(THD * thd, int lock_type);
+  /*
+    When table is locked a statement is started by calling start_stmt
+    instead of external_lock
+  */
+  virtual int start_stmt(THD * thd, thr_lock_type lock_type);
+  /*
+    Lock count is the number of locked underlying handlers (I assume)
+  */
+  virtual uint lock_count(void) const;
+  /*
+    Call to unlock rows not to be updated in transaction
+  */
+  virtual void unlock_row();
+
+  /*
+    -------------------------------------------------------------------------
+    MODULE change record
+    -------------------------------------------------------------------------
+    This part of the handler interface is used to change the records
+    after INSERT, DELETE, UPDATE, REPLACE method calls, but also for other
+    special meta-data operations such as ALTER TABLE, LOAD DATA, TRUNCATE.
+    -------------------------------------------------------------------------
+
+    These methods are used for insert (write_row), update (update_row)
+    and delete (delete_row). All methods to change data always work on
+    one row at a time. update_row and delete_row also contain the old
+    row.
+    delete_all_rows will delete all rows in the table in one call, as a
+    special optimisation for DELETE FROM table.
+
+    Bulk inserts are supported if all underlying handlers support them.
+    start_bulk_insert and end_bulk_insert are called before and after a
+    number of calls to write_row. This is not implemented yet, though.
+  */
+  virtual int write_row(byte * buf);
+  virtual int update_row(const byte * old_data, byte * new_data);
+  virtual int delete_row(const byte * buf);
+  virtual int delete_all_rows(void);
+  virtual void start_bulk_insert(ha_rows rows);
+  virtual int end_bulk_insert();
+
+  /*
+    -------------------------------------------------------------------------
+    MODULE full table scan
+    -------------------------------------------------------------------------
+    This module is used for the most basic access method for any table
+    handler. This is to fetch all data through a full table scan. No
+    indexes are needed to implement this part.
+    It contains one method to start the scan (rnd_init) that can also be
+    called multiple times (typical in a nested loop join), one to proceed
+    to the next record (rnd_next) and one to close the scan (rnd_end).
+    To remember a record for later access there is a method (position)
+    and there is a method used to retrieve the record based on the stored
+    position.
+    The position can be a file position, a primary key or a ROWID, depending
+    on the handler below.
+    -------------------------------------------------------------------------
+  */
+  /*
+    Unlike index_init(), rnd_init() can be called two times
+    without rnd_end() in between (it only makes sense if scan=1).
+    Then the second call should prepare for the new table scan
+    (e.g. if rnd_init allocates the cursor, the second call should
+    position it at the start of the table; there is no need to deallocate
+    and allocate it again).
+  */
+  virtual int rnd_init(bool scan);
+  virtual int rnd_end();
+  virtual int rnd_next(byte * buf);
+  virtual int rnd_pos(byte * buf, byte * pos);
+  virtual void position(const byte * record);
+
+  /*
+    -------------------------------------------------------------------------
+    MODULE index scan
+    -------------------------------------------------------------------------
+    This part of the handler interface is used to perform access through
+    indexes. The interface is defined as a scan interface but the handler
+    can also use key lookup if the index is a unique index or a primary
+    key index.
+    Index scans are mostly useful for SELECT queries but are an important
+    part also of UPDATE, DELETE, REPLACE and CREATE TABLE table AS SELECT
+    and so forth.
+    Naturally an index is needed for an index scan and indexes can either
+    be ordered or hash based. Some ordered indexes can return data in order
+    but not necessarily all of them.
+    There are many flags that define the behavior of indexes in the
+    various handlers. The checks of these flags are found in the optimizer
+    module.
+    -------------------------------------------------------------------------
+
+    index_read is called to start a scan of an index. The find_flag defines
+    the semantics of the scan. These flags are defined in
+    include/my_base.h
+    index_read_idx is the same but also initializes the index before doing
+    the same thing as index_read. Thus it is similar to index_init followed
+    by index_read. This is also how we implement it.
+
+    index_read/index_read_idx also return the first row. Thus for
+    key lookups, the index_read will be the only call to the handler in
+    the index scan.
+
+    index_init initializes an index before using it and index_end does
+    any end processing needed.
+  */
+  virtual int index_read(byte * buf, const byte * key,
+                         uint key_len, enum ha_rkey_function find_flag);
+  virtual int index_read_idx(byte * buf, uint idx, const byte * key,
+                             uint key_len, enum ha_rkey_function find_flag);
+  virtual int index_init(uint idx, bool sorted);
+  virtual int index_end();
+
+  /*
+    These methods are used to jump to the next or previous entry in the
+    index scan. There are also methods to jump to the first and last entry.
+  */
+  virtual int index_next(byte * buf);
+  virtual int index_prev(byte * buf);
+  virtual int index_first(byte * buf);
+  virtual int index_last(byte * buf);
+  virtual int index_next_same(byte * buf, const byte * key, uint keylen);
+  virtual int index_read_last(byte * buf, const byte * key, uint keylen);
+
+  /*
+    read_first_row is a virtual method but is only implemented by
+    handler.cc; no storage engine has implemented it, so neither
+    will the partition handler.
+
+    virtual int read_first_row(byte *buf, uint primary_key);
+  */
+
+  /*
+    We don't implement multi read range yet, will do later.
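
The comment above says index_read_idx is index_init followed by index_read, "and this is also how we implement it". A stripped-down sketch of that composition over an invented cursor interface (none of these type names come from the source):

    struct Cursor {                     // stand-in for one handler's index API
      virtual int index_init(unsigned idx, bool sorted)= 0;
      virtual int index_read(unsigned char *buf, const unsigned char *key,
                             unsigned key_len)= 0;
      virtual int index_end()= 0;
      virtual ~Cursor() {}
    };

    // index_read_idx as a thin wrapper: initialise, probe once, tear down.
    // For a unique key lookup this one call also delivers the row itself.
    int index_read_idx(Cursor &c, unsigned idx, unsigned char *buf,
                       const unsigned char *key, unsigned key_len) {
      int error= c.index_init(idx, false);
      if (!error)
        error= c.index_read(buf, key, key_len);
      if (!error)
        error= c.index_end();
      return error;
    }
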
+    virtual int read_multi_range_first(KEY_MULTI_RANGE **found_range_p,
+                                       KEY_MULTI_RANGE *ranges,
+                                       uint range_count,
+                                       bool sorted, HANDLER_BUFFER *buffer);
+    virtual int read_multi_range_next(KEY_MULTI_RANGE **found_range_p);
+  */
+
+
+  virtual int read_range_first(const key_range * start_key,
+                               const key_range * end_key,
+                               bool eq_range, bool sorted);
+  virtual int read_range_next();
+
+private:
+  int common_index_read(byte * buf, const byte * key,
+                        uint key_len, enum ha_rkey_function find_flag);
+  int common_first_last(byte * buf);
+  int partition_scan_set_up(byte * buf, bool idx_read_flag);
+  int handle_unordered_next(byte * buf, bool next_same);
+  int handle_unordered_scan_next_partition(byte * buf);
+  byte *queue_buf(uint part_id)
+  {
+    return (m_ordered_rec_buffer +
+            (part_id * (m_rec_length + PARTITION_BYTES_IN_POS)));
+  }
+  byte *rec_buf(uint part_id)
+  {
+    return (queue_buf(part_id) +
+            PARTITION_BYTES_IN_POS);
+  }
+  int handle_ordered_index_scan(byte * buf);
+  int handle_ordered_next(byte * buf, bool next_same);
+  int handle_ordered_prev(byte * buf);
+  void return_top_record(byte * buf);
+  void include_partition_fields_in_used_fields();
+public:
+  /*
+    -------------------------------------------------------------------------
+    MODULE information calls
+    -------------------------------------------------------------------------
+    These calls are used to inform the handler of specifics of the ongoing
+    scans and other actions. Most of these are used for optimisation
+    purposes.
+    -------------------------------------------------------------------------
+  */
+  virtual void info(uint);
+  virtual int extra(enum ha_extra_function operation);
+  virtual int extra_opt(enum ha_extra_function operation, ulong cachesize);
+  virtual int reset(void);
+
+private:
+  static const uint NO_CURRENT_PART_ID= 0xFFFFFFFF;
+  int loop_extra(enum ha_extra_function operation);
+  void late_extra_cache(uint partition_id);
+  void late_extra_no_cache(uint partition_id);
+  void prepare_extra_cache(uint cachesize);
+public:
+
+  /*
+    -------------------------------------------------------------------------
+    MODULE optimiser support
+    -------------------------------------------------------------------------
+    -------------------------------------------------------------------------
+  */
+
+  /*
+    NOTE !!!!!!
+    -------------------------------------------------------------------------
+    -------------------------------------------------------------------------
+    One important part of the public handler interface that is not depicted
+    in the methods is the attribute records
+
+    which is defined in the base class. It is looked at directly and is
+    set by calling info(HA_STATUS_INFO).
+    -------------------------------------------------------------------------
+  */
+
+  /*
+    keys_to_use_for_scanning can probably be implemented as the
+    intersection of all underlying handlers if mixed handlers are used.
+    This method is used to derive whether an index can be used for
+    index-only scanning when performing an ORDER BY query.
+    Only called from one place in sql_select.cc
+  */
+  virtual const key_map *keys_to_use_for_scanning();
+
+  /*
+    Called in test_quick_select to determine if indexes should be used.
+  */
+  virtual double scan_time();
+
+  /*
+    The next method will never be called if you do not implement indexes.
+  */
+  virtual double read_time(uint index, uint ranges, ha_rows rows);
+  /*
+    Estimates how many records there are in a given range.
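
handle_ordered_index_scan() and the QUEUE member above implement a k-way merge: each partition contributes its current row to a priority queue keyed on the index columns, and return_top_record() hands back the smallest. The same idea in miniature, with std::priority_queue over plain integer keys (the real code compares key tuples and keeps the owning partition id in the first PARTITION_BYTES_IN_POS bytes of each queue buffer):

    #include <functional>
    #include <queue>
    #include <utility>
    #include <vector>

    // Merge k sorted per-partition streams into one globally ordered stream.
    std::vector<int> ordered_scan(const std::vector<std::vector<int> > &parts) {
      typedef std::pair<int, size_t> Entry;          // (key, partition id)
      std::priority_queue<Entry, std::vector<Entry>,
                          std::greater<Entry> > pq;  // min-heap on key
      std::vector<size_t> pos(parts.size(), 0);
      for (size_t p= 0; p < parts.size(); p++)       // prime with first rows
        if (!parts[p].empty())
          pq.push(Entry(parts[p][0], p));
      std::vector<int> out;
      while (!pq.empty()) {
        Entry top= pq.top(); pq.pop();
        out.push_back(top.first);                    // deliver smallest row
        size_t p= top.second;
        if (++pos[p] < parts[p].size())              // refill from that stream
          pq.push(Entry(parts[p][pos[p]], p));
      }
      return out;
    }
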
+    Used by the optimiser to calculate the cost of using a particular index.
+  */
+  virtual ha_rows records_in_range(uint inx, key_range * min_key,
+                                   key_range * max_key);
+
+  /*
+    The upper bound of the number of records returned in a scan is the sum
+    over all underlying handlers.
+  */
+  virtual ha_rows estimate_rows_upper_bound();
+
+  /*
+    table_cache_type is implemented by the underlying handler but all
+    underlying handlers must have the same implementation for it to work.
+  */
+  virtual uint8 table_cache_type();
+
+  /*
+    -------------------------------------------------------------------------
+    MODULE print messages
+    -------------------------------------------------------------------------
+    This module contains various methods that return text messages for
+    table types, index types and error messages.
+    -------------------------------------------------------------------------
+  */
+  /*
+    The name of the index type that will be used for display.
+    Here we must ensure that all handlers use the same index type
+    for each index created.
+  */
+  virtual const char *index_type(uint inx);
+
+  /* The name of the table type that will be used for display purposes */
+  virtual const char *table_type() const
+  { return "PARTITION"; }
+
+  /*
+    Handler specific error messages
+  */
+  virtual void print_error(int error, myf errflag);
+  virtual bool get_error_message(int error, String * buf);
+  /*
+    -------------------------------------------------------------------------
+    MODULE handler characteristics
+    -------------------------------------------------------------------------
+    This module contains a number of methods defining limitations and
+    characteristics of the handler. The partition handler will calculate
+    these characteristics based on the underlying handlers' characteristics.
+    -------------------------------------------------------------------------
+
+    This is a list of flags that say what the storage engine
+    implements. The current table flags are documented in handler.h.
+    The partition handler will support whatever the underlying handlers
+    support, except when specifically mentioned below about exceptions
+    to this rule.
+
+    HA_READ_RND_SAME:
+    Not currently used. (Means that the handler supports the rnd_same() call)
+    (MyISAM, HEAP)
+
+    HA_TABLE_SCAN_ON_INDEX:
+    Used to avoid scanning full tables on an index. If this flag is set then
+    the handler always has a primary key (hidden if not defined) and this
+    index is used for scanning rather than a full table scan in all
+    situations.
+    (InnoDB, BDB, Federated)
+
+    HA_REC_NOT_IN_SEQ:
+    This flag is set for handlers that cannot guarantee that the rows are
+    returned according to incremental positions (0, 1, 2, 3...).
+    This also means that rnd_next() should return HA_ERR_RECORD_DELETED
+    if it finds a deleted row.
+    (MyISAM (not fixed length row), BDB, HEAP, NDB, InnoDB)
+
+    HA_CAN_GEOMETRY:
+    Can the storage engine handle spatial data?
+    Used to check that no spatial attributes are declared unless
+    the storage engine is capable of handling them.
+    (MyISAM)
+
+    HA_FAST_KEY_READ:
+    Setting this flag indicates that the handler is equally fast in
+    finding a row by key as by position.
+    This flag is used in a very special situation in conjunction with
+    filesort. For further explanation see the intro to init_read_record.
+    (BDB, HEAP, InnoDB)
+
+    HA_NULL_IN_KEY:
+    Are NULL values allowed in indexes?
+    If this is not allowed then it is not possible to use an index on a
+    NULLable field.
+    (BDB, HEAP, MyISAM, NDB, InnoDB)
+
+    HA_DUPP_POS:
+    Tells that the position of the conflicting duplicate key record is
+    stored in table->file->dupp_ref. (insert uses rnd_pos() on
+    this to find the duplicated row)
+    (MyISAM)
+
+    HA_CAN_INDEX_BLOBS:
+    Is the storage engine capable of defining an index on a prefix of
+    a BLOB attribute?
+    (BDB, Federated, MyISAM, InnoDB)
+
+    HA_AUTO_PART_KEY:
+    Auto increment fields can be part of a multi-part key. For second part
+    auto-increment keys, the auto_incrementing is done in handler.cc
+    (BDB, Federated, MyISAM, NDB)
+
+    HA_REQUIRE_PRIMARY_KEY:
+    Can't define a table without a primary key (and cannot handle a table
+    with a hidden primary key)
+    (No handler has this limitation currently)
+
+    HA_NOT_EXACT_COUNT:
+    Does the counter of records after the info call specify an exact
+    value or not? If it doesn't, this flag is set.
+    Only MyISAM and HEAP use an exact count.
+    (MyISAM, HEAP, BDB, InnoDB, NDB, Federated)
+
+    HA_CAN_INSERT_DELAYED:
+    Can the storage engine support delayed inserts?
+    To start with, the partition handler will not support delayed inserts.
+    Further investigation needed.
+    (HEAP, MyISAM)
+
+    HA_PRIMARY_KEY_IN_READ_INDEX:
+    This parameter is set when the handler will also return the primary key
+    when doing read-only-key on another index.
+
+    HA_NOT_DELETE_WITH_CACHE:
+    Seems to be an old MyISAM feature that is no longer used. No handler
+    has it defined but it is checked in init_read_record.
+    Further investigation needed.
+    (No handler defines it)
+
+    HA_NO_PREFIX_CHAR_KEYS:
+    Indexes on prefixes of character fields are not allowed.
+    (NDB)
+
+    HA_CAN_FULLTEXT:
+    Does the storage engine support fulltext indexes?
+    The partition handler will start by not supporting fulltext indexes.
+    (MyISAM)
+
+    HA_CAN_SQL_HANDLER:
+    Can the HANDLER interface in the MySQL API be used towards this
+    storage engine?
+    (MyISAM, InnoDB)
+
+    HA_NO_AUTO_INCREMENT:
+    Set if the storage engine does not support auto increment fields.
+    (Currently not set by any handler)
+
+    HA_HAS_CHECKSUM:
+    Special MyISAM feature. Has special SQL support in CREATE TABLE.
+    No special handling needed by partition handler.
+    (MyISAM)
+
+    HA_FILE_BASED:
+    Should file names always be in lower case (used by engines
+    that map table names to file names)?
+    Since the partition handler has a local file this flag is set.
+    (BDB, Federated, MyISAM)
+
+    HA_CAN_BIT_FIELD:
+    Is the storage engine capable of handling bit fields?
+    (MyISAM, NDB)
+
+    HA_NEED_READ_RANGE_BUFFER:
+    Is Read Multi-Range supported => need multi read range buffer.
+    This parameter specifies whether a buffer for read multi range
+    is needed by the handler. Whether the handler supports this
+    feature or not depends on whether the handler implements
+    read_multi_range* calls or not. The only handler currently
+    supporting this feature is NDB, so the partition handler need
+    not handle this call. There are methods in handler.cc that will
+    transfer those calls into index_read and other calls in the
+    index scan module.
+    (NDB)
+  */
+  virtual ulong alter_table_flags(void) const
+  {
+    //return HA_ONLINE_ADD_EMPTY_PARTITION + HA_ONLINE_DROP_PARTITION;
+    return HA_ONLINE_DROP_PARTITION;
+  }
+  virtual ulong table_flags() const
+  { return m_table_flags; }
+  /*
+    HA_CAN_PARTITION:
+    Used by storage engines that can handle partitioning without this
+    partition handler
+    (Partition, NDB)
+
+    HA_CAN_UPDATE_PARTITION_KEY:
+    Set if the handler can update fields that are part of the partition
+    function.
+
+    HA_CAN_PARTITION_UNIQUE:
+    Set if the handler can handle unique indexes where the fields of the
+    unique key are not part of the fields of the partition function. Thus
+    a unique key can be set on all fields.
+  */
+  virtual ulong partition_flags() const
+  { return HA_CAN_PARTITION; }
+
+  /*
+    This is a bitmap of flags that say how the storage engine
+    implements indexes. The current index flags are documented in
+    handler.h. If you do not implement indexes, just return zero
+    here.
+
+    part is the key part to check. The first key part is 0.
+    If all_parts is set, MySQL wants to know the flags for the combined
+    index up to and including 'part'.
+
+    HA_READ_NEXT:
+    Does the index support read next? This is assumed in the server
+    code and never checked, so all indexes must support this.
+    Note that the handler can be used even if it doesn't have any index.
+    (BDB, HEAP, MyISAM, Federated, NDB, InnoDB)
+
+    HA_READ_PREV:
+    Can the index be used to scan backwards?
+    (BDB, HEAP, MyISAM, NDB, InnoDB)
+
+    HA_READ_ORDER:
+    Can the index deliver its records in index order? Typically true for
+    all ordered indexes and not true for hash indexes.
+    As a first step this is not true for the partition handler, until a
+    merge sort has been implemented in the partition handler.
+    Used to set the keymap part_of_sortkey.
+    This keymap is only used to find indexes usable for resolving an ORDER BY
+    in the query. Thus in most cases index_read will work just fine without
+    order in result production. When this flag is set it is however safe to
+    order all output started by index_read since most engines do this. With
+    read_multi_range calls there is a specific flag setting order or not
+    order, so in those cases ordering of index output can be avoided.
+    (BDB, InnoDB, HEAP, MyISAM, NDB)
+
+    HA_READ_RANGE:
+    Specifies whether the index can handle ranges; typically true for all
+    ordered indexes and not true for hash indexes.
+    Used by the optimiser to check if ranges (such as key >= 5) can be
+    optimised by the index.
+    (BDB, InnoDB, NDB, MyISAM, HEAP)
+
+    HA_ONLY_WHOLE_INDEX:
+    Can't use part key searches. This is typically true for hash indexes
+    and typically not true for ordered indexes.
+    (Federated, NDB, HEAP)
+
+    HA_KEYREAD_ONLY:
+    Does the storage engine support index-only scans on this index?
+    Enables use of HA_EXTRA_KEYREAD and HA_EXTRA_NO_KEYREAD.
+    Used to set the key_map keys_for_keyread and to check in the optimiser
+    for index-only scans. When doing a read under HA_EXTRA_KEYREAD the
+    handler only has to fill in the columns the key covers. If
+    HA_PRIMARY_KEY_IN_READ_INDEX is set then also the PRIMARY KEY columns
+    must be updated in the row.
+    (BDB, InnoDB, MyISAM)
+  */
+  virtual ulong index_flags(uint inx, uint part, bool all_parts) const
+  {
+    return m_file[0]->index_flags(inx, part, all_parts);
+  }
+
+  /*
+    extensions of table handler files
+  */
+  virtual const char **bas_ext() const;
+  /*
+    unireg.cc will call the following to make sure that the storage engine
+    can handle the data it is about to send.
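
The limits declared just below are each computed as "the minimum over all children of that child's maximum", via a pointer-to-member-function so one helper serves every max_supported_* method. A compilable sketch of that trick with an invented Engine type:

    #include <algorithm>
    #include <vector>

    struct Engine {                      // stand-in for an underlying handler
      unsigned keys, key_length;
      unsigned max_supported_keys() const { return keys; }
      unsigned max_supported_key_length() const { return key_length; }
    };

    // Smallest value of one "max" getter across all engines: the composite
    // table can only promise what every child can deliver.
    unsigned min_of_the_max(const std::vector<Engine> &engines,
                            unsigned (Engine::*getter)() const) {
      unsigned result= ~0u;
      for (size_t i= 0; i < engines.size(); i++)
        result= std::min(result, (engines[i].*getter)());
      return result;
    }

    // Usage: min_of_the_max(engines, &Engine::max_supported_keys);
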
+
+    Each maximum supported value is the minimum over all handlers in the
+    table.
+  */
+  uint min_of_the_max_uint(uint (handler::*operator_func)(void) const) const;
+  virtual uint max_supported_record_length() const;
+  virtual uint max_supported_keys() const;
+  virtual uint max_supported_key_parts() const;
+  virtual uint max_supported_key_length() const;
+  virtual uint max_supported_key_part_length() const;
+
+  /*
+    All handlers in a partitioned table must have the same low_byte_first
+  */
+  virtual bool low_byte_first() const
+  { return m_low_byte_first; }
+
+  /*
+    The extra record buffer length is the maximum needed by all handlers.
+    The minimum record length is the maximum of all involved handlers.
+  */
+  virtual uint extra_rec_buf_length() const;
+  virtual uint min_record_length(uint options) const;
+
+  /*
+    Transactions on the table are supported if all handlers below support
+    transactions.
+  */
+  virtual bool has_transactions()
+  { return m_has_transactions; }
+
+  /*
+    "Primary key is clustered" can only be true if all underlying handlers
+    have this feature.
+  */
+  virtual bool primary_key_is_clustered()
+  { return m_pkey_is_clustered; }
+
+  /*
+    -------------------------------------------------------------------------
+    MODULE compare records
+    -------------------------------------------------------------------------
+    cmp_ref checks if two references are the same. For most handlers this is
+    a simple memcmp of the reference. However some handlers use the primary
+    key as the reference and this can be the same even if memcmp says they
+    are different. This is due to character sets, end spaces and so forth.
+    For the partition handler the reference consists of two bytes providing
+    the partition identity of the referred record, followed by the reference
+    of the underlying handler.
+    Thus cmp_ref for the partition handler never reports records in
+    different partitions as equal, and uses cmp_ref on the underlying
+    handler to check whether the rest of the reference part is also the same.
+    -------------------------------------------------------------------------
+  */
+  virtual int cmp_ref(const byte * ref1, const byte * ref2);
+  /*
+    -------------------------------------------------------------------------
+    MODULE auto increment
+    -------------------------------------------------------------------------
+    This module is used to handle the support of auto increments.
+
+    This variable in the handler is used as part of the handler interface.
+    It is maintained by the parent handler object and should not be
+    touched by child handler objects (see handler.cc for its use).
+
+    auto_increment_column_changed
+    -------------------------------------------------------------------------
+  */
+  virtual void restore_auto_increment();
+  virtual ulonglong get_auto_increment();
+
+  /*
+    -------------------------------------------------------------------------
+    MODULE initialise handler for HANDLER call
+    -------------------------------------------------------------------------
+    This method is a special InnoDB method called before a HANDLER query.
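
The cmp_ref() scheme described in the compare-records module above (two bytes of partition id, then the child handler's reference) reduces to a short comparison. A sketch; a real implementation would delegate the tail comparison to the underlying handler's cmp_ref rather than memcmp, for the character-set reasons given above:

    #include <cstring>

    // Rows in different partitions can never be the same record, so the
    // two-byte partition prefix decides first; only on a tie do we compare
    // the underlying reference bytes.
    int cmp_partition_ref(const unsigned char *ref1, const unsigned char *ref2,
                          size_t underlying_ref_length) {
      int diff= std::memcmp(ref1, ref2, 2);   // PARTITION_BYTES_IN_POS == 2
      if (diff)
        return diff;
      return std::memcmp(ref1 + 2, ref2 + 2, underlying_ref_length);
    }
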
+ ------------------------------------------------------------------------- + */ + virtual void init_table_handle_for_HANDLER(); + + /* + The remainder of this file defines the handler methods not implemented + by the partition handler + */ + + /* + ------------------------------------------------------------------------- + MODULE foreign key support + ------------------------------------------------------------------------- + The following methods are used to implement foreign keys as supported by + InnoDB. Implement this ?? + get_foreign_key_create_info is used by SHOW CREATE TABLE to get a textual + description of how the CREATE TABLE part to define FOREIGN KEY's is done. + free_foreign_key_create_info is used to free the memory area that provided + this description. + ------------------------------------------------------------------------- + + virtual char* get_foreign_key_create_info() + virtual void free_foreign_key_create_info(char* str) + + virtual int get_foreign_key_list(THD *thd, + List<FOREIGN_KEY_INFO> *f_key_list) + virtual uint referenced_by_foreign_key() + */ + + /* + ------------------------------------------------------------------------- + MODULE fulltext index + ------------------------------------------------------------------------- + Fulltext stuff not yet. + ------------------------------------------------------------------------- + virtual int ft_init() { return HA_ERR_WRONG_COMMAND; } + virtual FT_INFO *ft_init_ext(uint flags,uint inx,const byte *key, + uint keylen) + { return NULL; } + virtual int ft_read(byte *buf) { return HA_ERR_WRONG_COMMAND; } + */ + + /* + ------------------------------------------------------------------------- + MODULE restart full table scan at position (MyISAM) + ------------------------------------------------------------------------- + The following method is only used by MyISAM when used as + temporary tables in a join. 
+    virtual int restart_rnd_next(byte *buf, byte *pos);
+  */
+
+  /*
+    -------------------------------------------------------------------------
+    MODULE on-line ALTER TABLE
+    -------------------------------------------------------------------------
+    These methods are in the handler interface but are never used (yet).
+    They are to be used by on-line alter table add/drop index:
+    -------------------------------------------------------------------------
+    virtual ulong index_ddl_flags(KEY *wanted_index) const
+    virtual int add_index(TABLE *table_arg,KEY *key_info,uint num_of_keys);
+    virtual int drop_index(TABLE *table_arg,uint *key_num,uint num_of_keys);
+  */
+
+  /*
+    -------------------------------------------------------------------------
+    MODULE tablespace support
+    -------------------------------------------------------------------------
+    Admin of table spaces is not applicable to the partition handler (InnoDB)
+    This means that the following method is not implemented:
+    -------------------------------------------------------------------------
+    virtual int discard_or_import_tablespace(my_bool discard)
+  */
+
+  /*
+    -------------------------------------------------------------------------
+    MODULE admin MyISAM
+    -------------------------------------------------------------------------
+    Admin commands are not supported currently (almost purely MyISAM
+    routines). This means that the following methods are not implemented:
+    -------------------------------------------------------------------------
+
+    virtual int check(THD* thd, HA_CHECK_OPT *check_opt);
+    virtual int backup(THD* thd, HA_CHECK_OPT *check_opt);
+    virtual int restore(THD* thd, HA_CHECK_OPT *check_opt);
+    virtual int repair(THD* thd, HA_CHECK_OPT *check_opt);
+    virtual int optimize(THD* thd, HA_CHECK_OPT *check_opt);
+    virtual int analyze(THD* thd, HA_CHECK_OPT *check_opt);
+    virtual int assign_to_keycache(THD* thd, HA_CHECK_OPT *check_opt);
+    virtual int preload_keys(THD *thd, HA_CHECK_OPT *check_opt);
+    virtual bool check_and_repair(THD *thd);
+    virtual int dump(THD* thd, int fd = -1);
+    virtual int net_read_dump(NET* net);
+    virtual uint checksum() const;
+    virtual bool is_crashed() const;
+    virtual bool auto_repair() const;
+
+    -------------------------------------------------------------------------
+    MODULE enable/disable indexes
+    -------------------------------------------------------------------------
+    Enable/Disable Indexes are not supported currently (Heap, MyISAM).
+    This means that the following methods are not implemented:
+    -------------------------------------------------------------------------
+    virtual int disable_indexes(uint mode);
+    virtual int enable_indexes(uint mode);
+    virtual int indexes_are_disabled(void);
+  */
+
+  /*
+    -------------------------------------------------------------------------
+    MODULE append_create_info
+    -------------------------------------------------------------------------
+    append_create_info is only used by MyISAM MERGE tables and the partition
+    handler will not support this handler as an underlying handler.
+    Implement this??
+    -------------------------------------------------------------------------
+    virtual void append_create_info(String *packet)
+  */
+};
diff --git a/sql/handler.cc b/sql/handler.cc
index 477a6f9bdae..0977fb311d3 100644
--- a/sql/handler.cc
+++ b/sql/handler.cc
@@ -27,102 +27,19 @@
 #include "ha_myisammrg.h"
 
-/*
-  We have dummy hanldertons in case the handler has not been compiled
-  in. This will be removed in 5.1.
-*/ -#ifdef HAVE_BERKELEY_DB -#include "ha_berkeley.h" -extern handlerton berkeley_hton; -#else -handlerton berkeley_hton = { "BerkeleyDB", SHOW_OPTION_NO, - "Supports transactions and page-level locking", DB_TYPE_BERKELEY_DB, NULL, - 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, HTON_NO_FLAGS }; -#endif -#ifdef HAVE_BLACKHOLE_DB -#include "ha_blackhole.h" -extern handlerton blackhole_hton; -#else -handlerton blackhole_hton = { "BLACKHOLE", SHOW_OPTION_NO, - "/dev/null storage engine (anything you write to it disappears)", - DB_TYPE_BLACKHOLE_DB, NULL, 0, 0, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - HTON_NO_FLAGS }; -#endif -#ifdef HAVE_EXAMPLE_DB -#include "examples/ha_example.h" -extern handlerton example_hton; -#else -handlerton example_hton = { "EXAMPLE", SHOW_OPTION_NO, - "Example storage engine", - DB_TYPE_EXAMPLE_DB, NULL, 0, 0, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - HTON_NO_FLAGS }; -#endif -#ifdef HAVE_ARCHIVE_DB -#include "ha_archive.h" -extern handlerton archive_hton; -#else -handlerton archive_hton = { "ARCHIVE", SHOW_OPTION_NO, - "Archive storage engine", DB_TYPE_ARCHIVE_DB, NULL, 0, 0, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - HTON_NO_FLAGS }; -#endif -#ifdef HAVE_CSV_DB -#include "examples/ha_tina.h" -extern handlerton tina_hton; -#else -handlerton tina_hton = { "CSV", SHOW_OPTION_NO, "CSV storage engine", - DB_TYPE_CSV_DB, NULL, 0, 0, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - HTON_NO_FLAGS }; -#endif -#ifdef HAVE_INNOBASE_DB -#include "ha_innodb.h" -extern handlerton innobase_hton; -#else -handlerton innobase_hton = { "InnoDB", SHOW_OPTION_NO, - "Supports transactions, row-level locking, and foreign keys", - DB_TYPE_INNODB, NULL, 0, 0, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - HTON_NO_FLAGS }; -#endif -#ifdef HAVE_NDBCLUSTER_DB +#include <myisampack.h> +#include <errno.h> + +#ifdef WITH_NDBCLUSTER_STORAGE_ENGINE +#define NDB_MAX_ATTRIBUTES_IN_TABLE 128 #include "ha_ndbcluster.h" -extern handlerton ndbcluster_hton; -#else -handlerton ndbcluster_hton = { "ndbcluster", SHOW_OPTION_NO, - "Clustered, fault-tolerant, memory-based tables", - DB_TYPE_NDBCLUSTER, NULL, 0, 0, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - HTON_NO_FLAGS }; #endif -#ifdef HAVE_FEDERATED_DB -#include "ha_federated.h" -extern handlerton federated_hton; -#else -handlerton federated_hton = { "FEDERATED", SHOW_OPTION_NO, - "Federated MySQL storage engine", DB_TYPE_FEDERATED_DB, NULL, 0, 0, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - HTON_NO_FLAGS }; +#ifdef WITH_PARTITION_STORAGE_ENGINE +#include "ha_partition.h" #endif -#include <myisampack.h> -#include <errno.h> - -extern handlerton myisam_hton; -extern handlerton myisammrg_hton; -extern handlerton heap_hton; -extern handlerton binlog_hton; - -/* - Obsolete -*/ -handlerton isam_hton = { "ISAM", SHOW_OPTION_NO, "Obsolete storage engine", - DB_TYPE_ISAM, NULL, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, HTON_NO_FLAGS }; - +extern handlerton *sys_table_types[]; + /* static functions defined in this file */ static SHOW_COMP_OPTION have_yes= SHOW_OPTION_YES; @@ -134,27 +51,6 @@ ulong total_ha_2pc; /* size of savepoint storage area (see ha_init) */ ulong savepoint_alloc_size; -/* - This array is used for 
processing compiled in engines. -*/ -handlerton *sys_table_types[]= -{ - &myisam_hton, - &heap_hton, - &innobase_hton, - &berkeley_hton, - &blackhole_hton, - &example_hton, - &archive_hton, - &tina_hton, - &ndbcluster_hton, - &federated_hton, - &myisammrg_hton, - &binlog_hton, - &isam_hton, - NULL -}; - struct show_table_alias_st sys_table_aliases[]= { {"INNOBASE", "InnoDB"}, @@ -192,9 +88,11 @@ enum db_type ha_resolve_by_name(const char *name, uint namelen) retest: for (types= sys_table_types; *types; types++) { - if (!my_strnncoll(&my_charset_latin1, - (const uchar *)name, namelen, - (const uchar *)(*types)->name, strlen((*types)->name))) + if ((!my_strnncoll(&my_charset_latin1, + (const uchar *)name, namelen, + (const uchar *)(*types)->name, + strlen((*types)->name))) && + !((*types)->flags & HTON_NOT_USER_SELECTABLE)) return (enum db_type) (*types)->db_type; } @@ -247,9 +145,8 @@ my_bool ha_storage_engine_is_enabled(enum db_type database_type) handlerton **types; for (types= sys_table_types; *types; types++) { - if ((database_type == (*types)->db_type) && - ((*types)->state == SHOW_OPTION_YES)) - return TRUE; + if (database_type == (*types)->db_type) + return ((*types)->state == SHOW_OPTION_YES) ? TRUE : FALSE; } return FALSE; } @@ -262,7 +159,6 @@ enum db_type ha_checktype(THD *thd, enum db_type database_type, { if (ha_storage_engine_is_enabled(database_type)) return database_type; - if (no_substitute) { if (report_error) @@ -295,62 +191,67 @@ enum db_type ha_checktype(THD *thd, enum db_type database_type, handler *get_new_handler(TABLE *table, MEM_ROOT *alloc, enum db_type db_type) { - switch (db_type) { -#ifndef NO_HASH - case DB_TYPE_HASH: - return new (alloc) ha_hash(table); -#endif - case DB_TYPE_MRG_ISAM: - return new (alloc) ha_myisammrg(table); -#ifdef HAVE_BERKELEY_DB - case DB_TYPE_BERKELEY_DB: - return new (alloc) ha_berkeley(table); -#endif -#ifdef HAVE_INNOBASE_DB - case DB_TYPE_INNODB: - return new (alloc) ha_innobase(table); -#endif -#ifdef HAVE_EXAMPLE_DB - case DB_TYPE_EXAMPLE_DB: - return new (alloc) ha_example(table); -#endif -#ifdef HAVE_ARCHIVE_DB - case DB_TYPE_ARCHIVE_DB: - return new (alloc) ha_archive(table); -#endif -#ifdef HAVE_BLACKHOLE_DB - case DB_TYPE_BLACKHOLE_DB: - return new (alloc) ha_blackhole(table); -#endif -#ifdef HAVE_FEDERATED_DB - case DB_TYPE_FEDERATED_DB: - return new (alloc) ha_federated(table); -#endif -#ifdef HAVE_CSV_DB - case DB_TYPE_CSV_DB: - return new (alloc) ha_tina(table); -#endif -#ifdef HAVE_NDBCLUSTER_DB - case DB_TYPE_NDBCLUSTER: - return new (alloc) ha_ndbcluster(table); -#endif - case DB_TYPE_HEAP: - return new (alloc) ha_heap(table); - default: // should never happen + handler *file= NULL; + handlerton **types; + /* + handlers are allocated with new in the handlerton create() function + we need to set the thd mem_root for these to be allocated correctly + */ + THD *thd= current_thd; + MEM_ROOT *thd_save_mem_root= thd->mem_root; + thd->mem_root= alloc; + for (types= sys_table_types; *types; types++) + { + if (db_type == (*types)->db_type && (*types)->create) + { + file= ((*types)->state == SHOW_OPTION_YES) ? 
+ (*types)->create(table) : NULL; + break; + } + } + thd->mem_root= thd_save_mem_root; + + if (!file) { enum db_type def=(enum db_type) current_thd->variables.table_type; /* Try first with 'default table type' */ if (db_type != def) return get_new_handler(table, alloc, def); } - /* Fall back to MyISAM */ - case DB_TYPE_MYISAM: - return new (alloc) ha_myisam(table); - case DB_TYPE_MRG_MYISAM: - return new (alloc) ha_myisammrg(table); + if (file) + { + if (file->ha_initialise()) + { + delete file; + file=0; + } } + return file; } + +#ifdef WITH_PARTITION_STORAGE_ENGINE +handler *get_ha_partition(partition_info *part_info) +{ + ha_partition *partition; + DBUG_ENTER("get_ha_partition"); + if ((partition= new ha_partition(part_info))) + { + if (partition->ha_initialise()) + { + delete partition; + partition= 0; + } + } + else + { + my_error(ER_OUTOFMEMORY, MYF(0), sizeof(ha_partition)); + } + DBUG_RETURN(((handler*) partition)); +} +#endif + + /* Register handler error messages for use with my_error(). @@ -487,40 +388,13 @@ int ha_init() int ha_panic(enum ha_panic_function flag) { int error=0; -#ifndef NO_HASH - error|=h_panic(flag); /* fix hash */ -#endif -#ifdef HAVE_ISAM - error|=mrg_panic(flag); - error|=nisam_panic(flag); -#endif - error|=heap_panic(flag); - error|=mi_panic(flag); - error|=myrg_panic(flag); -#ifdef HAVE_BERKELEY_DB - if (have_berkeley_db == SHOW_OPTION_YES) - error|=berkeley_end(); -#endif -#ifdef HAVE_INNOBASE_DB - if (have_innodb == SHOW_OPTION_YES) - error|=innobase_end(); -#endif -#ifdef HAVE_NDBCLUSTER_DB - if (have_ndbcluster == SHOW_OPTION_YES) - error|=ndbcluster_end(); -#endif -#ifdef HAVE_FEDERATED_DB - if (have_federated_db == SHOW_OPTION_YES) - error|= federated_db_end(); -#endif -#ifdef HAVE_ARCHIVE_DB - if (have_archive_db == SHOW_OPTION_YES) - error|= archive_db_end(); -#endif -#ifdef HAVE_CSV_DB - if (have_csv_db == SHOW_OPTION_YES) - error|= tina_end(); -#endif + handlerton **types; + + for (types= sys_table_types; *types; types++) + { + if ((*types)->state == SHOW_OPTION_YES && (*types)->panic) + error|= (*types)->panic(flag); + } if (ha_finish_errors()) error= 1; return error; @@ -528,14 +402,13 @@ int ha_panic(enum ha_panic_function flag) void ha_drop_database(char* path) { -#ifdef HAVE_INNOBASE_DB - if (have_innodb == SHOW_OPTION_YES) - innobase_drop_database(path); -#endif -#ifdef HAVE_NDBCLUSTER_DB - if (have_ndbcluster == SHOW_OPTION_YES) - ndbcluster_drop_database(path); -#endif + handlerton **types; + + for (types= sys_table_types; *types; types++) + { + if ((*types)->state == SHOW_OPTION_YES && (*types)->drop_database) + (*types)->drop_database(path); + } } /* don't bother to rollback here, it's done already */ @@ -543,7 +416,8 @@ void ha_close_connection(THD* thd) { handlerton **types; for (types= sys_table_types; *types; types++) - if (thd->ha_data[(*types)->slot]) + /* XXX Maybe do a rollback if close_connection == NULL ? 
*/ + if (thd->ha_data[(*types)->slot] && (*types)->close_connection) (*types)->close_connection(thd); } @@ -1120,10 +994,14 @@ bool mysql_xa_recover(THD *thd) int ha_release_temporary_latches(THD *thd) { -#ifdef HAVE_INNOBASE_DB - if (opt_innodb) - innobase_release_temporary_latches(thd); -#endif + handlerton **types; + + for (types= sys_table_types; *types; types++) + { + if ((*types)->state == SHOW_OPTION_YES && + (*types)->release_temporary_latches) + (*types)->release_temporary_latches(thd); + } return 0; } @@ -1135,10 +1013,13 @@ int ha_release_temporary_latches(THD *thd) int ha_update_statistics() { -#ifdef HAVE_INNOBASE_DB - if (opt_innodb) - innodb_export_status(); -#endif + handlerton **types; + + for (types= sys_table_types; *types; types++) + { + if ((*types)->state == SHOW_OPTION_YES && (*types)->update_statistics) + (*types)->update_statistics(); + } return 0; } @@ -1248,35 +1129,45 @@ int ha_release_savepoint(THD *thd, SAVEPOINT *sv) int ha_start_consistent_snapshot(THD *thd) { -#ifdef HAVE_INNOBASE_DB - if ((have_innodb == SHOW_OPTION_YES) && - !innobase_start_trx_and_assign_read_view(thd)) - return 0; -#endif + bool warn= true; + handlerton **types; + + for (types= sys_table_types; *types; types++) + { + if ((*types)->state == SHOW_OPTION_YES && + (*types)->start_consistent_snapshot) + { + (*types)->start_consistent_snapshot(thd); + warn= false; /* hope user is using engine */ + } + } /* Same idea as when one wants to CREATE TABLE in one engine which does not exist: */ - push_warning(thd, MYSQL_ERROR::WARN_LEVEL_WARN, ER_UNKNOWN_ERROR, - "This MySQL server does not support any " - "consistent-read capable storage engine"); + if (warn) + push_warning(thd, MYSQL_ERROR::WARN_LEVEL_WARN, ER_UNKNOWN_ERROR, + "This MySQL server does not support any " + "consistent-read capable storage engine"); return 0; } -bool ha_flush_logs() +bool ha_flush_logs(enum db_type db_type) { bool result=0; -#ifdef HAVE_BERKELEY_DB - if ((have_berkeley_db == SHOW_OPTION_YES) && - berkeley_flush_logs()) - result=1; -#endif -#ifdef HAVE_INNOBASE_DB - if ((have_innodb == SHOW_OPTION_YES) && - innobase_flush_logs()) - result=1; -#endif + handlerton **types; + + for (types= sys_table_types; *types; types++) + { + if ((*types)->state == SHOW_OPTION_YES && + (db_type == DB_TYPE_DEFAULT || db_type == (*types)->db_type) && + (*types)->flush_logs) + { + if ((*types)->flush_logs()) + result= 1; + } + } return result; } @@ -1400,6 +1291,111 @@ int handler::ha_open(const char *name, int mode, int test_if_locked) DBUG_RETURN(error); } +int handler::ha_initialise() +{ + DBUG_ENTER("ha_initialise"); + if (table && table->s->fields && + ha_allocate_read_write_set(table->s->fields)) + { + DBUG_RETURN(TRUE); + } + DBUG_RETURN(FALSE); +} + +int handler::ha_allocate_read_write_set(ulong no_fields) +{ + uint bitmap_size= 4*(((no_fields+1)+31)/32); + uint32 *read_buf, *write_buf; +#ifndef DEBUG_OFF + my_bool r; +#endif + DBUG_ENTER("ha_allocate_read_write_set"); + DBUG_PRINT("enter", ("no_fields = %d", no_fields)); + + if (table) + { + if (table->read_set == NULL) + { + read_set= (MY_BITMAP*)sql_alloc(sizeof(MY_BITMAP)); + write_set= (MY_BITMAP*)sql_alloc(sizeof(MY_BITMAP)); + read_buf= (uint32*)sql_alloc(bitmap_size); + write_buf= (uint32*)sql_alloc(bitmap_size); + if (!read_set || !write_set || !read_buf || !write_buf) + { + ha_deallocate_read_write_set(); + DBUG_RETURN(TRUE); + } +#ifndef DEBUG_OFF + r = +#endif + bitmap_init(read_set, read_buf, no_fields+1, FALSE); + DBUG_ASSERT(!r /*bitmap_init(read_set...)*/); 
+#ifndef DEBUG_OFF + r = +#endif + bitmap_init(write_set, write_buf, no_fields+1, FALSE); + DBUG_ASSERT(!r /*bitmap_init(write_set...)*/); + table->read_set= read_set; + table->write_set= write_set; + ha_clear_all_set(); + } + else + { + read_set= table->read_set; + write_set= table->write_set; + } + } + DBUG_RETURN(FALSE); +} + +void handler::ha_deallocate_read_write_set() +{ + DBUG_ENTER("ha_deallocate_read_write_set"); + read_set=write_set=0; + DBUG_VOID_RETURN; +} + +void handler::ha_clear_all_set() +{ + DBUG_ENTER("ha_clear_all_set"); + bitmap_clear_all(read_set); + bitmap_clear_all(write_set); + bitmap_set_bit(read_set, 0); + bitmap_set_bit(write_set, 0); + DBUG_VOID_RETURN; +} + +int handler::ha_retrieve_all_cols() +{ + DBUG_ENTER("handler::ha_retrieve_all_cols"); + bitmap_set_all(read_set); + DBUG_RETURN(0); +} + +int handler::ha_retrieve_all_pk() +{ + DBUG_ENTER("ha_retrieve_all_pk"); + ha_set_primary_key_in_read_set(); + DBUG_RETURN(0); +} + +void handler::ha_set_primary_key_in_read_set() +{ + ulong prim_key= table->s->primary_key; + DBUG_ENTER("handler::ha_set_primary_key_in_read_set"); + DBUG_PRINT("info", ("Primary key = %d", prim_key)); + if (prim_key != MAX_KEY) + { + KEY_PART_INFO *key_part= table->key_info[prim_key].key_part; + KEY_PART_INFO *key_part_end= key_part + + table->key_info[prim_key].key_parts; + for (;key_part != key_part_end; ++key_part) + ha_set_bit_in_read_set(key_part->fieldnr); + } + DBUG_VOID_RETURN; +} + + /* Read first row (only) from a table This is never called for InnoDB or BDB tables, as these table types @@ -1428,7 +1424,7 @@ int handler::read_first_row(byte * buf, uint primary_key) else { /* Find the first row through the primary key */ - (void) ha_index_init(primary_key); + (void) ha_index_init(primary_key, 0); error=index_first(buf); (void) ha_index_end(); } @@ -1612,7 +1608,7 @@ ulonglong handler::get_auto_increment() int error; (void) extra(HA_EXTRA_KEYREAD); - index_init(table->s->next_number_index); + index_init(table->s->next_number_index, 1); if (!table->s->next_number_key_offset) { // Autoincrement at key-start error=index_last(table->record[1]); @@ -1888,6 +1884,14 @@ int handler::rename_table(const char * from, const char * to) return error; } + +void handler::drop_table(const char *name) +{ + close(); + delete_table(name); +} + + /* Tell the storage engine that it is allowed to "disable transaction" in the handler. 
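
read_first_row() above picks the cheaper of two access paths: a table scan when the engine scans fast (or must skip deleted holes), otherwise a jump straight to index_first on the primary key. The same decision in a compilable sketch over an invented abstract interface:

    struct Table {                         // stand-in for an opened handler
      virtual bool prefer_scan() const= 0; // cheap sequential scan available?
      virtual int rnd_init(bool scan)= 0;
      virtual int rnd_next(unsigned char *buf)= 0;
      virtual int rnd_end()= 0;
      virtual int index_init(unsigned idx, bool sorted)= 0;
      virtual int index_first(unsigned char *buf)= 0;
      virtual int index_end()= 0;
      virtual ~Table() {}
    };

    int read_first_row(Table &t, unsigned char *buf, unsigned primary_key) {
      int error;
      if (t.prefer_scan()) {               // sequential scan of the data file
        (void) t.rnd_init(true);
        error= t.rnd_next(buf);
        (void) t.rnd_end();
      } else {                             // find the first row through the PK
        (void) t.index_init(primary_key, false);
        error= t.index_first(buf);
        (void) t.index_end();
      }
      return error;
    }
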
It is a hint that ACID is not required - it is used in NDB for @@ -2144,7 +2148,7 @@ int ha_discover(THD *thd, const char *db, const char *name, DBUG_PRINT("enter", ("db: %s, name: %s", db, name)); if (is_prefix(name,tmp_file_prefix)) /* skip temporary tables */ DBUG_RETURN(error); -#ifdef HAVE_NDBCLUSTER_DB +#ifdef WITH_NDBCLUSTER_STORAGE_ENGINE if (have_ndbcluster == SHOW_OPTION_YES) error= ndbcluster_discover(thd, db, name, frmblob, frmlen); #endif @@ -2168,7 +2172,7 @@ ha_find_files(THD *thd,const char *db,const char *path, DBUG_ENTER("ha_find_files"); DBUG_PRINT("enter", ("db: %s, path: %s, wild: %s, dir: %d", db, path, wild, dir)); -#ifdef HAVE_NDBCLUSTER_DB +#ifdef WITH_NDBCLUSTER_STORAGE_ENGINE if (have_ndbcluster == SHOW_OPTION_YES) error= ndbcluster_find_files(thd, db, path, wild, dir, files); #endif @@ -2190,7 +2194,7 @@ int ha_table_exists_in_engine(THD* thd, const char* db, const char* name) int error= 0; DBUG_ENTER("ha_table_exists_in_engine"); DBUG_PRINT("enter", ("db: %s, name: %s", db, name)); -#ifdef HAVE_NDBCLUSTER_DB +#ifdef WITH_NDBCLUSTER_STORAGE_ENGINE if (have_ndbcluster == SHOW_OPTION_YES) error= ndbcluster_table_exists_in_engine(thd, db, name); #endif @@ -2444,7 +2448,7 @@ int handler::compare_key(key_range *range) int handler::index_read_idx(byte * buf, uint index, const byte * key, uint key_len, enum ha_rkey_function find_flag) { - int error= ha_index_init(index); + int error= ha_index_init(index, 0); if (!error) error= index_read(buf, key, key_len, find_flag); if (!error) @@ -2517,6 +2521,54 @@ TYPELIB *ha_known_exts(void) return &known_extensions; } +static bool stat_print(THD *thd, const char *type, const char *file, + const char *status) +{ + Protocol *protocol= thd->protocol; + protocol->prepare_for_resend(); + protocol->store(type, system_charset_info); + protocol->store(file, system_charset_info); + protocol->store(status, system_charset_info); + if (protocol->write()) + return TRUE; + return FALSE; +} + +bool ha_show_status(THD *thd, enum db_type db_type, enum ha_stat_type stat) +{ + handlerton **types; + List<Item> field_list; + Protocol *protocol= thd->protocol; + + field_list.push_back(new Item_empty_string("Type",10)); + field_list.push_back(new Item_empty_string("Name",FN_REFLEN)); + field_list.push_back(new Item_empty_string("Status",10)); + + if (protocol->send_fields(&field_list, + Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF)) + return TRUE; + + for (types= sys_table_types; *types; types++) + { + if ((*types)->state == SHOW_OPTION_YES && + (db_type == DB_TYPE_DEFAULT || db_type == (*types)->db_type) && + (*types)->show_status) + { + if ((*types)->show_status(thd, stat_print, stat)) + return TRUE; + } + else if (db_type == (*types)->db_type && + (*types)->state != SHOW_OPTION_YES) + { + if (stat_print(thd, (*types)->name, "", "DISABLED")) + return TRUE; + } + } + + send_eof(thd); + return FALSE; +} + #ifdef HAVE_REPLICATION /* @@ -2540,11 +2592,19 @@ TYPELIB *ha_known_exts(void) int ha_repl_report_sent_binlog(THD *thd, char *log_file_name, my_off_t end_offset) { -#ifdef HAVE_INNOBASE_DB - return innobase_repl_report_sent_binlog(thd,log_file_name,end_offset); -#else - return 0; -#endif + int result= 0; + handlerton **types; + + for (types= sys_table_types; *types; types++) + { + if ((*types)->state == SHOW_OPTION_YES && + (*types)->repl_report_sent_binlog) + { + (*types)->repl_report_sent_binlog(thd,log_file_name,end_offset); + result= 0; + } + } + return result; } diff --git a/sql/handler.h b/sql/handler.h index be188f7cacd..aaba5da8799 100644 
--- a/sql/handler.h +++ b/sql/handler.h @@ -28,10 +28,7 @@ #define NO_HASH /* Not yet implemented */ #endif -#if defined(HAVE_BERKELEY_DB) || defined(HAVE_INNOBASE_DB) || \ - defined(HAVE_NDBCLUSTER_DB) #define USING_TRANSACTIONS -#endif // the following is for checking tables @@ -90,6 +87,11 @@ #define HA_NEED_READ_RANGE_BUFFER (1 << 29) /* for read_multi_range */ #define HA_ANY_INDEX_MAY_BE_UNIQUE (1 << 30) +/* Flags for partition handlers */ +#define HA_CAN_PARTITION (1 << 0) /* Partition support */ +#define HA_CAN_UPDATE_PARTITION_KEY (1 << 1) +#define HA_CAN_PARTITION_UNIQUE (1 << 2) + /* bits in index_flags(index_number) for what you can do with index */ #define HA_READ_NEXT 1 /* TODO really use this flag */ @@ -99,6 +101,10 @@ #define HA_ONLY_WHOLE_INDEX 16 /* Can't use part key searches */ #define HA_KEYREAD_ONLY 64 /* Support HA_EXTRA_KEYREAD */ +/* bits in alter_table_flags */ +#define HA_ONLINE_ADD_EMPTY_PARTITION 1 +#define HA_ONLINE_DROP_PARTITION 2 + /* Index scan will not return records in rowid order. Not guaranteed to be set for unordered (e.g. HASH) indexes. @@ -118,7 +124,7 @@ example + csv + heap + blackhole + federated + 0 (yes, the sum is deliberately inaccurate) */ -#define MAX_HA 14 +#define MAX_HA 15 /* Bits in index_ddl_flags(KEY *wanted_index) @@ -182,6 +188,8 @@ enum db_type DB_TYPE_EXAMPLE_DB, DB_TYPE_ARCHIVE_DB, DB_TYPE_CSV_DB, DB_TYPE_FEDERATED_DB, DB_TYPE_BLACKHOLE_DB, + DB_TYPE_PARTITION_DB, + DB_TYPE_BINLOG, DB_TYPE_DEFAULT // Must be last }; @@ -222,6 +230,9 @@ typedef ulonglong my_xid; // this line is the same as in log_event.h #define MAXGTRIDSIZE 64 #define MAXBQUALSIZE 64 +#define COMPATIBLE_DATA_YES 0 +#define COMPATIBLE_DATA_NO 1 + struct xid_t { long formatID; long gtrid_length; @@ -296,6 +307,16 @@ typedef struct xid_t XID; #define MAX_XID_LIST_SIZE (1024*128) #endif +/* The handler for a table type. 
Will be included in the TABLE structure */ + +struct st_table; +typedef struct st_table TABLE; +struct st_foreign_key_info; +typedef struct st_foreign_key_info FOREIGN_KEY_INFO; +typedef bool (stat_print_fn)(THD *thd, const char *type, const char *file, + const char *status); +enum ha_stat_type { HA_ENGINE_STATUS, HA_ENGINE_LOGS, HA_ENGINE_MUTEX }; + /* handlerton is a singleton structure - one instance per storage engine - to provide access to storage engine functionality that works on the @@ -390,6 +411,16 @@ typedef struct void *(*create_cursor_read_view)(); void (*set_cursor_read_view)(void *); void (*close_cursor_read_view)(void *); + handler *(*create)(TABLE *table); + void (*drop_database)(char* path); + int (*panic)(enum ha_panic_function flag); + int (*release_temporary_latches)(THD *thd); + int (*update_statistics)(); + int (*start_consistent_snapshot)(THD *thd); + bool (*flush_logs)(); + bool (*show_status)(THD *thd, stat_print_fn *print, enum ha_stat_type stat); + int (*repl_report_sent_binlog)(THD *thd, char *log_file_name, + my_off_t end_offset); uint32 flags; /* global handler flags */ } handlerton; @@ -404,6 +435,8 @@ struct show_table_alias_st { #define HTON_ALTER_NOT_SUPPORTED (1 << 1) //Engine does not support alter #define HTON_CAN_RECREATE (1 << 2) //Delete all is used fro truncate #define HTON_HIDDEN (1 << 3) //Engine does not appear in lists +#define HTON_FLUSH_AFTER_RENAME (1 << 4) +#define HTON_NOT_USER_SELECTABLE (1 << 5) typedef struct st_thd_trans { @@ -418,6 +451,222 @@ typedef struct st_thd_trans enum enum_tx_isolation { ISO_READ_UNCOMMITTED, ISO_READ_COMMITTED, ISO_REPEATABLE_READ, ISO_SERIALIZABLE}; + +enum ndb_distribution { ND_KEYHASH= 0, ND_LINHASH= 1 }; + +typedef struct { + uint32 start_part; + uint32 end_part; + bool use_bit_array; +} part_id_range; +/** + * An enum and a struct to handle partitioning and subpartitioning. + */ +enum partition_type { + NOT_A_PARTITION= 0, + RANGE_PARTITION, + HASH_PARTITION, + LIST_PARTITION +}; + +enum partition_state { + PART_NORMAL= 0, + PART_IS_DROPPED= 1, + PART_TO_BE_DROPPED= 2, + PART_DROPPING= 3, + PART_IS_ADDED= 4, + PART_ADDING= 5, + PART_ADDED= 6 +}; + +#define UNDEF_NODEGROUP 65535 +class Item; + +class partition_element :public Sql_alloc { +public: + List<partition_element> subpartitions; + List<longlong> list_val_list; + ulonglong part_max_rows; + ulonglong part_min_rows; + char *partition_name; + char *tablespace_name; + longlong range_value; + char* part_comment; + char* data_file_name; + char* index_file_name; + enum db_type engine_type; + enum partition_state part_state; + uint16 nodegroup_id; + + partition_element() + : part_max_rows(0), part_min_rows(0), partition_name(NULL), + tablespace_name(NULL), range_value(0), part_comment(NULL), + data_file_name(NULL), index_file_name(NULL), + engine_type(DB_TYPE_UNKNOWN),part_state(PART_NORMAL), + nodegroup_id(UNDEF_NODEGROUP) + { + subpartitions.empty(); + list_val_list.empty(); + } + ~partition_element() {} +}; + +typedef struct { + longlong list_value; + uint partition_id; +} LIST_PART_ENTRY; + +class partition_info; + +typedef bool (*get_part_id_func)(partition_info *part_info, + uint32 *part_id); +typedef uint32 (*get_subpart_id_func)(partition_info *part_info); + +class partition_info :public Sql_alloc { +public: + /* + * Here comes a set of definitions needed for partitioned table handlers. 
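
The partition_info members declared just below (range_int_array, list_array and the get_part_id_func pointers) are what turn a partition-function value into a partition id. As a taste of what such a function does for RANGE partitioning, here is a hedged sketch: each partition stores one ascending VALUES LESS THAN bound (range_value in partition_element above), and a row belongs to the first partition whose bound is strictly greater than its value. The function name and signature are illustrative, not the server's.

    #include <cstdint>
    #include <vector>

    // Binary search for the first bound strictly greater than func_value.
    bool get_range_partition_id(const std::vector<long long> &bounds,
                                long long func_value, uint32_t *part_id) {
      size_t lo= 0, hi= bounds.size();
      while (lo < hi) {
        size_t mid= lo + (hi - lo) / 2;
        if (bounds[mid] <= func_value)    // bound is "less than", so equal
          lo= mid + 1;                    // values fall into the next part
        else
          hi= mid;
      }
      if (lo == bounds.size())
        return false;                     // above the last bound: no partition
      *part_id= (uint32_t) lo;
      return true;
    }
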
+ */ + List<partition_element> partitions; + List<partition_element> temp_partitions; + + List<char> part_field_list; + List<char> subpart_field_list; + + get_part_id_func get_partition_id; + get_part_id_func get_part_partition_id; + get_subpart_id_func get_subpartition_id; + + Field **part_field_array; + Field **subpart_field_array; + Field **full_part_field_array; + + Item *part_expr; + Item *subpart_expr; + + Item *item_free_list; + + union { + longlong *range_int_array; + LIST_PART_ENTRY *list_array; + }; + char* part_info_string; + + char *part_func_string; + char *subpart_func_string; + + partition_element *curr_part_elem; + partition_element *current_partition; + /* + These key_map's are used for Partitioning to enable quick decisions + on whether we can derive more information about which partition to + scan just by looking at what index is used. + */ + key_map all_fields_in_PF, all_fields_in_PPF, all_fields_in_SPF; + key_map some_fields_in_PF; + + enum db_type default_engine_type; + Item_result part_result_type; + partition_type part_type; + partition_type subpart_type; + + uint part_info_len; + uint part_func_len; + uint subpart_func_len; + + uint no_parts; + uint no_subparts; + uint count_curr_parts; + uint count_curr_subparts; + + uint part_error_code; + + uint no_list_values; + + uint no_part_fields; + uint no_subpart_fields; + uint no_full_part_fields; + + uint16 linear_hash_mask; + + bool use_default_partitions; + bool use_default_subpartitions; + bool defined_max_value; + bool list_of_part_fields; + bool list_of_subpart_fields; + bool linear_hash_ind; + + partition_info() + : get_partition_id(NULL), get_part_partition_id(NULL), + get_subpartition_id(NULL), + part_field_array(NULL), subpart_field_array(NULL), + full_part_field_array(NULL), + part_expr(NULL), subpart_expr(NULL), item_free_list(NULL), + list_array(NULL), + part_info_string(NULL), + part_func_string(NULL), subpart_func_string(NULL), + curr_part_elem(NULL), current_partition(NULL), + default_engine_type(DB_TYPE_UNKNOWN), + part_result_type(INT_RESULT), + part_type(NOT_A_PARTITION), subpart_type(NOT_A_PARTITION), + part_info_len(0), part_func_len(0), subpart_func_len(0), + no_parts(0), no_subparts(0), + count_curr_parts(0), count_curr_subparts(0), part_error_code(0), + no_list_values(0), no_part_fields(0), no_subpart_fields(0), + no_full_part_fields(0), linear_hash_mask(0), + use_default_partitions(TRUE), + use_default_subpartitions(TRUE), defined_max_value(FALSE), + list_of_part_fields(FALSE), list_of_subpart_fields(FALSE), + linear_hash_ind(FALSE) + { + all_fields_in_PF.clear_all(); + all_fields_in_PPF.clear_all(); + all_fields_in_SPF.clear_all(); + some_fields_in_PF.clear_all(); + partitions.empty(); + temp_partitions.empty(); + part_field_list.empty(); + subpart_field_list.empty(); + } + ~partition_info() {} +}; + + +#ifdef WITH_PARTITION_STORAGE_ENGINE +/* + Answers the question if subpartitioning is used for a certain table + SYNOPSIS + is_sub_partitioned() + part_info A reference to the partition_info struct + RETURN VALUE + Returns true if subpartitioning used and false otherwise + DESCRIPTION + A routine to check for subpartitioning for improved readability of code +*/ +inline +bool is_sub_partitioned(partition_info *part_info) +{ return (part_info->subpart_type == NOT_A_PARTITION ? FALSE : TRUE); } + + +/* + Returns the total number of partitions on the leaf level. 
+ SYNOPSIS + get_tot_partitions() + part_info A reference to the partition_info struct + RETURN VALUE + Returns the total number of partitions + DESCRIPTION + A routine to compute the total number of partitions for improved + readability of code +*/ +inline +uint get_tot_partitions(partition_info *part_info) +{ + return part_info->no_parts * + (is_sub_partitioned(part_info) ? part_info->no_subparts : 1); +} +#endif + typedef struct st_ha_create_information { CHARSET_INFO *table_charset, *default_table_charset; @@ -445,12 +694,6 @@ typedef struct st_ha_create_information } HA_CREATE_INFO; -/* The handler for a table type. Will be included in the TABLE structure */ - -struct st_table; -typedef struct st_table TABLE; -struct st_foreign_key_info; -typedef struct st_foreign_key_info FOREIGN_KEY_INFO; typedef struct st_savepoint SAVEPOINT; extern ulong savepoint_alloc_size; @@ -468,6 +711,39 @@ typedef struct st_ha_check_opt } HA_CHECK_OPT; +#ifdef WITH_PARTITION_STORAGE_ENGINE +bool is_partition_in_list(char *part_name, List<char> list_part_names); +bool is_partitions_in_table(partition_info *new_part_info, + partition_info *old_part_info); +bool set_up_defaults_for_partitioning(partition_info *part_info, + handler *file, + ulonglong max_rows, + uint start_no); +handler *get_ha_partition(partition_info *part_info); +int get_parts_for_update(const byte *old_data, byte *new_data, + const byte *rec0, partition_info *part_info, + uint32 *old_part_id, uint32 *new_part_id); +int get_part_for_delete(const byte *buf, const byte *rec0, + partition_info *part_info, uint32 *part_id); +bool check_partition_info(partition_info *part_info, enum db_type eng_type, + handler *file, ulonglong max_rows); +bool fix_partition_func(THD *thd, const char *name, TABLE *table); +char *generate_partition_syntax(partition_info *part_info, + uint *buf_length, bool use_sql_alloc, + bool add_default_info); +bool partition_key_modified(TABLE *table, List<Item> &fields); +void get_partition_set(const TABLE *table, byte *buf, const uint index, + const key_range *key_spec, + part_id_range *part_spec); +void get_full_part_id_from_key(const TABLE *table, byte *buf, + KEY *key_info, + const key_range *key_spec, + part_id_range *part_spec); +bool mysql_unpack_partition(THD *thd, uchar *part_buf, uint part_info_len, + TABLE* table, enum db_type default_db_type); +#endif + + /* This is a buffer area that the handler can use to store rows.
'end_of_used_area' should be kept updated after calls to @@ -485,10 +761,13 @@ typedef struct st_handler_buffer class handler :public Sql_alloc { +#ifdef WITH_PARTITION_STORAGE_ENGINE + friend class ha_partition; +#endif protected: struct st_table *table; /* The table definition */ - virtual int index_init(uint idx) { active_index=idx; return 0; } + virtual int index_init(uint idx, bool sorted) { active_index=idx; return 0; } virtual int index_end() { active_index=MAX_KEY; return 0; } /* rnd_init() can be called two times without rnd_end() in between @@ -500,6 +779,8 @@ class handler :public Sql_alloc virtual int rnd_init(bool scan) =0; virtual int rnd_end() { return 0; } +private: + virtual int reset() { return extra(HA_EXTRA_RESET); } public: const handlerton *ht; /* storage engine of this handler */ byte *ref; /* Pointer to current row */ @@ -542,6 +823,8 @@ public: bool auto_increment_column_changed; bool implicit_emptied; /* Can be !=0 only if HEAP */ const COND *pushed_cond; + MY_BITMAP *read_set; + MY_BITMAP *write_set; handler(const handlerton *ht_arg, TABLE *table_arg) :table(table_arg), ht(ht_arg), @@ -554,7 +837,12 @@ public: raid_type(0), ft_handler(0), inited(NONE), implicit_emptied(0), pushed_cond(NULL) {} - virtual ~handler(void) { /* TODO: DBUG_ASSERT(inited == NONE); */ } + virtual ~handler(void) + { + ha_deallocate_read_write_set(); + /* TODO: DBUG_ASSERT(inited == NONE); */ + } + virtual int ha_initialise(); int ha_open(const char *name, int mode, int test_if_locked); bool update_auto_increment(); virtual void print_error(int error, myf errflag); @@ -567,7 +855,7 @@ public: { return rows2double(ranges+rows); } virtual const key_map *keys_to_use_for_scanning() { return &key_map_empty; } virtual bool has_transactions(){ return 0;} - virtual uint extra_rec_buf_length() { return 0; } + virtual uint extra_rec_buf_length() const { return 0; } /* Return upper bound of current number of records in the table @@ -586,12 +874,12 @@ public: virtual const char *index_type(uint key_number) { DBUG_ASSERT(0); return "";} - int ha_index_init(uint idx) + int ha_index_init(uint idx, bool sorted) { DBUG_ENTER("ha_index_init"); DBUG_ASSERT(inited==NONE); inited=INDEX; - DBUG_RETURN(index_init(idx)); + DBUG_RETURN(index_init(idx, sorted)); } int ha_index_end() { @@ -614,11 +902,140 @@ public: inited=NONE; DBUG_RETURN(rnd_end()); } + int ha_reset() + { + DBUG_ENTER("ha_reset"); + ha_clear_all_set(); + DBUG_RETURN(reset()); + } + /* this is necessary in many places, e.g. in HANDLER command */ int ha_index_or_rnd_end() { return inited == INDEX ? ha_index_end() : inited == RND ? ha_rnd_end() : 0; } + /* + This is a set of routines used to enable handlers to only read/write + partial lists of the fields in the table. The bit vector is maintained + by the server part and is used by the handler at calls to read/write + data in the table. + It replaces the use of query ids for this purpose. The benefit is that + the handler can also set bits in the read/write set if it has special + needs, and it is also easy for other parts of the server to interact + with the handler (e.g. the replication part for row-level logging). + The routines are all part of the general handler and cannot be + overridden by a handler. A handler can, however, set/reset bits by + calling these routines. + + The methods ha_retrieve_all_cols and ha_retrieve_all_pk are made + virtual to handle InnoDB specifics.
If InnoDB doesn't need the + extra parameters HA_EXTRA_RETRIEVE_ALL_COLS and + HA_EXTRA_RETRIEVE_PRIMARY_KEY anymore then these methods need not be + virtual anymore. + */ + virtual int ha_retrieve_all_cols(); + virtual int ha_retrieve_all_pk(); + void ha_set_all_bits_in_read_set() + { + DBUG_ENTER("ha_set_all_bits_in_read_set"); + bitmap_set_all(read_set); + DBUG_VOID_RETURN; + } + void ha_set_all_bits_in_write_set() + { + DBUG_ENTER("ha_set_all_bits_in_write_set"); + bitmap_set_all(write_set); + DBUG_VOID_RETURN; + } + void ha_set_bit_in_read_set(uint fieldnr) + { + DBUG_ENTER("ha_set_bit_in_read_set"); + DBUG_PRINT("info", ("fieldnr = %d", fieldnr)); + bitmap_set_bit(read_set, fieldnr); + DBUG_VOID_RETURN; + } + void ha_clear_bit_in_read_set(uint fieldnr) + { + DBUG_ENTER("ha_clear_bit_in_read_set"); + DBUG_PRINT("info", ("fieldnr = %d", fieldnr)); + bitmap_clear_bit(read_set, fieldnr); + DBUG_VOID_RETURN; + } + void ha_set_bit_in_write_set(uint fieldnr) + { + DBUG_ENTER("ha_set_bit_in_write_set"); + DBUG_PRINT("info", ("fieldnr = %d", fieldnr)); + bitmap_set_bit(write_set, fieldnr); + DBUG_VOID_RETURN; + } + void ha_clear_bit_in_write_set(uint fieldnr) + { + DBUG_ENTER("ha_clear_bit_in_write_set"); + DBUG_PRINT("info", ("fieldnr = %d", fieldnr)); + bitmap_clear_bit(write_set, fieldnr); + DBUG_VOID_RETURN; + } + void ha_set_bit_in_rw_set(uint fieldnr, bool write_op) + { + DBUG_ENTER("ha_set_bit_in_rw_set"); + DBUG_PRINT("info", ("Set bit %u in read set", fieldnr)); + bitmap_set_bit(read_set, fieldnr); + if (!write_op) { + DBUG_VOID_RETURN; + } + else + { + DBUG_PRINT("info", ("Set bit %u in read and write set", fieldnr)); + bitmap_set_bit(write_set, fieldnr); + } + DBUG_VOID_RETURN; + } + bool ha_get_bit_in_read_set(uint fieldnr) + { + bool bit_set=bitmap_is_set(read_set,fieldnr); + DBUG_ENTER("ha_get_bit_in_read_set"); + DBUG_PRINT("info", ("bit %u = %u", fieldnr, bit_set)); + DBUG_RETURN(bit_set); + } + bool ha_get_bit_in_write_set(uint fieldnr) + { + bool bit_set=bitmap_is_set(write_set,fieldnr); + DBUG_ENTER("ha_get_bit_in_write_set"); + DBUG_PRINT("info", ("bit %u = %u", fieldnr, bit_set)); + DBUG_RETURN(bit_set); + } + bool ha_get_all_bit_in_read_set() + { + bool all_bits_set= bitmap_is_set_all(read_set); + DBUG_ENTER("ha_get_all_bit_in_read_set"); + DBUG_PRINT("info", ("all bits set = %u", all_bits_set)); + DBUG_RETURN(all_bits_set); + } + bool ha_get_all_bit_in_read_clear() + { + bool all_bits_set= bitmap_is_clear_all(read_set); + DBUG_ENTER("ha_get_all_bit_in_read_clear"); + DBUG_PRINT("info", ("all bits clear = %u", all_bits_set)); + DBUG_RETURN(all_bits_set); + } + bool ha_get_all_bit_in_write_set() + { + bool all_bits_set= bitmap_is_set_all(write_set); + DBUG_ENTER("ha_get_all_bit_in_write_set"); + DBUG_PRINT("info", ("all bits set = %u", all_bits_set)); + DBUG_RETURN(all_bits_set); + } + bool ha_get_all_bit_in_write_clear() + { + bool all_bits_set= bitmap_is_clear_all(write_set); + DBUG_ENTER("ha_get_all_bit_in_write_clear"); + DBUG_PRINT("info", ("all bits clear = %u", all_bits_set)); + DBUG_RETURN(all_bits_set); + } + void ha_set_primary_key_in_read_set(); + int ha_allocate_read_write_set(ulong no_fields); + void ha_deallocate_read_write_set(); + void ha_clear_all_set(); uint get_index(void) const { return active_index; } virtual int open(const char *name, int mode, uint test_if_locked)=0; virtual int close(void)=0; @@ -627,6 +1044,85 @@ public: { return HA_ERR_WRONG_COMMAND; } virtual int delete_row(const byte * buf) { return HA_ERR_WRONG_COMMAND; } + /* + SYNOPSIS + 
start_bulk_update() + RETURN + 0 Bulk update used by handler + 1 Bulk update not used, normal operation used + */ + virtual bool start_bulk_update() { return 1; } + /* + SYNOPSIS + start_bulk_delete() + RETURN + 0 Bulk delete used by handler + 1 Bulk delete not used, normal operation used + */ + virtual bool start_bulk_delete() { return 1; } + /* + SYNOPSIS + This method is similar to update_row; however, the handler doesn't need + to execute the updates at this point in time. The handler can be certain + that another call to bulk_update_row will occur OR a call to + exec_bulk_update before the set of updates in this query is concluded. + + bulk_update_row() + old_data Old record + new_data New record + dup_key_found Number of duplicate keys found + RETURN + 0 Success + >0 Error code + */ + virtual int bulk_update_row(const byte *old_data, byte *new_data, + uint *dup_key_found) + { + DBUG_ASSERT(FALSE); + return HA_ERR_WRONG_COMMAND; + } + /* + SYNOPSIS + After this call all outstanding updates must be performed. The number + of duplicate key errors is reported in the dup_key_found parameter. + It is allowed to continue the batched update after this call; the + handler has to wait until end_bulk_update before changing state. + + exec_bulk_update() + dup_key_found Number of duplicate keys found + RETURN + 0 Success + >0 Error code + */ + virtual int exec_bulk_update(uint *dup_key_found) + { + DBUG_ASSERT(FALSE); + return HA_ERR_WRONG_COMMAND; + } + /* + SYNOPSIS + Perform any needed clean-up; no outstanding updates remain at this + point. + + end_bulk_update() + RETURN + Nothing + */ + virtual void end_bulk_update() { return; } + /* + SYNOPSIS + Execute all outstanding deletes and close down the bulk delete. + + end_bulk_delete() + RETURN + 0 Success + >0 Error code + */ + virtual int end_bulk_delete() + { + DBUG_ASSERT(FALSE); + return HA_ERR_WRONG_COMMAND; + } virtual int index_read(byte * buf, const byte * key, uint key_len, enum ha_rkey_function find_flag) { return HA_ERR_WRONG_COMMAND; } @@ -677,7 +1173,6 @@ public: { return 0; } virtual int extra_opt(enum ha_extra_function operation, ulong cache_size) { return extra(operation); } - virtual int reset() { return extra(HA_EXTRA_RESET); } virtual int external_lock(THD *thd, int lock_type) { return 0; } virtual void unlock_row() {} virtual int start_stmt(THD *thd, thr_lock_type lock_type) {return 0;} @@ -739,6 +1234,20 @@ public: virtual char *update_table_comment(const char * comment) { return (char*) comment;} virtual void append_create_info(String *packet) {} + /* + SYNOPSIS + is_fk_defined_on_table_or_index() + index Index to check if foreign key uses it + RETURN VALUE + TRUE Foreign key defined on table or index + FALSE No foreign key defined + DESCRIPTION + If index == MAX_KEY then a check for the table is made, and if index < + MAX_KEY then a check is made whether the table has foreign keys and + whether a foreign key uses this index (and thus the index cannot be dropped).
+ */ + virtual bool is_fk_defined_on_table_or_index(uint index) + { return FALSE; } virtual char* get_foreign_key_create_info() { return(NULL);} /* gets foreign key create string from InnoDB */ /* used in ALTER TABLE; 1 if changing storage engine is allowed */ @@ -754,6 +1263,11 @@ public: virtual const char *table_type() const =0; virtual const char **bas_ext() const =0; virtual ulong table_flags(void) const =0; + virtual ulong alter_table_flags(void) const { return 0; } +#ifdef WITH_PARTITION_STORAGE_ENGINE + virtual ulong partition_flags(void) const { return 0;} + virtual int get_default_no_partitions(ulonglong max_rows) { return 1;} +#endif virtual ulong index_flags(uint idx, uint part, bool all_parts) const =0; virtual ulong index_ddl_flags(KEY *wanted_index) const { return (HA_DDL_SUPPORT); } @@ -791,9 +1305,24 @@ public: */ virtual int rename_table(const char *from, const char *to); virtual int delete_table(const char *name); + virtual void drop_table(const char *name); virtual int create(const char *name, TABLE *form, HA_CREATE_INFO *info)=0; + virtual int create_handler_files(const char *name) { return FALSE;} + /* + SYNOPSIS + drop_partitions() + path Complete path of db and table name + RETURN VALUE + TRUE Failure + FALSE Success + DESCRIPTION + Drop a partition; during this operation no other activity is ongoing + on the table in this server. + */ + virtual int drop_partitions(const char *path) + { return HA_ERR_WRONG_COMMAND; } /* lock_count() can be more than one if the table is a MERGE */ virtual uint lock_count(void) const { return 1; } virtual THR_LOCK_DATA **store_lock(THD *thd, @@ -857,6 +1386,9 @@ public: Pops the top of the condition stack, if the stack is not empty */ virtual void cond_pop() { return; }; + virtual bool check_if_incompatible_data(HA_CREATE_INFO *create_info, + uint table_changes) + { return COMPATIBLE_DATA_NO; } }; /* Some extern variables used with handlers */ @@ -888,13 +1420,16 @@ int ha_panic(enum ha_panic_function flag); int ha_update_statistics(); void ha_close_connection(THD* thd); my_bool ha_storage_engine_is_enabled(enum db_type database_type); -bool ha_flush_logs(void); +bool ha_flush_logs(enum db_type db_type=DB_TYPE_DEFAULT); void ha_drop_database(char* path); int ha_create_table(const char *name, HA_CREATE_INFO *create_info, bool update_create_info); int ha_delete_table(THD *thd, enum db_type db_type, const char *path, const char *alias, bool generate_warning); +/* statistics and info */ +bool ha_show_status(THD *thd, enum db_type db_type, enum ha_stat_type stat); + /* discovery */ int ha_create_table_from_engine(THD* thd, const char *db, const char *name); int ha_discover(THD* thd, const char* dbname, const char* name, diff --git a/sql/handlerton.cc.in b/sql/handlerton.cc.in new file mode 100644 index 00000000000..55af8cdd8cf --- /dev/null +++ b/sql/handlerton.cc.in @@ -0,0 +1,14 @@ + +#include "mysql_priv.h" + +extern handlerton heap_hton,myisam_hton,myisammrg_hton, + binlog_hton@mysql_se_decls@; + +/* + This array is used for processing compiled-in engines.
+*/ +handlerton *sys_table_types[]= +{ + &heap_hton,&myisam_hton@mysql_se_htons@,&myisammrg_hton,&binlog_hton,NULL +}; + diff --git a/sql/item.cc b/sql/item.cc index 6d5855cd0ca..a1213d60aa8 100644 --- a/sql/item.cc +++ b/sql/item.cc @@ -3464,13 +3464,18 @@ bool Item_field::fix_fields(THD *thd, Item **reference) set_field(from_field); } - else if (thd->set_query_id && field->query_id != thd->query_id) + else if (thd->set_query_id) { - /* We only come here in unions */ - TABLE *table=field->table; - field->query_id=thd->query_id; - table->used_fields++; - table->used_keys.intersect(field->part_of_key); + TABLE *table= field->table; + table->file->ha_set_bit_in_rw_set(field->fieldnr, + (bool)(thd->set_query_id-1)); + if (field->query_id != thd->query_id) + { + /* We only come here in unions */ + field->query_id=thd->query_id; + table->used_fields++; + table->used_keys.intersect(field->part_of_key); + } } #ifndef NO_EMBEDDED_ACCESS_CHECKS if (any_privileges) diff --git a/sql/item_subselect.cc b/sql/item_subselect.cc index f2d6f2b5899..8c4bc996100 100644 --- a/sql/item_subselect.cc +++ b/sql/item_subselect.cc @@ -1609,7 +1609,7 @@ int subselect_uniquesubquery_engine::exec() } if (!table->file->inited) - table->file->ha_index_init(tab->ref.key); + table->file->ha_index_init(tab->ref.key, 0); error= table->file->index_read(table->record[0], tab->ref.key_buff, tab->ref.key_length,HA_READ_KEY_EXACT); @@ -1662,7 +1662,7 @@ int subselect_indexsubquery_engine::exec() } if (!table->file->inited) - table->file->ha_index_init(tab->ref.key); + table->file->ha_index_init(tab->ref.key, 1); error= table->file->index_read(table->record[0], tab->ref.key_buff, tab->ref.key_length,HA_READ_KEY_EXACT); diff --git a/sql/key.cc b/sql/key.cc index 9d86095f33e..5d88ea0a9c3 100644 --- a/sql/key.cc +++ b/sql/key.cc @@ -429,3 +429,86 @@ int key_cmp(KEY_PART_INFO *key_part, const byte *key, uint key_length) } return 0; // Keys are equal } + + +/* + Compare two records in index order + SYNOPSIS + key_rec_cmp() + key Index information + rec0 Pointer to table->record[0] + first_rec Pointer to record to compare with + second_rec Pointer to record to compare against first_rec + DESCRIPTION + This method is set up so that it can be called directly from the + priority queue, and it is optimised as much as possible since it + will be called O(N * log N) times while performing a merge + sort in various places in the code. + + We retrieve the pointer to table->record[0] using the fact that key_parts + have an offset making it possible to calculate the start of the record. + We need to get the diff to the compared record since none of the records + being compared are stored in table->record[0]. + + We first check for NULL values; if there are no NULL values, we use + a compare method that takes two field pointers and a max length + and returns the result of the comparison.
+*/ + +int key_rec_cmp(void *key, byte *first_rec, byte *second_rec) +{ + KEY *key_info= (KEY*)key; + uint key_parts= key_info->key_parts, i= 0; + KEY_PART_INFO *key_part= key_info->key_part; + char *rec0= key_part->field->ptr - key_part->offset; + my_ptrdiff_t first_diff= first_rec - (byte*)rec0, sec_diff= second_rec - (byte*)rec0; + int result= 0; + DBUG_ENTER("key_rec_cmp"); + + do + { + Field *field= key_part->field; + uint length; + + if (key_part->null_bit) + { + /* The key_part can contain NULL values */ + bool first_is_null= field->is_null_in_record_with_offset(first_diff); + bool sec_is_null= field->is_null_in_record_with_offset(sec_diff); + /* + NULL is smaller than everything, so if first is NULL and the other + is not, we know that we should return -1, and for the opposite case + we should return +1. If both are NULL we call it equality, although + it is a strange form of equality; we have equally little + information about the real value. + */ + if (!first_is_null) + { + if (!sec_is_null) + ; /* Fall through, no NULL fields */ + else + { + DBUG_RETURN(+1); + } + } + else if (!sec_is_null) + { + DBUG_RETURN(-1); + } + else + goto next_loop; /* Both were NULL */ + } + /* + No null values in the fields. + We use the virtual method cmp_max with a max length parameter. + For most field types this translates into a cmp without + max length. The exceptions are the BLOB and VARCHAR field types + that take the max length into account. + */ + result= field->cmp_max(field->ptr+first_diff, field->ptr+sec_diff, + key_part->length); +next_loop: + key_part++; + } while (!result && ++i < key_parts); + DBUG_RETURN(result); +} diff --git a/sql/lex.h b/sql/lex.h index efcb9b84f81..e3cbebf4629 100644 --- a/sql/lex.h +++ b/sql/lex.h @@ -74,6 +74,7 @@ static SYMBOL symbols[] = { { "ASC", SYM(ASC)}, { "ASCII", SYM(ASCII_SYM)}, { "ASENSITIVE", SYM(ASENSITIVE_SYM)}, + { "AUTHORS", SYM(AUTHORS_SYM)}, { "AUTO_INCREMENT", SYM(AUTO_INC)}, { "AVG", SYM(AVG_SYM)}, { "AVG_ROW_LENGTH", SYM(AVG_ROW_LENGTH)}, @@ -110,6 +111,7 @@ static SYMBOL symbols[] = { { "CIPHER", SYM(CIPHER_SYM)}, { "CLIENT", SYM(CLIENT_SYM)}, { "CLOSE", SYM(CLOSE_SYM)}, + { "COALESCE", SYM(COALESCE)}, { "CODE", SYM(CODE_SYM)}, { "COLLATE", SYM(COLLATE_SYM)}, { "COLLATION", SYM(COLLATION_SYM)}, @@ -246,6 +248,7 @@ static SYMBOL symbols[] = { { "INSENSITIVE", SYM(INSENSITIVE_SYM)}, { "INSERT", SYM(INSERT)}, { "INSERT_METHOD", SYM(INSERT_METHOD)}, + { "INSTALL", SYM(INSTALL_SYM)}, { "INT", SYM(INT_SYM)}, { "INT1", SYM(TINYINT)}, { "INT2", SYM(SMALLINT)}, @@ -275,11 +278,14 @@ static SYMBOL symbols[] = { { "LEAVE", SYM(LEAVE_SYM)}, { "LEAVES", SYM(LEAVES)}, { "LEFT", SYM(LEFT)}, + { "LESS", SYM(LESS_SYM)}, { "LEVEL", SYM(LEVEL_SYM)}, { "LIKE", SYM(LIKE)}, { "LIMIT", SYM(LIMIT)}, + { "LINEAR", SYM(LINEAR_SYM)}, { "LINES", SYM(LINES)}, { "LINESTRING", SYM(LINESTRING)}, + { "LIST", SYM(LIST_SYM)}, { "LOAD", SYM(LOAD)}, { "LOCAL", SYM(LOCAL_SYM)}, { "LOCALTIME", SYM(NOW_SYM)}, @@ -313,6 +319,7 @@ static SYMBOL symbols[] = { { "MAX_ROWS", SYM(MAX_ROWS)}, { "MAX_UPDATES_PER_HOUR", SYM(MAX_UPDATES_PER_HOUR)}, { "MAX_USER_CONNECTIONS", SYM(MAX_USER_CONNECTIONS_SYM)}, + { "MAXVALUE", SYM(MAX_VALUE_SYM)}, { "MEDIUM", SYM(MEDIUM_SYM)}, { "MEDIUMBLOB", SYM(MEDIUMBLOB)}, { "MEDIUMINT", SYM(MEDIUMINT)}, @@ -344,6 +351,7 @@ static SYMBOL symbols[] = { { "NEW", SYM(NEW_SYM)}, { "NEXT", SYM(NEXT_SYM)}, { "NO", SYM(NO_SYM)}, + { "NODEGROUP", SYM(NODEGROUP_SYM)}, { "NONE", SYM(NONE_SYM)}, { "NOT", SYM(NOT_SYM)}, { "NO_WRITE_TO_BINLOG", SYM(NO_WRITE_TO_BINLOG)}, @@ 
-365,9 +373,13 @@ static SYMBOL symbols[] = { { "OUTER", SYM(OUTER)}, { "OUTFILE", SYM(OUTFILE)}, { "PACK_KEYS", SYM(PACK_KEYS_SYM)}, + { "PARSER", SYM(PARSER_SYM)}, { "PARTIAL", SYM(PARTIAL)}, + { "PARTITION", SYM(PARTITION_SYM)}, + { "PARTITIONS", SYM(PARTITIONS_SYM)}, { "PASSWORD", SYM(PASSWORD)}, { "PHASE", SYM(PHASE_SYM)}, + { "PLUGIN", SYM(PLUGIN_SYM)}, { "POINT", SYM(POINT_SYM)}, { "POLYGON", SYM(POLYGON)}, { "PRECISION", SYM(PRECISION)}, @@ -386,6 +398,7 @@ static SYMBOL symbols[] = { { "RAID_CHUNKS", SYM(RAID_CHUNKS)}, { "RAID_CHUNKSIZE", SYM(RAID_CHUNKSIZE)}, { "RAID_TYPE", SYM(RAID_TYPE)}, + { "RANGE", SYM(RANGE_SYM)}, { "READ", SYM(READ_SYM)}, { "READS", SYM(READS_SYM)}, { "REAL", SYM(REAL)}, @@ -399,6 +412,7 @@ static SYMBOL symbols[] = { { "RELEASE", SYM(RELEASE_SYM)}, { "RELOAD", SYM(RELOAD)}, { "RENAME", SYM(RENAME)}, + { "REORGANISE", SYM(REORGANISE_SYM)}, { "REPAIR", SYM(REPAIR)}, { "REPEATABLE", SYM(REPEATABLE_SYM)}, { "REPLACE", SYM(REPLACE)}, @@ -443,7 +457,7 @@ static SYMBOL symbols[] = { { "SNAPSHOT", SYM(SNAPSHOT_SYM)}, { "SMALLINT", SYM(SMALLINT)}, { "SOME", SYM(ANY_SYM)}, - { "SONAME", SYM(UDF_SONAME_SYM)}, + { "SONAME", SYM(SONAME_SYM)}, { "SOUNDS", SYM(SOUNDS_SYM)}, { "SPATIAL", SYM(SPATIAL_SYM)}, { "SPECIFIC", SYM(SPECIFIC_SYM)}, @@ -477,6 +491,8 @@ static SYMBOL symbols[] = { { "STRING", SYM(STRING_SYM)}, { "STRIPED", SYM(RAID_STRIPED_SYM)}, { "SUBJECT", SYM(SUBJECT_SYM)}, + { "SUBPARTITION", SYM(SUBPARTITION_SYM)}, + { "SUBPARTITIONS", SYM(SUBPARTITIONS_SYM)}, { "SUPER", SYM(SUPER_SYM)}, { "SUSPEND", SYM(SUSPEND_SYM)}, { "TABLE", SYM(TABLE_SYM)}, @@ -486,6 +502,7 @@ static SYMBOL symbols[] = { { "TEMPTABLE", SYM(TEMPTABLE_SYM)}, { "TERMINATED", SYM(TERMINATED)}, { "TEXT", SYM(TEXT_SYM)}, + { "THAN", SYM(THAN_SYM)}, { "THEN", SYM(THEN_SYM)}, { "TIME", SYM(TIME_SYM)}, { "TIMESTAMP", SYM(TIMESTAMP)}, @@ -511,6 +528,7 @@ static SYMBOL symbols[] = { { "UNIQUE", SYM(UNIQUE_SYM)}, { "UNKNOWN", SYM(UNKNOWN_SYM)}, { "UNLOCK", SYM(UNLOCK_SYM)}, + { "UNINSTALL", SYM(UNINSTALL_SYM)}, { "UNSIGNED", SYM(UNSIGNED)}, { "UNTIL", SYM(UNTIL_SYM)}, { "UPDATE", SYM(UPDATE_SYM)}, @@ -577,7 +595,6 @@ static SYMBOL sql_functions[] = { { "CENTROID", F_SYM(FUNC_ARG1),0,CREATE_FUNC_GEOM(create_func_centroid)}, { "CHAR_LENGTH", F_SYM(FUNC_ARG1),0,CREATE_FUNC(create_func_char_length)}, { "CHARACTER_LENGTH", F_SYM(FUNC_ARG1),0,CREATE_FUNC(create_func_char_length)}, - { "COALESCE", SYM(COALESCE)}, { "COERCIBILITY", F_SYM(FUNC_ARG1),0,CREATE_FUNC(create_func_coercibility)}, { "COMPRESS", F_SYM(FUNC_ARG1),0,CREATE_FUNC(create_func_compress)}, { "CONCAT", SYM(CONCAT)}, diff --git a/sql/lock.cc b/sql/lock.cc index fe8dcb3aa5e..dc914d1b5f8 100644 --- a/sql/lock.cc +++ b/sql/lock.cc @@ -72,7 +72,7 @@ TODO: #ifndef MASTER #include "../srclib/myisammrg/myrg_def.h" #else -#include "../myisammrg/myrg_def.h" +#include "../storage/myisammrg/myrg_def.h" #endif static MYSQL_LOCK *get_lock_data(THD *thd, TABLE **table,uint count, @@ -146,6 +146,7 @@ MYSQL_LOCK *mysql_lock_tables(THD *thd, TABLE **tables, uint count, } thd->proc_info="System lock"; + DBUG_PRINT("info", ("thd->proc_info %s", thd->proc_info)); if (lock_external(thd, tables, count)) { my_free((gptr) sql_lock,MYF(0)); @@ -153,6 +154,7 @@ MYSQL_LOCK *mysql_lock_tables(THD *thd, TABLE **tables, uint count, break; } thd->proc_info="Table lock"; + DBUG_PRINT("info", ("thd->proc_info %s", thd->proc_info)); thd->locked=1; rc= thr_lock_errno_to_mysql[(int) thr_multi_lock(sql_lock->locks, sql_lock->lock_count, @@ -218,6 +220,7 @@ static int 
lock_external(THD *thd, TABLE **tables, uint count) int lock_type,error; DBUG_ENTER("lock_external"); + DBUG_PRINT("info", ("count %d", count)); for (i=1 ; i <= count ; i++, tables++) { DBUG_ASSERT((*tables)->reginfo.lock_type >= TL_READ); @@ -226,7 +229,6 @@ static int lock_external(THD *thd, TABLE **tables, uint count) ((*tables)->reginfo.lock_type >= TL_READ && (*tables)->reginfo.lock_type <= TL_READ_NO_INSERT)) lock_type=F_RDLCK; - if ((error=(*tables)->file->external_lock(thd,lock_type))) { print_lock_error(error, (*tables)->file->table_type()); @@ -461,6 +463,8 @@ static MYSQL_LOCK *get_lock_data(THD *thd, TABLE **table_ptr, uint count, THR_LOCK_DATA **locks; TABLE **to; + DBUG_ENTER("get_lock_data"); + DBUG_PRINT("info", ("count %d", count)); *write_lock_used=0; for (i=tables=lock_count=0 ; i < count ; i++) { @@ -480,7 +484,7 @@ static MYSQL_LOCK *get_lock_data(THD *thd, TABLE **table_ptr, uint count, { my_error(ER_WRONG_LOCK_OF_SYSTEM_TABLE, MYF(0), table_ptr[i]->s->db, table_ptr[i]->s->table_name); - return 0; + DBUG_RETURN(0); } } @@ -488,11 +492,13 @@ static MYSQL_LOCK *get_lock_data(THD *thd, TABLE **table_ptr, uint count, my_malloc(sizeof(*sql_lock)+ sizeof(THR_LOCK_DATA*)*tables+sizeof(table_ptr)*lock_count, MYF(0)))) - return 0; + DBUG_RETURN(0); locks=sql_lock->locks=(THR_LOCK_DATA**) (sql_lock+1); to=sql_lock->table=(TABLE**) (locks+tables); sql_lock->table_count=lock_count; sql_lock->lock_count=tables; + DBUG_PRINT("info", ("sql_lock->table_count %d sql_lock->lock_count %d", + sql_lock->table_count, sql_lock->lock_count)); for (i=0 ; i < count ; i++) { @@ -508,7 +514,7 @@ static MYSQL_LOCK *get_lock_data(THD *thd, TABLE **table_ptr, uint count, { my_error(ER_OPEN_AS_READONLY, MYF(0), table->alias); my_free((gptr) sql_lock,MYF(0)); - return 0; + DBUG_RETURN(0); } } THR_LOCK_DATA **org_locks = locks; @@ -518,7 +524,7 @@ static MYSQL_LOCK *get_lock_data(THD *thd, TABLE **table_ptr, uint count, for ( ; org_locks != locks ; org_locks++) (*org_locks)->debug_print_param= (void *) table; } - return sql_lock; + DBUG_RETURN(sql_lock); } diff --git a/sql/log.cc b/sql/log.cc index 6c37cb04c61..1fc7ba1f1fd 100644 --- a/sql/log.cc +++ b/sql/log.cc @@ -24,6 +24,7 @@ #include "mysql_priv.h" #include "sql_repl.h" +#include "rpl_filter.h" #include <my_dir.h> #include <stdarg.h> @@ -50,7 +51,7 @@ handlerton binlog_hton = { "binlog", SHOW_OPTION_YES, "This is a meta storage engine to represent the binlog in a transaction", - DB_TYPE_UNKNOWN, /* IGNORE for now */ + DB_TYPE_BINLOG, /* IGNORE for now */ binlog_init, 0, sizeof(my_off_t), /* savepoint size = binlog offset */ @@ -67,9 +68,19 @@ handlerton binlog_hton = { NULL, /* create_cursor_read_view */ NULL, /* set_cursor_read_view */ NULL, /* close_cursor_read_view */ - HTON_HIDDEN + NULL, /* Create a new handler */ + NULL, /* Drop a database */ + NULL, /* Panic call */ + NULL, /* Release temporary latches */ + NULL, /* Update Statistics */ + NULL, /* Start Consistent Snapshot */ + NULL, /* Flush logs */ + NULL, /* Show status */ + NULL, /* Replication Report Sent Binlog */ + HTON_NOT_USER_SELECTABLE | HTON_HIDDEN }; + /* this function is mostly a placeholder. conceptually, binlog initialization (now mostly done in MYSQL_LOG::open) @@ -1619,10 +1630,11 @@ bool MYSQL_LOG::write(Log_event *event_info) binlog_[wild_]{do|ignore}_table?" 
(WL#1049)" */ if ((thd && !(thd->options & OPTION_BIN_LOG)) || - (!db_ok(local_db, binlog_do_db, binlog_ignore_db))) + (!binlog_filter->db_ok(local_db))) { VOID(pthread_mutex_unlock(&LOCK_log)); - DBUG_PRINT("error",("!db_ok('%s')", local_db)); + DBUG_PRINT("info",("db_ok('%s')==%d", local_db, + binlog_filter->db_ok(local_db))); DBUG_RETURN(0); } #endif /* HAVE_REPLICATION */ diff --git a/sql/log_event.cc b/sql/log_event.cc index 056bcca1a02..c8f8ff40700 100644 --- a/sql/log_event.cc +++ b/sql/log_event.cc @@ -23,6 +23,7 @@ #include "mysql_priv.h" #include "slave.h" +#include "rpl_filter.h" #include <my_dir.h> #endif /* MYSQL_CLIENT */ @@ -1595,7 +1596,7 @@ int Query_log_event::exec_event(struct st_relay_log_info* rli, const char *query */ thd->catalog= catalog_len ? (char *) catalog : (char *)""; thd->db_length= db_len; - thd->db= (char*) rewrite_db(db, &thd->db_length); + thd->db= (char *) rpl_filter->get_rewrite_db(db, &thd->db_length); thd->variables.auto_increment_increment= auto_increment_increment; thd->variables.auto_increment_offset= auto_increment_offset; @@ -1624,7 +1625,7 @@ int Query_log_event::exec_event(struct st_relay_log_info* rli, const char *query ::exec_event(), then the companion SET also have so we don't need to reset_one_shot_variables(). */ - if (db_ok(thd->db, replicate_do_db, replicate_ignore_db)) + if (rpl_filter->db_ok(thd->db)) { thd->set_time((time_t)when); thd->query_length= q_len_arg; @@ -2745,7 +2746,7 @@ int Load_log_event::exec_event(NET* net, struct st_relay_log_info* rli, bool use_rli_only_for_errors) { thd->db_length= db_len; - thd->db= (char*) rewrite_db(db, &thd->db_length); + thd->db= (char *) rpl_filter->get_rewrite_db(db, &thd->db_length); DBUG_ASSERT(thd->query == 0); thd->query_length= 0; // Should not be needed thd->query_error= 0; @@ -2784,7 +2785,7 @@ int Load_log_event::exec_event(NET* net, struct st_relay_log_info* rli, ::exec_event(), then the companion SET also have so we don't need to reset_one_shot_variables(). */ - if (db_ok(thd->db, replicate_do_db, replicate_ignore_db)) + if (rpl_filter->db_ok(thd->db)) { thd->set_time((time_t)when); VOID(pthread_mutex_lock(&LOCK_thread_count)); @@ -2806,7 +2807,7 @@ int Load_log_event::exec_event(NET* net, struct st_relay_log_info* rli, tables.updating= 1; // the table will be opened in mysql_load - if (table_rules_on && !tables_ok(thd, &tables)) + if (rpl_filter->is_on() && !rpl_filter->tables_ok(thd->db, &tables)) { // TODO: this is a bug - this needs to be moved to the I/O thread if (net) diff --git a/sql/mysql_priv.h b/sql/mysql_priv.h index 2a65b99585a..c45035da5af 100644 --- a/sql/mysql_priv.h +++ b/sql/mysql_priv.h @@ -185,11 +185,6 @@ extern CHARSET_INFO *national_charset_info, *table_alias_charset; #define FLUSH_TIME 0 /* Don't flush tables */ #define MAX_CONNECT_ERRORS 10 // errors before disabling host -#ifdef HAVE_INNOBASE_DB -#define IF_INNOBASE_DB(A, B) (A) -#else -#define IF_INNOBASE_DB(A, B) (B) -#endif #ifdef __NETWARE__ #define IF_NETWARE(A,B) (A) #else @@ -408,6 +403,13 @@ void view_store_options(THD *thd, st_table_list *table, String *buff); #define STRING_BUFFER_USUAL_SIZE 80 +/* + Some defines for exit codes for ::is_equal class functions. 
+*/ +#define IS_EQUAL_NO 0 +#define IS_EQUAL_YES 1 +#define IS_EQUAL_PACK_LENGTH 2 + enum enum_parsing_place { NO_MATTER, @@ -488,6 +490,7 @@ typedef my_bool (*qc_engine_callback)(THD *thd, char *table_key, #include "sql_error.h" #include "field.h" /* Field definitions */ #include "protocol.h" +#include "sql_plugin.h" #include "sql_udf.h" class user_var_entry; class Security_context; @@ -641,6 +644,22 @@ bool check_table_access(THD *thd, ulong want_access, TABLE_LIST *tables, bool no_errors); bool check_global_access(THD *thd, ulong want_access); +/* + Support routine for SQL parser on partitioning syntax +*/ +my_bool is_partition_management(LEX *lex); +/* + General routine to change field->ptr of a NULL-terminated array of Field + objects. Useful when one needs to call val_int, val_str or similar and the + field data is not in table->record[0] but in some other structure. + set_key_field_ptr changes all fields of an index using a key_info object. + All methods presume that there is at least one field to change. +*/ + +void set_field_ptr(Field **ptr, const byte *new_buf, const byte *old_buf); +void set_key_field_ptr(KEY *key_info, const byte *new_buf, + const byte *old_buf); + bool mysql_backup_table(THD* thd, TABLE_LIST* table_list); bool mysql_restore_table(THD* thd, TABLE_LIST* table_list); @@ -802,6 +821,8 @@ find_field_in_table(THD *thd, TABLE *table, const char *name, uint length, bool check_grants, bool allow_rowid, uint *cached_field_index_ptr, Security_context *sctx); +Field * +find_field_in_table_sef(TABLE *table, const char *name); #ifdef HAVE_OPENSSL #include <openssl/des.h> @@ -843,6 +864,7 @@ int mysqld_show_variables(THD *thd,const char *wild); int mysql_find_files(THD *thd,List<char> *files, const char *db, const char *path, const char *wild, bool dir); bool mysqld_show_storage_engines(THD *thd); +bool mysqld_show_authors(THD *thd); bool mysqld_show_privileges(THD *thd); bool mysqld_show_column_types(THD *thd); bool mysqld_help (THD *thd, const char *text); @@ -945,10 +967,10 @@ bool setup_tables(THD *thd, Name_resolution_context *context, int setup_wild(THD *thd, TABLE_LIST *tables, List<Item> &fields, List<Item> *sum_func_list, uint wild_num); bool setup_fields(THD *thd, Item** ref_pointer_array, - List<Item> &item, bool set_query_id, + List<Item> &item, ulong set_query_id, List<Item> *sum_func_list, bool allow_sum_func); inline bool setup_fields_with_no_wrap(THD *thd, Item **ref_pointer_array, - List<Item> &item, bool set_query_id, + List<Item> &item, ulong set_query_id, List<Item> *sum_func_list, bool allow_sum_func) { @@ -1050,6 +1072,8 @@ extern ulong volatile manager_status; extern bool volatile manager_thread_in_use, mqh_used; extern pthread_t manager_thread; pthread_handler_t handle_manager(void *arg); +bool mysql_manager_submit(void (*action)()); + /* sql_test.cc */ #ifndef DBUG_OFF @@ -1069,6 +1093,7 @@ bool key_cmp_if_same(TABLE *form,const byte *key,uint index,uint key_length); void key_unpack(String *to,TABLE *form,uint index); bool check_if_key_used(TABLE *table, uint idx, List<Item> &fields); int key_cmp(KEY_PART_INFO *key_part, const byte *key, uint key_length); +int key_rec_cmp(void *key_info, byte *a, byte *b); bool init_errmessage(void); void sql_perror(const char *message); @@ -1209,6 +1234,9 @@ extern pthread_mutex_t LOCK_mysql_create_db,LOCK_Acl,LOCK_open, #ifdef HAVE_OPENSSL extern pthread_mutex_t LOCK_des_key_file; #endif +extern pthread_mutex_t LOCK_server_started; +extern pthread_cond_t COND_server_started; +extern int mysqld_server_started; extern 
rw_lock_t LOCK_grant, LOCK_sys_init_connect, LOCK_sys_init_slave; extern pthread_cond_t COND_refresh, COND_thread_count, COND_manager; extern pthread_attr_t connection_attrib; @@ -1229,18 +1257,68 @@ extern KNOWN_DATE_TIME_FORMAT known_date_time_formats[]; extern String null_string; extern HASH open_cache; extern TABLE *unused_tables; -extern I_List<i_string> binlog_do_db, binlog_ignore_db; extern const char* any_db; extern struct my_option my_long_options[]; extern const LEX_STRING view_type; /* optional things, have_* variables */ -extern SHOW_COMP_OPTION have_isam, have_innodb, have_berkeley_db; -extern SHOW_COMP_OPTION have_example_db, have_archive_db, have_csv_db; +#ifdef WITH_INNOBASE_STORAGE_ENGINE +extern handlerton innobase_hton; +#define have_innodb innobase_hton.state +#else +extern SHOW_COMP_OPTION have_innodb; +#endif +#ifdef WITH_BERKELEY_STORAGE_ENGINE +extern handlerton berkeley_hton; +#define have_berkeley_db berkeley_hton.state +#else +extern SHOW_COMP_OPTION have_berkeley_db; +#endif +#ifdef WITH_EXAMPLE_STORAGE_ENGINE +extern handlerton example_hton; +#define have_example_db example_hton.state +#else +extern SHOW_COMP_OPTION have_example_db; +#endif +#ifdef WITH_ARCHIVE_STORAGE_ENGINE +extern handlerton archive_hton; +#define have_archive_db archive_hton.state +#else +extern SHOW_COMP_OPTION have_archive_db; +#endif +#ifdef WITH_CSV_STORAGE_ENGINE +extern handlerton tina_hton; +#define have_csv_db tina_hton.state +#else +extern SHOW_COMP_OPTION have_csv_db; +#endif +#ifdef WITH_FEDERATED_STORAGE_ENGINE +extern handlerton federated_hton; +#define have_federated_db federated_hton.state +#else extern SHOW_COMP_OPTION have_federated_db; +#endif +#ifdef WITH_BLACKHOLE_STORAGE_ENGINE +extern handlerton blackhole_hton; +#define have_blackhole_db blackhole_hton.state +#else extern SHOW_COMP_OPTION have_blackhole_db; +#endif +#ifdef WITH_NDBCLUSTER_STORAGE_ENGINE +extern handlerton ndbcluster_hton; +#define have_ndbcluster ndbcluster_hton.state +#else extern SHOW_COMP_OPTION have_ndbcluster; +#endif +#ifdef WITH_PARTITION_STORAGE_ENGINE +extern handlerton partition_hton; +#define have_partition_db partition_hton.state +#else +extern SHOW_COMP_OPTION have_partition_db; +#endif + +extern SHOW_COMP_OPTION have_isam; extern SHOW_COMP_OPTION have_raid, have_openssl, have_symlink; extern SHOW_COMP_OPTION have_query_cache; extern SHOW_COMP_OPTION have_geometry, have_rtree_keys; @@ -1301,7 +1379,7 @@ int rea_create_table(THD *thd, my_string file_name, const char *db, const char *table, HA_CREATE_INFO *create_info, List<create_field> &create_field, - uint key_count,KEY *key_info); + uint key_count,KEY *key_info, handler *file); int format_number(uint inputflag,uint max_length,my_string pos,uint length, my_string *errpos); int openfrm(THD *thd, const char *name,const char *alias,uint filestat, diff --git a/sql/mysqld.cc b/sql/mysqld.cc index b5b95e48889..abcab00af3c 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -19,28 +19,21 @@ #include <my_dir.h> #include "slave.h" #include "sql_repl.h" +#include "rpl_filter.h" #include "repl_failsafe.h" #include "stacktrace.h" #include "mysqld_suffix.h" #include "mysys_err.h" -#ifdef HAVE_BERKELEY_DB -#include "ha_berkeley.h" -#endif -#ifdef HAVE_INNOBASE_DB -#include "ha_innodb.h" -#endif + #include "ha_myisam.h" -#ifdef HAVE_NDBCLUSTER_DB -#include "ha_ndbcluster.h" -#endif -#ifdef HAVE_INNOBASE_DB +#ifdef WITH_INNOBASE_STORAGE_ENGINE #define OPT_INNODB_DEFAULT 1 #else #define OPT_INNODB_DEFAULT 0 #endif #define OPT_BDB_DEFAULT 0 -#ifdef 
HAVE_NDBCLUSTER_DB +#ifdef WITH_NDBCLUSTER_STORAGE_ENGINE #define OPT_NDBCLUSTER_DEFAULT 0 #if defined(NOT_ENOUGH_TESTED) \ && defined(NDB_SHM_TRANSPORTER) && MYSQL_VERSION_ID >= 50000 @@ -329,7 +322,7 @@ static I_List<THD> thread_cache; static pthread_cond_t COND_thread_cache, COND_flush_thread_cache; -#ifdef HAVE_BERKELEY_DB +#ifdef WITH_BERKELEY_STORAGE_ENGINE static my_bool opt_sync_bdb_logs; #endif @@ -354,7 +347,64 @@ my_bool opt_safe_user_create = 0, opt_no_mix_types = 0; my_bool opt_show_slave_auth_info, opt_sql_bin_update = 0; my_bool opt_log_slave_updates= 0; my_bool opt_innodb; -#ifdef HAVE_NDBCLUSTER_DB +#ifdef WITH_INNOBASE_STORAGE_ENGINE +extern struct show_var_st innodb_status_variables[]; +extern uint innobase_init_flags, innobase_lock_type; +extern uint innobase_flush_log_at_trx_commit; +extern ulong innobase_cache_size, innobase_fast_shutdown; +extern ulong innobase_large_page_size; +extern char *innobase_home, *innobase_tmpdir, *innobase_logdir; +extern long innobase_lock_scan_time; +extern long innobase_mirrored_log_groups, innobase_log_files_in_group; +extern long innobase_log_file_size, innobase_log_buffer_size; +extern long innobase_buffer_pool_size, innobase_additional_mem_pool_size; +extern long innobase_buffer_pool_awe_mem_mb; +extern long innobase_file_io_threads, innobase_lock_wait_timeout; +extern long innobase_force_recovery; +extern long innobase_open_files; +extern char *innobase_data_home_dir, *innobase_data_file_path; +extern char *innobase_log_group_home_dir, *innobase_log_arch_dir; +extern char *innobase_unix_file_flush_method; +/* The following variables have to be my_bool for SHOW VARIABLES to work */ +extern my_bool innobase_log_archive, + innobase_use_doublewrite, + innobase_use_checksums, + innobase_use_large_pages, + innobase_use_native_aio, + innobase_file_per_table, innobase_locks_unsafe_for_binlog, + innobase_create_status_file; +extern my_bool innobase_very_fast_shutdown; /* set this to 1 just before + calling innobase_end() if you want + InnoDB to shut down without + flushing the buffer pool: this + is equivalent to a 'crash' */ +extern "C" { +extern ulong srv_max_buf_pool_modified_pct; +extern ulong srv_max_purge_lag; +extern ulong srv_auto_extend_increment; +extern ulong srv_n_spin_wait_rounds; +extern ulong srv_n_free_tickets_to_enter; +extern ulong srv_thread_sleep_delay; +extern ulong srv_thread_concurrency; +extern ulong srv_commit_concurrency; +} +#endif +#ifdef WITH_BERKELEY_STORAGE_ENGINE +#ifndef HAVE_U_INT32_T +typedef unsigned int u_int32_t; +#endif +extern const u_int32_t bdb_DB_TXN_NOSYNC, bdb_DB_RECOVER, bdb_DB_PRIVATE; +extern bool berkeley_shared_data; +extern u_int32_t berkeley_init_flags,berkeley_env_flags, berkeley_lock_type, + berkeley_lock_types[]; +extern ulong berkeley_max_lock, berkeley_log_buffer_size; +extern ulonglong berkeley_cache_size; +extern ulong berkeley_region_size, berkeley_cache_parts; +extern char *berkeley_home, *berkeley_tmpdir, *berkeley_logdir; +extern long berkeley_lock_scan_time; +extern TYPELIB berkeley_lock_typelib; +#endif +#ifdef WITH_NDBCLUSTER_STORAGE_ENGINE const char *opt_ndbcluster_connectstring= 0; const char *opt_ndb_connectstring= 0; char opt_ndb_constrbuf[1024]; @@ -363,6 +413,12 @@ my_bool opt_ndb_shm, opt_ndb_optimized_node_selection; ulong opt_ndb_cache_check_time; const char *opt_ndb_mgmd; ulong opt_ndb_nodeid; + +extern struct show_var_st ndb_status_variables[]; +extern const char *ndb_distribution_names[]; +extern TYPELIB ndb_distribution_typelib; +extern const char 
*opt_ndb_distribution; +extern enum ndb_distribution opt_ndb_distribution_id; #endif my_bool opt_readonly, use_temp_pool, relay_log_purge; my_bool opt_sync_frm, opt_allow_suspicious_udfs; @@ -452,12 +508,10 @@ FILE *bootstrap_file; int bootstrap_error; FILE *stderror_file=0; -I_List<i_string_pair> replicate_rewrite_db; -I_List<i_string> replicate_do_db, replicate_ignore_db; -// allow the user to tell us which db to replicate and which to ignore -I_List<i_string> binlog_do_db, binlog_ignore_db; I_List<THD> threads; I_List<NAMED_LIST> key_caches; +Rpl_filter* rpl_filter; +Rpl_filter* binlog_filter; struct system_variables global_system_variables; struct system_variables max_system_variables; @@ -469,13 +523,9 @@ MY_BITMAP temp_pool; CHARSET_INFO *system_charset_info, *files_charset_info ; CHARSET_INFO *national_charset_info, *table_alias_charset; -SHOW_COMP_OPTION have_berkeley_db, have_innodb, have_isam, have_ndbcluster, - have_example_db, have_archive_db, have_csv_db; -SHOW_COMP_OPTION have_federated_db; SHOW_COMP_OPTION have_raid, have_openssl, have_symlink, have_query_cache; SHOW_COMP_OPTION have_geometry, have_rtree_keys; SHOW_COMP_OPTION have_crypt, have_compress; -SHOW_COMP_OPTION have_blackhole_db; /* Thread specific variables */ @@ -495,6 +545,10 @@ rw_lock_t LOCK_grant, LOCK_sys_init_connect, LOCK_sys_init_slave; pthread_cond_t COND_refresh,COND_thread_count; pthread_t signal_thread; pthread_attr_t connection_attrib; +pthread_mutex_t LOCK_server_started; +pthread_cond_t COND_server_started; + +int mysqld_server_started= 0; File_parser_dummy_hook file_parser_dummy_hook; @@ -1064,10 +1118,13 @@ void clean_up(bool print_message) lex_free(); /* Free some memory */ set_var_free(); free_charsets(); -#ifdef HAVE_DLOPEN if (!opt_noacl) + { +#ifdef HAVE_DLOPEN udf_free(); #endif + plugin_free(); + } (void) ha_panic(HA_PANIC_CLOSE); /* close all tables and logs */ if (tc_log) tc_log->close(); @@ -1099,12 +1156,9 @@ void clean_up(bool print_message) free_max_user_conn(); #ifdef HAVE_REPLICATION end_slave_list(); - free_list(&replicate_do_db); - free_list(&replicate_ignore_db); - free_list(&binlog_do_db); - free_list(&binlog_ignore_db); - free_list(&replicate_rewrite_db); #endif + delete binlog_filter; + delete rpl_filter; #ifdef HAVE_OPENSSL if (ssl_acceptor_fd) my_free((gptr) ssl_acceptor_fd, MYF(MY_ALLOW_ZERO_PTR)); @@ -2457,7 +2511,7 @@ pthread_handler_t handle_shutdown(void *arg) static const char *load_default_groups[]= { -#ifdef HAVE_NDBCLUSTER_DB +#ifdef WITH_NDBCLUSTER_STORAGE_ENGINE "mysql_cluster", #endif "mysqld","server", MYSQL_BASE_VERSION, 0, 0}; @@ -2577,7 +2631,7 @@ static int init_common_variables(const char *conf_file_name, int argc, { my_use_large_pages= 1; my_large_page_size= opt_large_page_size; -#ifdef HAVE_INNOBASE_DB +#ifdef WITH_INNOBASE_STORAGE_ENGINE innobase_use_large_pages= 1; innobase_large_page_size= opt_large_page_size; #endif @@ -2764,6 +2818,8 @@ static int init_thread_environment() (void) pthread_mutex_init(&LOCK_rpl_status, MY_MUTEX_INIT_FAST); (void) pthread_cond_init(&COND_rpl_status, NULL); #endif + (void) pthread_mutex_init(&LOCK_server_started, MY_MUTEX_INIT_FAST); + (void) pthread_cond_init(&COND_server_started,NULL); sp_cache_init(); /* Parameter for threads created for connections */ (void) pthread_attr_init(&connection_attrib); @@ -3120,7 +3176,7 @@ server."); static void create_maintenance_thread() { if ( -#ifdef HAVE_BERKELEY_DB +#ifdef WITH_BERKELEY_STORAGE_ENGINE (have_berkeley_db == SHOW_OPTION_YES) || #endif (flush_time && flush_time != 
~(ulong) 0L)) @@ -3233,6 +3289,15 @@ int main(int argc, char **argv) #endif { DEBUGGER_OFF; + + rpl_filter= new Rpl_filter; + binlog_filter= new Rpl_filter; + if (!rpl_filter || !binlog_filter) + { + sql_perror("Could not allocate replication and binlog filters"); + exit(1); + } + MY_INIT(argv[0]); // init my_sys library & pthreads #ifdef _CUSTOMSTARTUPCONFIG_ @@ -3397,10 +3462,13 @@ we force server id to 2, but this MySQL server will not act as a slave."); if (!opt_noacl) (void) grant_init(); -#ifdef HAVE_DLOPEN if (!opt_noacl) + { + plugin_init(); +#ifdef HAVE_DLOPEN udf_init(); #endif + } if (opt_bootstrap) /* If running with bootstrap, do not start replication. */ opt_skip_slave_start= 1; /* @@ -3440,6 +3508,10 @@ we force server id to 2, but this MySQL server will not act as a slave."); mysqld_port, MYSQL_COMPILATION_COMMENT); + // Signal threads waiting for server to be started + mysqld_server_started= 1; + pthread_cond_signal(&COND_server_started); + #if defined(__NT__) || defined(HAVE_SMEM) handle_connections_methods(); #else @@ -3487,6 +3559,7 @@ we force server id to 2, but this MySQL server will not act as a slave."); CloseHandle(hEventShutdown); } #endif + clean_up(1); wait_for_signal_thread_to_end(); clean_up_mutexes(); my_end(opt_endinfo ? MY_CHECK_ERROR | MY_GIVE_INFO : 0); @@ -3588,7 +3661,6 @@ default_service_handling(char **argv, int main(int argc, char **argv) { - /* When several instances are running on the same machine, we need to have an unique named hEventShudown through the @@ -4442,6 +4514,9 @@ enum options_mysqld OPT_NDB_FORCE_SEND, OPT_NDB_AUTOINCREMENT_PREFETCH_SZ, OPT_NDB_SHM, OPT_NDB_OPTIMIZED_NODE_SELECTION, OPT_NDB_CACHE_CHECK_TIME, OPT_NDB_MGMD, OPT_NDB_NODEID, + OPT_NDB_DISTRIBUTION, + OPT_NDB_INDEX_STAT_ENABLE, + OPT_NDB_INDEX_STAT_CACHE_ENTRIES, OPT_NDB_INDEX_STAT_UPDATE_FREQ, OPT_SKIP_SAFEMALLOC, OPT_TEMP_POOL, OPT_TX_ISOLATION, OPT_COMPLETION_TYPE, OPT_SKIP_STACK_TRACE, OPT_SKIP_SYMLINKS, @@ -4514,8 +4589,10 @@ enum options_mysqld OPT_INNODB_CONCURRENCY_TICKETS, OPT_INNODB_THREAD_SLEEP_DELAY, OPT_BDB_CACHE_SIZE, + OPT_BDB_CACHE_PARTS, OPT_BDB_LOG_BUFFER_SIZE, OPT_BDB_MAX_LOCK, + OPT_BDB_REGION_SIZE, OPT_ERROR_LOG_FILE, OPT_DEFAULT_WEEK_FORMAT, OPT_RANGE_ALLOC_BLOCK_SIZE, OPT_ALLOW_SUSPICIOUS_UDFS, @@ -4529,6 +4606,7 @@ enum options_mysqld OPT_ENABLE_SHARED_MEMORY, OPT_SHARED_MEMORY_BASE_NAME, OPT_OLD_PASSWORDS, + OPT_OLD_ALTER_TABLE, OPT_EXPIRE_LOGS_DAYS, OPT_GROUP_CONCAT_MAX_LEN, OPT_DEFAULT_COLLATION, @@ -4551,7 +4629,8 @@ enum options_mysqld OPT_TIMED_MUTEXES, OPT_OLD_STYLE_USER_LIMITS, OPT_LOG_SLOW_ADMIN_STATEMENTS, - OPT_TABLE_LOCK_WAIT_TIMEOUT + OPT_TABLE_LOCK_WAIT_TIMEOUT, + OPT_PLUGIN_DIR }; @@ -4599,7 +4678,7 @@ struct my_option my_long_options[] = Disable with --skip-bdb (will save memory).", (gptr*) &opt_bdb, (gptr*) &opt_bdb, 0, GET_BOOL, NO_ARG, OPT_BDB_DEFAULT, 0, 0, 0, 0, 0}, -#ifdef HAVE_BERKELEY_DB +#ifdef WITH_BERKELEY_STORAGE_ENGINE {"bdb-home", OPT_BDB_HOME, "Berkeley home directory.", (gptr*) &berkeley_home, (gptr*) &berkeley_home, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, {"bdb-lock-detect", OPT_BDB_LOCK, @@ -4620,7 +4699,7 @@ Disable with --skip-bdb (will save memory).", {"bdb-tmpdir", OPT_BDB_TMP, "Berkeley DB tempfile name.", (gptr*) &berkeley_tmpdir, (gptr*) &berkeley_tmpdir, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, -#endif /* HAVE_BERKELEY_DB */ +#endif /* WITH_BERKELEY_STORAGE_ENGINE */ {"big-tables", OPT_BIG_TABLES, "Allow big result sets by saving all temporary sets on file (Solves most 'table full' errors).", 0, 0, 
0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, @@ -4756,7 +4835,7 @@ Disable with --skip-large-pages.", Disable with --skip-innodb (will save memory).", (gptr*) &opt_innodb, (gptr*) &opt_innodb, 0, GET_BOOL, NO_ARG, OPT_INNODB_DEFAULT, 0, 0, 0, 0, 0}, -#ifdef HAVE_INNOBASE_DB +#ifdef WITH_INNOBASE_STORAGE_ENGINE {"innodb_checksums", OPT_INNODB_CHECKSUMS, "Enable InnoDB checksums validation (enabled by default). \ Disable with --skip-innodb-checksums.", (gptr*) &innobase_use_checksums, (gptr*) &innobase_use_checksums, 0, GET_BOOL, NO_ARG, 1, 0, 0, 0, 0, 0}, @@ -4764,7 +4843,7 @@ Disable with --skip-innodb-checksums.", (gptr*) &innobase_use_checksums, {"innodb_data_file_path", OPT_INNODB_DATA_FILE_PATH, "Path to individual files and their sizes.", 0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, -#ifdef HAVE_INNOBASE_DB +#ifdef WITH_INNOBASE_STORAGE_ENGINE {"innodb_data_home_dir", OPT_INNODB_DATA_HOME_DIR, "The common part for InnoDB table spaces.", (gptr*) &innobase_data_home_dir, (gptr*) &innobase_data_home_dir, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, @@ -4835,7 +4914,7 @@ Disable with --skip-innodb-doublewrite.", (gptr*) &innobase_use_doublewrite, (gptr*) &global_system_variables.innodb_support_xa, (gptr*) &global_system_variables.innodb_support_xa, 0, GET_BOOL, OPT_ARG, 1, 0, 0, 0, 0, 0}, -#endif /* End HAVE_INNOBASE_DB */ +#endif /* End WITH_INNOBASE_STORAGE_ENGINE */ {"isam", OPT_ISAM, "Obsolete. ISAM storage engine is no longer supported.", (gptr*) &opt_isam, (gptr*) &opt_isam, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, @@ -5006,7 +5085,7 @@ master-ssl", Disable with --skip-ndbcluster (will save memory).", (gptr*) &opt_ndbcluster, (gptr*) &opt_ndbcluster, 0, GET_BOOL, NO_ARG, OPT_NDBCLUSTER_DEFAULT, 0, 0, 0, 0, 0}, -#ifdef HAVE_NDBCLUSTER_DB +#ifdef WITH_NDBCLUSTER_STORAGE_ENGINE {"ndb-connectstring", OPT_NDB_CONNECTSTRING, "Connect string for ndbcluster.", (gptr*) &opt_ndb_connectstring, @@ -5027,6 +5106,11 @@ Disable with --skip-ndbcluster (will save memory).", (gptr*) &global_system_variables.ndb_autoincrement_prefetch_sz, (gptr*) &global_system_variables.ndb_autoincrement_prefetch_sz, 0, GET_ULONG, REQUIRED_ARG, 32, 1, 256, 0, 0, 0}, + {"ndb-distribution", OPT_NDB_DISTRIBUTION, + "Default distribution for new tables in ndb", + (gptr*) &opt_ndb_distribution, + (gptr*) &opt_ndb_distribution, + 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, {"ndb-force-send", OPT_NDB_FORCE_SEND, "Force send of buffers to ndb immediately without waiting for " "other threads.", @@ -5063,6 +5147,23 @@ Disable with --skip-ndbcluster (will save memory).", "A dedicated thread is created to, at the given millisecond interval, invalidate the query cache if another MySQL server in the cluster has changed the data in the database.", (gptr*) &opt_ndb_cache_check_time, (gptr*) &opt_ndb_cache_check_time, 0, GET_ULONG, REQUIRED_ARG, 0, 0, LONG_TIMEOUT, 0, 1, 0}, + {"ndb-index-stat-enable", OPT_NDB_INDEX_STAT_ENABLE, + "Use ndb index statistics in query optimization.", + (gptr*) &global_system_variables.ndb_index_stat_enable, + (gptr*) &max_system_variables.ndb_index_stat_enable, + 0, GET_BOOL, OPT_ARG, 1, 0, 1, 0, 0, 0}, + {"ndb-index-stat-cache-entries", OPT_NDB_INDEX_STAT_CACHE_ENTRIES, + "Number of start/end keys to store in statistics memory cache." 
+ " Zero means no cache and forces query of db nodes always.", + (gptr*) &global_system_variables.ndb_index_stat_cache_entries, + (gptr*) &max_system_variables.ndb_index_stat_cache_entries, + 0, GET_ULONG, OPT_ARG, 32, 0, ~0L, 0, 0, 0}, + {"ndb-index-stat-update-freq", OPT_NDB_INDEX_STAT_UPDATE_FREQ, + "How often, in the long run, to query db nodes instead of statistics cache." + " For example 20 means every 20th time.", + (gptr*) &global_system_variables.ndb_index_stat_update_freq, + (gptr*) &max_system_variables.ndb_index_stat_update_freq, + 0, GET_ULONG, OPT_ARG, 20, 0, ~0L, 0, 0, 0}, #endif {"new", 'n', "Use very new possible 'unsafe' functions.", (gptr*) &global_system_variables.new_mode, @@ -5073,6 +5174,11 @@ Disable with --skip-ndbcluster (will save memory).", (gptr*) &opt_no_mix_types, (gptr*) &opt_no_mix_types, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, #endif + {"old-alter-table", OPT_OLD_ALTER_TABLE, + "Use old, non-optimized alter table.", + (gptr*) &global_system_variables.old_alter_table, + (gptr*) &max_system_variables.old_alter_table, 0, GET_BOOL, NO_ARG, + 0, 0, 0, 0, 0, 0}, {"old-passwords", OPT_OLD_PASSWORDS, "Use old password encryption method (needed for 4.0 and older clients).", (gptr*) &global_system_variables.old_passwords, (gptr*) &max_system_variables.old_passwords, 0, GET_BOOL, NO_ARG, @@ -5310,11 +5416,15 @@ log and this option does nothing anymore.", "The number of outstanding connection requests MySQL can have. This comes into play when the main MySQL thread gets very many connection requests in a very short time.", (gptr*) &back_log, (gptr*) &back_log, 0, GET_ULONG, REQUIRED_ARG, 50, 1, 65535, 0, 1, 0 }, -#ifdef HAVE_BERKELEY_DB +#ifdef WITH_BERKELEY_STORAGE_ENGINE + { "bdb_cache_parts", OPT_BDB_CACHE_PARTS, + "Number of parts to use for BDB cache.", + (gptr*) &berkeley_cache_parts, (gptr*) &berkeley_cache_parts, 0, GET_ULONG, + REQUIRED_ARG, 1, 1, 1024, 0, 1, 0}, { "bdb_cache_size", OPT_BDB_CACHE_SIZE, "The buffer that is allocated to cache index and rows for BDB tables.", - (gptr*) &berkeley_cache_size, (gptr*) &berkeley_cache_size, 0, GET_ULONG, - REQUIRED_ARG, KEY_CACHE_SIZE, 20*1024, (long) ~0, 0, IO_SIZE, 0}, + (gptr*) &berkeley_cache_size, (gptr*) &berkeley_cache_size, 0, GET_ULL, + REQUIRED_ARG, KEY_CACHE_SIZE, 20*1024, (ulonglong) ~0, 0, IO_SIZE, 0}, /* QQ: The following should be removed soon! (bdb_max_lock preferred) */ {"bdb_lock_max", OPT_BDB_MAX_LOCK, "Synonym for bdb_max_lock.", (gptr*) &berkeley_max_lock, (gptr*) &berkeley_max_lock, 0, GET_ULONG, @@ -5327,7 +5437,11 @@ log and this option does nothing anymore.", "The maximum number of locks you can have active on a BDB table.", (gptr*) &berkeley_max_lock, (gptr*) &berkeley_max_lock, 0, GET_ULONG, REQUIRED_ARG, 10000, 0, (long) ~0, 0, 1, 0}, -#endif /* HAVE_BERKELEY_DB */ + {"bdb_region_size", OPT_BDB_REGION_SIZE, + "The size of the underlying logging area of the Berkeley DB environment.", + (gptr*) &berkeley_region_size, (gptr*) &berkeley_region_size, 0, GET_ULONG, + OPT_ARG, 60*1024L, 60*1024L, (long) ~0, 0, 1, 0}, +#endif /* WITH_BERKELEY_STORAGE_ENGINE */ {"binlog_cache_size", OPT_BINLOG_CACHE_SIZE, "The size of the cache to hold the SQL statements for the binary log during a transaction. 
If you often use big, multi-statement transactions you can increase this to get more performance.", (gptr*) &binlog_cache_size, (gptr*) &binlog_cache_size, 0, GET_ULONG, @@ -5403,7 +5517,7 @@ log and this option does nothing anymore.", (gptr*) &global_system_variables.group_concat_max_len, (gptr*) &max_system_variables.group_concat_max_len, 0, GET_ULONG, REQUIRED_ARG, 1024, 4, (long) ~0, 0, 1, 0}, -#ifdef HAVE_INNOBASE_DB +#ifdef WITH_INNOBASE_STORAGE_ENGINE {"innodb_additional_mem_pool_size", OPT_INNODB_ADDITIONAL_MEM_POOL_SIZE, "Size of a memory pool InnoDB uses to store data dictionary information and other internal data structures.", (gptr*) &innobase_additional_mem_pool_size, @@ -5482,7 +5596,7 @@ log and this option does nothing anymore.", (gptr*) &srv_thread_sleep_delay, (gptr*) &srv_thread_sleep_delay, 0, GET_LONG, REQUIRED_ARG, 10000L, 0L, ~0L, 0, 1L, 0}, -#endif /* HAVE_INNOBASE_DB */ +#endif /* WITH_INNOBASE_STORAGE_ENGINE */ {"interactive_timeout", OPT_INTERACTIVE_TIMEOUT, "The number of seconds the server waits for activity on an interactive connection before closing it.", (gptr*) &global_system_variables.net_interactive_timeout, @@ -5687,6 +5801,10 @@ The minimum value for this variable is 4096.", (gptr*) &global_system_variables.optimizer_search_depth, (gptr*) &max_system_variables.optimizer_search_depth, 0, GET_ULONG, OPT_ARG, MAX_TABLES+1, 0, MAX_TABLES+2, 0, 1, 0}, + {"plugin_dir", OPT_PLUGIN_DIR, + "Directory for plugins.", + (gptr*) &opt_plugin_dir_ptr, (gptr*) &opt_plugin_dir_ptr, 0, + GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, {"preload_buffer_size", OPT_PRELOAD_BUFFER_SIZE, "The size of the buffer that is allocated when preloading indexes", (gptr*) &global_system_variables.preload_buff_size, @@ -5803,12 +5921,12 @@ The minimum value for this variable is 4096.", (gptr*) &max_system_variables.sortbuff_size, 0, GET_ULONG, REQUIRED_ARG, MAX_SORT_MEMORY, MIN_SORT_MEMORY+MALLOC_OVERHEAD*2, ~0L, MALLOC_OVERHEAD, 1, 0}, -#ifdef HAVE_BERKELEY_DB +#ifdef WITH_BERKELEY_STORAGE_ENGINE {"sync-bdb-logs", OPT_BDB_SYNC, "Synchronously flush Berkeley DB logs. Enabled by default", (gptr*) &opt_sync_bdb_logs, (gptr*) &opt_sync_bdb_logs, 0, GET_BOOL, NO_ARG, 1, 0, 0, 0, 0, 0}, -#endif /* HAVE_BERKELEY_DB */ +#endif /* WITH_BERKELEY_STORAGE_ENGINE */ {"sync-binlog", OPT_SYNC_BINLOG, "Synchronously flush binary log to disk after every #th event. 
" "Use 0 (default) to disable synchronous flushing.", @@ -5960,14 +6078,14 @@ struct show_var_st status_vars[]= { {"Com_show_create_db", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_CREATE_DB]), SHOW_LONG_STATUS}, {"Com_show_create_table", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_CREATE]), SHOW_LONG_STATUS}, {"Com_show_databases", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_DATABASES]), SHOW_LONG_STATUS}, + {"Com_show_engine_logs", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_ENGINE_LOGS]), SHOW_LONG_STATUS}, + {"Com_show_engine_mutex", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_ENGINE_MUTEX]), SHOW_LONG_STATUS}, + {"Com_show_engine_status", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_ENGINE_STATUS]), SHOW_LONG_STATUS}, {"Com_show_errors", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_ERRORS]), SHOW_LONG_STATUS}, {"Com_show_fields", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_FIELDS]), SHOW_LONG_STATUS}, {"Com_show_grants", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_GRANTS]), SHOW_LONG_STATUS}, - {"Com_show_innodb_status", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_INNODB_STATUS]), SHOW_LONG_STATUS}, {"Com_show_keys", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_KEYS]), SHOW_LONG_STATUS}, - {"Com_show_logs", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_LOGS]), SHOW_LONG_STATUS}, {"Com_show_master_status", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_MASTER_STAT]), SHOW_LONG_STATUS}, - {"Com_show_ndb_status", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_NDBCLUSTER_STATUS]), SHOW_LONG_STATUS}, {"Com_show_new_master", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_NEW_MASTER]), SHOW_LONG_STATUS}, {"Com_show_open_tables", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_OPEN_TABLES]), SHOW_LONG_STATUS}, {"Com_show_privileges", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_PRIVILEGES]), SHOW_LONG_STATUS}, @@ -6022,9 +6140,9 @@ struct show_var_st status_vars[]= { {"Handler_savepoint_rollback",(char*) offsetof(STATUS_VAR, ha_savepoint_rollback_count), SHOW_LONG_STATUS}, {"Handler_update", (char*) offsetof(STATUS_VAR, ha_update_count), SHOW_LONG_STATUS}, {"Handler_write", (char*) offsetof(STATUS_VAR, ha_write_count), SHOW_LONG_STATUS}, -#ifdef HAVE_INNOBASE_DB +#ifdef WITH_INNOBASE_STORAGE_ENGINE {"Innodb_", (char*) &innodb_status_variables, SHOW_VARS}, -#endif /*HAVE_INNOBASE_DB*/ +#endif /* WITH_INNOBASE_STORAGE_ENGINE */ {"Key_blocks_not_flushed", (char*) &dflt_key_cache_var.global_blocks_changed, SHOW_KEY_CACHE_LONG}, {"Key_blocks_unused", (char*) &dflt_key_cache_var.blocks_unused, SHOW_KEY_CACHE_CONST_LONG}, {"Key_blocks_used", (char*) &dflt_key_cache_var.blocks_used, SHOW_KEY_CACHE_CONST_LONG}, @@ -6034,9 +6152,9 @@ struct show_var_st status_vars[]= { {"Key_writes", (char*) &dflt_key_cache_var.global_cache_write, SHOW_KEY_CACHE_LONGLONG}, {"Last_query_cost", (char*) offsetof(STATUS_VAR, last_query_cost), SHOW_DOUBLE_STATUS}, {"Max_used_connections", (char*) &max_used_connections, SHOW_LONG}, -#ifdef HAVE_NDBCLUSTER_DB +#ifdef WITH_NDBCLUSTER_STORAGE_ENGINE {"Ndb_", (char*) &ndb_status_variables, SHOW_VARS}, -#endif /*HAVE_NDBCLUSTER_DB*/ +#endif /* WITH_NDBCLUSTER_STORAGE_ENGINE */ {"Not_flushed_delayed_rows", (char*) &delayed_rows_in_use, SHOW_LONG_CONST}, {"Open_files", (char*) &my_file_opened, SHOW_LONG_CONST}, {"Open_streams", (char*) &my_stream_opened, SHOW_LONG_CONST}, @@ -6254,13 
+6372,6 @@ static void mysql_init_variables(void) exit(1); multi_keycache_init(); /* set key_cache_hash.default_value = dflt_key_cache */ - /* Initialize structures that is used when processing options */ - replicate_rewrite_db.empty(); - replicate_do_db.empty(); - replicate_ignore_db.empty(); - binlog_do_db.empty(); - binlog_ignore_db.empty(); - /* Set directory paths */ strmake(language, LANGUAGE, sizeof(language)-1); strmake(mysql_real_data_home, get_relative_path(DATADIR), @@ -6293,6 +6404,7 @@ static void mysql_init_variables(void) global_system_variables.max_join_size= (ulonglong) HA_POS_ERROR; max_system_variables.max_join_size= (ulonglong) HA_POS_ERROR; global_system_variables.old_passwords= 0; + global_system_variables.old_alter_table= 0; /* Default behavior for 4.1 and 5.0 is to treat NULL values as unequal @@ -6306,44 +6418,14 @@ static void mysql_init_variables(void) "d:t:i:o,/tmp/mysqld.trace"); #endif opt_error_log= IF_WIN(1,0); -#ifdef HAVE_BERKELEY_DB - have_berkeley_db= SHOW_OPTION_YES; -#else - have_berkeley_db= SHOW_OPTION_NO; -#endif -#ifdef HAVE_INNOBASE_DB - have_innodb=SHOW_OPTION_YES; -#else - have_innodb=SHOW_OPTION_NO; -#endif - have_isam=SHOW_OPTION_NO; -#ifdef HAVE_EXAMPLE_DB - have_example_db= SHOW_OPTION_YES; -#else - have_example_db= SHOW_OPTION_NO; -#endif -#ifdef HAVE_ARCHIVE_DB - have_archive_db= SHOW_OPTION_YES; -#else - have_archive_db= SHOW_OPTION_NO; -#endif -#ifdef HAVE_BLACKHOLE_DB - have_blackhole_db= SHOW_OPTION_YES; -#else - have_blackhole_db= SHOW_OPTION_NO; -#endif -#ifdef HAVE_FEDERATED_DB - have_federated_db= SHOW_OPTION_YES; -#else - have_federated_db= SHOW_OPTION_NO; -#endif -#ifdef HAVE_CSV_DB - have_csv_db= SHOW_OPTION_YES; -#else - have_csv_db= SHOW_OPTION_NO; -#endif -#ifdef HAVE_NDBCLUSTER_DB +#ifdef WITH_NDBCLUSTER_STORAGE_ENGINE have_ndbcluster=SHOW_OPTION_DISABLED; + global_system_variables.ndb_index_stat_enable=TRUE; + max_system_variables.ndb_index_stat_enable=TRUE; + global_system_variables.ndb_index_stat_cache_entries=32; + max_system_variables.ndb_index_stat_cache_entries=~0L; + global_system_variables.ndb_index_stat_update_freq=20; + max_system_variables.ndb_index_stat_update_freq=~0L; #else have_ndbcluster=SHOW_OPTION_NO; #endif @@ -6512,14 +6594,12 @@ get_one_option(int optid, const struct my_option *opt __attribute__((unused)), } case (int)OPT_REPLICATE_IGNORE_DB: { - i_string *db = new i_string(argument); - replicate_ignore_db.push_back(db); + rpl_filter->add_ignore_db(argument); break; } case (int)OPT_REPLICATE_DO_DB: { - i_string *db = new i_string(argument); - replicate_do_db.push_back(db); + rpl_filter->add_do_db(argument); break; } case (int)OPT_REPLICATE_REWRITE_DB: @@ -6552,71 +6632,54 @@ get_one_option(int optid, const struct my_option *opt __attribute__((unused)), exit(1); } - i_string_pair *db_pair = new i_string_pair(key, val); - replicate_rewrite_db.push_back(db_pair); + rpl_filter->add_db_rewrite(key, val); break; } case (int)OPT_BINLOG_IGNORE_DB: { - i_string *db = new i_string(argument); - binlog_ignore_db.push_back(db); + binlog_filter->add_ignore_db(argument); break; } case (int)OPT_BINLOG_DO_DB: { - i_string *db = new i_string(argument); - binlog_do_db.push_back(db); + binlog_filter->add_do_db(argument); break; } case (int)OPT_REPLICATE_DO_TABLE: { - if (!do_table_inited) - init_table_rule_hash(&replicate_do_table, &do_table_inited); - if (add_table_rule(&replicate_do_table, argument)) + if (rpl_filter->add_do_table(argument)) { fprintf(stderr, "Could not add do table rule '%s'!\n", argument); exit(1); } - 
table_rules_on = 1; break; } case (int)OPT_REPLICATE_WILD_DO_TABLE: { - if (!wild_do_table_inited) - init_table_rule_array(&replicate_wild_do_table, - &wild_do_table_inited); - if (add_wild_table_rule(&replicate_wild_do_table, argument)) + if (rpl_filter->add_wild_do_table(argument)) { fprintf(stderr, "Could not add do table rule '%s'!\n", argument); exit(1); } - table_rules_on = 1; break; } case (int)OPT_REPLICATE_WILD_IGNORE_TABLE: { - if (!wild_ignore_table_inited) - init_table_rule_array(&replicate_wild_ignore_table, - &wild_ignore_table_inited); - if (add_wild_table_rule(&replicate_wild_ignore_table, argument)) + if (rpl_filter->add_wild_ignore_table(argument)) { fprintf(stderr, "Could not add ignore table rule '%s'!\n", argument); exit(1); } - table_rules_on = 1; break; } case (int)OPT_REPLICATE_IGNORE_TABLE: { - if (!ignore_table_inited) - init_table_rule_hash(&replicate_ignore_table, &ignore_table_inited); - if (add_table_rule(&replicate_ignore_table, argument)) + if (rpl_filter->add_ignore_table(argument)) { fprintf(stderr, "Could not add ignore table rule '%s'!\n", argument); exit(1); } - table_rules_on = 1; break; } #endif /* HAVE_REPLICATION */ @@ -6774,19 +6837,19 @@ get_one_option(int optid, const struct my_option *opt __attribute__((unused)), global_system_variables.tx_isolation= (type-1); break; } -#ifdef HAVE_BERKELEY_DB +#ifdef WITH_BERKELEY_STORAGE_ENGINE case OPT_BDB_NOSYNC: /* Deprecated option */ opt_sync_bdb_logs= 0; /* Fall through */ case OPT_BDB_SYNC: if (!opt_sync_bdb_logs) - berkeley_env_flags|= DB_TXN_NOSYNC; + berkeley_env_flags|= bdb_DB_TXN_NOSYNC; else - berkeley_env_flags&= ~DB_TXN_NOSYNC; + berkeley_env_flags&= ~bdb_DB_TXN_NOSYNC; break; case OPT_BDB_NO_RECOVER: - berkeley_init_flags&= ~(DB_RECOVER); + berkeley_init_flags&= ~(bdb_DB_RECOVER); break; case OPT_BDB_LOCK: { @@ -6810,12 +6873,12 @@ get_one_option(int optid, const struct my_option *opt __attribute__((unused)), break; } case OPT_BDB_SHARED: - berkeley_init_flags&= ~(DB_PRIVATE); + berkeley_init_flags&= ~(bdb_DB_PRIVATE); berkeley_shared_data= 1; break; -#endif /* HAVE_BERKELEY_DB */ +#endif /* WITH_BERKELEY_STORAGE_ENGINE */ case OPT_BDB: -#ifdef HAVE_BERKELEY_DB +#ifdef WITH_BERKELEY_STORAGE_ENGINE if (opt_bdb) have_berkeley_db= SHOW_OPTION_YES; else @@ -6823,14 +6886,14 @@ get_one_option(int optid, const struct my_option *opt __attribute__((unused)), #endif break; case OPT_NDBCLUSTER: -#ifdef HAVE_NDBCLUSTER_DB +#ifdef WITH_NDBCLUSTER_STORAGE_ENGINE if (opt_ndbcluster) have_ndbcluster= SHOW_OPTION_YES; else have_ndbcluster= SHOW_OPTION_DISABLED; #endif break; -#ifdef HAVE_NDBCLUSTER_DB +#ifdef WITH_NDBCLUSTER_STORAGE_ENGINE case OPT_NDB_MGMD: case OPT_NDB_NODEID: { @@ -6854,9 +6917,23 @@ get_one_option(int optid, const struct my_option *opt __attribute__((unused)), opt_ndb_constrbuf[opt_ndb_constrbuf_len]= 0; opt_ndbcluster_connectstring= opt_ndb_constrbuf; break; + case OPT_NDB_DISTRIBUTION: + int id; + if ((id= find_type(argument, &ndb_distribution_typelib, 2)) <= 0) + { + fprintf(stderr, + "Unknown ndb distribution type: '%s' " + "(should be '%s' or '%s')\n", + argument, + ndb_distribution_names[ND_KEYHASH], + ndb_distribution_names[ND_LINHASH]); + exit(1); + } + opt_ndb_distribution_id= (enum ndb_distribution)(id-1); + break; #endif case OPT_INNODB: -#ifdef HAVE_INNOBASE_DB +#ifdef WITH_INNOBASE_STORAGE_ENGINE if (opt_innodb) have_innodb= SHOW_OPTION_YES; else @@ -6864,15 +6941,15 @@ get_one_option(int optid, const struct my_option *opt __attribute__((unused)), #endif break; case 
OPT_INNODB_DATA_FILE_PATH: -#ifdef HAVE_INNOBASE_DB +#ifdef WITH_INNOBASE_STORAGE_ENGINE innobase_data_file_path= argument; #endif break; -#ifdef HAVE_INNOBASE_DB +#ifdef WITH_INNOBASE_STORAGE_ENGINE case OPT_INNODB_LOG_ARCHIVE: innobase_log_archive= argument ? test(atoi(argument)) : 1; break; -#endif /* HAVE_INNOBASE_DB */ +#endif /* WITH_INNOBASE_STORAGE_ENGINE */ case OPT_MYISAM_RECOVER: { if (!argument || !argument[0]) @@ -7018,19 +7095,19 @@ static void get_options(int argc,char **argv) get_one_option))) exit(ho_error); -#ifndef HAVE_NDBCLUSTER_DB +#ifndef WITH_NDBCLUSTER_STORAGE_ENGINE if (opt_ndbcluster) sql_print_warning("this binary does not contain NDBCLUSTER storage engine"); #endif -#ifndef HAVE_INNOBASE_DB +#ifndef WITH_INNOBASE_STORAGE_ENGINE if (opt_innodb) sql_print_warning("this binary does not contain INNODB storage engine"); #endif -#ifndef HAVE_ISAM +#ifndef WITH_ISAM_STORAGE_ENGINE if (opt_isam) sql_print_warning("this binary does not contain ISAM storage engine"); #endif -#ifndef HAVE_BERKELEY_DB +#ifndef WITH_BERKELEY_STORAGE_ENGINE if (opt_bdb) sql_print_warning("this binary does not contain BDB storage engine"); #endif @@ -7183,6 +7260,10 @@ static void fix_paths(void) (void) my_load_path(mysql_home,mysql_home,""); // Resolve current dir (void) my_load_path(mysql_real_data_home,mysql_real_data_home,mysql_home); (void) my_load_path(pidfile_name,pidfile_name,mysql_real_data_home); + strmake(opt_plugin_dir, get_relative_path(LIBDIR), + sizeof(opt_plugin_dir) - 1); + opt_plugin_dir_ptr= opt_plugin_dir; + (void) my_load_path(opt_plugin_dir, opt_plugin_dir_ptr, mysql_home); char *sharedir=get_relative_path(SHAREDIR); if (test_if_hard_path(sharedir)) @@ -7339,6 +7420,72 @@ static void create_pid_file() /***************************************************************************** + Instantiate have_xyx for missing storage engines +*****************************************************************************/ +#undef have_isam +#undef have_berkeley_db +#undef have_innodb +#undef have_ndbcluster +#undef have_example_db +#undef have_archive_db +#undef have_csv_db +#undef have_federated_db +#undef have_partition_db +#undef have_blackhole_db + +SHOW_COMP_OPTION have_berkeley_db= SHOW_OPTION_NO; +SHOW_COMP_OPTION have_innodb= SHOW_OPTION_NO; +SHOW_COMP_OPTION have_isam= SHOW_OPTION_NO; +SHOW_COMP_OPTION have_ndbcluster= SHOW_OPTION_NO; +SHOW_COMP_OPTION have_example_db= SHOW_OPTION_NO; +SHOW_COMP_OPTION have_archive_db= SHOW_OPTION_NO; +SHOW_COMP_OPTION have_csv_db= SHOW_OPTION_NO; +SHOW_COMP_OPTION have_federated_db= SHOW_OPTION_NO; +SHOW_COMP_OPTION have_partition_db= SHOW_OPTION_NO; +SHOW_COMP_OPTION have_blackhole_db= SHOW_OPTION_NO; + +#ifndef WITH_BERKELEY_STORAGE_ENGINE +bool berkeley_shared_data; +ulong berkeley_max_lock, berkeley_log_buffer_size; +ulonglong berkeley_cache_size; +ulong berkeley_region_size, berkeley_cache_parts; +char *berkeley_home, *berkeley_tmpdir, *berkeley_logdir; +#endif + +#ifndef WITH_INNOBASE_STORAGE_ENGINE +uint innobase_flush_log_at_trx_commit; +ulong innobase_fast_shutdown; +long innobase_mirrored_log_groups, innobase_log_files_in_group; +long innobase_log_file_size, innobase_log_buffer_size; +long innobase_buffer_pool_size, innobase_additional_mem_pool_size; +long innobase_buffer_pool_awe_mem_mb; +long innobase_file_io_threads, innobase_lock_wait_timeout; +long innobase_force_recovery; +long innobase_open_files; +char *innobase_data_home_dir, *innobase_data_file_path; +char *innobase_log_group_home_dir, *innobase_log_arch_dir; +char 
*innobase_unix_file_flush_method; +my_bool innobase_log_archive, + innobase_use_doublewrite, + innobase_use_checksums, + innobase_file_per_table, + innobase_locks_unsafe_for_binlog; + +ulong srv_max_buf_pool_modified_pct; +ulong srv_max_purge_lag; +ulong srv_auto_extend_increment; +ulong srv_n_spin_wait_rounds; +ulong srv_n_free_tickets_to_enter; +ulong srv_thread_sleep_delay; +ulong srv_thread_concurrency; +ulong srv_commit_concurrency; +#endif + +#ifndef WITH_NDBCLUSTER_STORAGE_ENGINE +ulong ndb_cache_check_time; +#endif + +/***************************************************************************** Instantiate templates *****************************************************************************/ @@ -7352,3 +7499,5 @@ template class I_List<NAMED_LIST>; template class I_List<Statement>; template class I_List_iterator<Statement>; #endif + + diff --git a/sql/opt_range.cc b/sql/opt_range.cc index 323e829f219..21a3f87e0fd 100644 --- a/sql/opt_range.cc +++ b/sql/opt_range.cc @@ -752,7 +752,7 @@ int QUICK_RANGE_SELECT::init() if (file->inited != handler::NONE) file->ha_index_or_rnd_end(); - DBUG_RETURN(error= file->ha_index_init(index)); + DBUG_RETURN(error= file->ha_index_init(index, 1)); } @@ -777,9 +777,10 @@ QUICK_RANGE_SELECT::~QUICK_RANGE_SELECT() { DBUG_PRINT("info", ("Freeing separate handler %p (free=%d)", file, free_file)); - file->reset(); + file->ha_reset(); file->external_lock(current_thd, F_UNLCK); file->close(); + delete file; } } delete_dynamic(&ranges); /* ranges are allocated in alloc */ @@ -915,7 +916,7 @@ int QUICK_RANGE_SELECT::init_ror_merged_scan(bool reuse_handler) { DBUG_PRINT("info", ("Reusing handler %p", file)); if (file->extra(HA_EXTRA_KEYREAD) || - file->extra(HA_EXTRA_RETRIEVE_PRIMARY_KEY) || + file->ha_retrieve_all_pk() || init() || reset()) { DBUG_RETURN(1); @@ -943,7 +944,7 @@ int QUICK_RANGE_SELECT::init_ror_merged_scan(bool reuse_handler) goto failure; if (file->extra(HA_EXTRA_KEYREAD) || - file->extra(HA_EXTRA_RETRIEVE_PRIMARY_KEY) || + file->ha_retrieve_all_pk() || init() || reset()) { file->external_lock(thd, F_UNLCK); @@ -955,6 +956,8 @@ int QUICK_RANGE_SELECT::init_ror_merged_scan(bool reuse_handler) DBUG_RETURN(0); failure: + if (file) + delete file; file= save_file; DBUG_RETURN(1); } @@ -1650,9 +1653,10 @@ static int fill_used_fields_bitmap(PARAM *param) { TABLE *table= param->table; param->fields_bitmap_size= (table->s->fields/8 + 1); - uchar *tmp; + uint32 *tmp; uint pk; - if (!(tmp= (uchar*)alloc_root(param->mem_root,param->fields_bitmap_size)) || + if (!(tmp= (uint32*)alloc_root(param->mem_root, + bytes_word_aligned(param->fields_bitmap_size))) || bitmap_init(¶m->needed_fields, tmp, param->fields_bitmap_size*8, FALSE)) return 1; @@ -2395,7 +2399,7 @@ static ROR_SCAN_INFO *make_ror_scan(const PARAM *param, int idx, SEL_ARG *sel_arg) { ROR_SCAN_INFO *ror_scan; - uchar *bitmap_buf; + uint32 *bitmap_buf; uint keynr; DBUG_ENTER("make_ror_scan"); @@ -2410,8 +2414,8 @@ ROR_SCAN_INFO *make_ror_scan(const PARAM *param, int idx, SEL_ARG *sel_arg) ror_scan->sel_arg= sel_arg; ror_scan->records= param->table->quick_rows[keynr]; - if (!(bitmap_buf= (uchar*)alloc_root(param->mem_root, - param->fields_bitmap_size))) + if (!(bitmap_buf= (uint32*)alloc_root(param->mem_root, + bytes_word_aligned(param->fields_bitmap_size)))) DBUG_RETURN(NULL); if (bitmap_init(&ror_scan->covered_fields, bitmap_buf, @@ -2525,12 +2529,13 @@ static ROR_INTERSECT_INFO* ror_intersect_init(const PARAM *param) { ROR_INTERSECT_INFO *info; - uchar* buf; + uint32* buf; if (!(info= 
(ROR_INTERSECT_INFO*)alloc_root(param->mem_root, sizeof(ROR_INTERSECT_INFO)))) return NULL; info->param= param; - if (!(buf= (uchar*)alloc_root(param->mem_root, param->fields_bitmap_size))) + if (!(buf= (uint32*)alloc_root(param->mem_root, + bytes_word_aligned(param->fields_bitmap_size)))) return NULL; if (bitmap_init(&info->covered_fields, buf, param->fields_bitmap_size*8, FALSE)) @@ -2547,7 +2552,7 @@ void ror_intersect_cpy(ROR_INTERSECT_INFO *dst, const ROR_INTERSECT_INFO *src) { dst->param= src->param; memcpy(dst->covered_fields.bitmap, src->covered_fields.bitmap, - src->covered_fields.bitmap_size); + no_bytes_in_map(&src->covered_fields)); dst->out_rows= src->out_rows; dst->is_covering= src->is_covering; dst->index_records= src->index_records; @@ -3089,9 +3094,9 @@ TRP_ROR_INTERSECT *get_best_covering_ror_intersect(PARAM *param, /*I=set of all covering indexes */ ror_scan_mark= tree->ror_scans; - uchar buf[MAX_KEY/8+1]; + uint32 int_buf[MAX_KEY/32+1]; MY_BITMAP covered_fields; - if (bitmap_init(&covered_fields, buf, nbits, FALSE)) + if (bitmap_init(&covered_fields, int_buf, nbits, FALSE)) DBUG_RETURN(0); bitmap_clear_all(&covered_fields); @@ -5872,7 +5877,7 @@ int QUICK_INDEX_MERGE_SELECT::read_keys_and_merge() (This also creates a deficiency - it is possible that we will retrieve parts of key that are not used by current query at all.) */ - if (head->file->extra(HA_EXTRA_RETRIEVE_PRIMARY_KEY)) + if (head->file->ha_retrieve_all_pk()) DBUG_RETURN(1); cur_quick_it.rewind(); @@ -6142,7 +6147,7 @@ int QUICK_RANGE_SELECT::reset() in_range= FALSE; cur_range= (QUICK_RANGE**) ranges.buffer; - if (file->inited == handler::NONE && (error= file->ha_index_init(index))) + if (file->inited == handler::NONE && (error= file->ha_index_init(index,1))) DBUG_RETURN(error); /* Do not allocate the buffers twice. */ @@ -6401,7 +6406,7 @@ int QUICK_RANGE_SELECT_GEOM::get_next() (byte*) range->min_key, range->min_length, (ha_rkey_function)(range->flag ^ GEOM_FLAG)); - if (result != HA_ERR_KEY_NOT_FOUND) + if (result != HA_ERR_KEY_NOT_FOUND && result != HA_ERR_END_OF_FILE) DBUG_RETURN(result); range=0; // Not found, to next range } @@ -6544,7 +6549,7 @@ int QUICK_SELECT_DESC::get_next() } if (result) { - if (result != HA_ERR_KEY_NOT_FOUND) + if (result != HA_ERR_KEY_NOT_FOUND && result != HA_ERR_END_OF_FILE) DBUG_RETURN(result); range=0; // Not found, to next range continue; @@ -8212,7 +8217,7 @@ int QUICK_GROUP_MIN_MAX_SELECT::reset(void) DBUG_ENTER("QUICK_GROUP_MIN_MAX_SELECT::reset"); file->extra(HA_EXTRA_KEYREAD); /* We need only the key attributes */ - result= file->ha_index_init(index); + result= file->ha_index_init(index, 1); result= file->index_last(record); if (result == HA_ERR_END_OF_FILE) DBUG_RETURN(0); @@ -8288,7 +8293,7 @@ int QUICK_GROUP_MIN_MAX_SELECT::get_next() DBUG_ASSERT(is_last_prefix <= 0); if (result == HA_ERR_KEY_NOT_FOUND) continue; - else if (result) + if (result) break; if (have_min) @@ -8318,10 +8323,11 @@ int QUICK_GROUP_MIN_MAX_SELECT::get_next() HA_READ_KEY_EXACT); result= have_min ? min_res : have_max ? max_res : result; - } - while (result == HA_ERR_KEY_NOT_FOUND && is_last_prefix != 0); + } while ((result == HA_ERR_KEY_NOT_FOUND || result == HA_ERR_END_OF_FILE) && + is_last_prefix != 0); if (result == 0) + { /* Partially mimic the behavior of end_select_send. Copy the field data from Item_field::field into Item_field::result_field @@ -8329,6 +8335,7 @@ int QUICK_GROUP_MIN_MAX_SELECT::get_next() other fields in non-ANSI SQL mode). 
*/ copy_fields(&join->tmp_table_param); + } else if (result == HA_ERR_KEY_NOT_FOUND) result= HA_ERR_END_OF_FILE; @@ -8355,6 +8362,7 @@ int QUICK_GROUP_MIN_MAX_SELECT::get_next() RETURN 0 on success HA_ERR_KEY_NOT_FOUND if no MIN key was found that fulfills all conditions. + HA_ERR_END_OF_FILE - "" - other if some error occurred */ @@ -8408,7 +8416,7 @@ int QUICK_GROUP_MIN_MAX_SELECT::next_min() if (key_cmp(index_info->key_part, group_prefix, real_prefix_len)) key_restore(record, tmp_record, index_info, 0); } - else if (result == HA_ERR_KEY_NOT_FOUND) + else if (result == HA_ERR_KEY_NOT_FOUND || result == HA_ERR_END_OF_FILE) result= 0; /* There is a result in any case. */ } } @@ -8433,6 +8441,7 @@ int QUICK_GROUP_MIN_MAX_SELECT::next_min() RETURN 0 on success HA_ERR_KEY_NOT_FOUND if no MAX key was found that fulfills all conditions. + HA_ERR_END_OF_FILE - "" - other if some error occurred */ @@ -8533,6 +8542,7 @@ int QUICK_GROUP_MIN_MAX_SELECT::next_prefix() 0 on success HA_ERR_KEY_NOT_FOUND if there is no key with the given prefix in any of the ranges + HA_ERR_END_OF_FILE - "" - other if some error */ @@ -8577,11 +8587,12 @@ int QUICK_GROUP_MIN_MAX_SELECT::next_min_in_range() result= file->index_read(record, group_prefix, search_prefix_len, find_flag); - if ((result == HA_ERR_KEY_NOT_FOUND) && - (cur_range->flag & (EQ_RANGE | NULL_RANGE))) - continue; /* Check the next range. */ - else if (result) + if (result) { + if ((result == HA_ERR_KEY_NOT_FOUND || result == HA_ERR_END_OF_FILE) && + (cur_range->flag & (EQ_RANGE | NULL_RANGE))) + continue; /* Check the next range. */ + /* In all other cases (HA_ERR_*, HA_READ_KEY_EXACT with NO_MIN_RANGE, HA_READ_AFTER_KEY, HA_READ_KEY_OR_NEXT) if the lookup failed for this @@ -8608,7 +8619,7 @@ int QUICK_GROUP_MIN_MAX_SELECT::next_min_in_range() /* Check if record belongs to the current group. */ if (key_cmp(index_info->key_part, group_prefix, real_prefix_len)) { - result = HA_ERR_KEY_NOT_FOUND; + result= HA_ERR_KEY_NOT_FOUND; continue; } @@ -8626,7 +8637,7 @@ int QUICK_GROUP_MIN_MAX_SELECT::next_min_in_range() if (!((cur_range->flag & NEAR_MAX) && (cmp_res == -1) || (cmp_res <= 0))) { - result = HA_ERR_KEY_NOT_FOUND; + result= HA_ERR_KEY_NOT_FOUND; continue; } } @@ -8665,6 +8676,7 @@ int QUICK_GROUP_MIN_MAX_SELECT::next_max_in_range() 0 on success HA_ERR_KEY_NOT_FOUND if there is no key with the given prefix in any of the ranges + HA_ERR_END_OF_FILE - "" - other if some error */ @@ -8710,10 +8722,12 @@ int QUICK_GROUP_MIN_MAX_SELECT::next_max_in_range() result= file->index_read(record, group_prefix, search_prefix_len, find_flag); - if ((result == HA_ERR_KEY_NOT_FOUND) && (cur_range->flag & EQ_RANGE)) - continue; /* Check the next range. */ if (result) { + if ((result == HA_ERR_KEY_NOT_FOUND || result == HA_ERR_END_OF_FILE) && + (cur_range->flag & EQ_RANGE)) + continue; /* Check the next range. */ + /* If no key was found with this upper bound, there certainly are no keys in the ranges to the left.
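The recurring change across the opt_range.cc hunks above is uniform handling of the two "no more rows" handler errors: an index read inside a multi-range scan now treats HA_ERR_END_OF_FILE exactly like HA_ERR_KEY_NOT_FOUND, meaning "this range is exhausted, advance to the next range", while any other handler error still aborts the scan. A minimal sketch of that control flow; Range, Handler and index_read_in_range are illustrative stand-ins, not the real handler/QUICK_RANGE interfaces:

#include <vector>

enum ha_error { HA_OK= 0, HA_ERR_KEY_NOT_FOUND= 120, HA_ERR_END_OF_FILE= 137 };

struct Range { int lo, hi; };            // stand-in for QUICK_RANGE

struct Handler                           // stand-in for the handler interface
{
  // Pretend index read: place the first key in [lo, hi] into *out, or fail.
  ha_error index_read_in_range(const std::vector<int> &keys,
                               const Range &r, int *out)
  {
    for (int k : keys)
      if (k >= r.lo && k <= r.hi) { *out= k; return HA_OK; }
    return HA_ERR_KEY_NOT_FOUND;         // a real engine may also return EOF here
  }
};

// Mirrors the fixed "if (result) { if (not-found-or-eof) continue; }" shape.
ha_error get_next(Handler &h, const std::vector<int> &keys,
                  const std::vector<Range> &ranges, int *out)
{
  for (const Range &r : ranges)
  {
    ha_error result= h.index_read_in_range(keys, r, out);
    if (result)
    {
      if (result == HA_ERR_KEY_NOT_FOUND || result == HA_ERR_END_OF_FILE)
        continue;                        // range exhausted: check the next range
      return result;                     // any other error: abort the scan
    }
    return HA_OK;                        // found a row in this range
  }
  return HA_ERR_END_OF_FILE;             // all ranges exhausted
}

Before this change, a storage engine returning HA_ERR_END_OF_FILE where the code only expected HA_ERR_KEY_NOT_FOUND would abort the whole scan with an error instead of moving on to the next range.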
diff --git a/sql/opt_sum.cc b/sql/opt_sum.cc index 37acce2934b..2e87f9cf0db 100644 --- a/sql/opt_sum.cc +++ b/sql/opt_sum.cc @@ -205,7 +205,7 @@ int opt_sum_query(TABLE_LIST *tables, List<Item> &all_fields,COND *conds) const_result= 0; break; } - error= table->file->ha_index_init((uint) ref.key); + error= table->file->ha_index_init((uint) ref.key, 1); if (!ref.key_length) error= table->file->index_first(table->record[0]); @@ -292,7 +292,7 @@ int opt_sum_query(TABLE_LIST *tables, List<Item> &all_fields,COND *conds) const_result= 0; break; } - error= table->file->ha_index_init((uint) ref.key); + error= table->file->ha_index_init((uint) ref.key, 1); if (!ref.key_length) error= table->file->index_last(table->record[0]); diff --git a/sql/records.cc b/sql/records.cc index b352f9f395a..ff0185195b0 100644 --- a/sql/records.cc +++ b/sql/records.cc @@ -65,7 +65,7 @@ void init_read_record_idx(READ_RECORD *info, THD *thd, TABLE *table, table->status=0; /* And it's always found */ if (!table->file->inited) { - table->file->ha_index_init(idx); + table->file->ha_index_init(idx, 1); table->file->extra(HA_EXTRA_RETRIEVE_PRIMARY_KEY); } /* read_record will be changed to rr_index in rr_index_first */ @@ -73,8 +73,74 @@ void init_read_record_idx(READ_RECORD *info, THD *thd, TABLE *table, } -/* init struct for read with info->read_record */ - +/* + init_read_record is used to scan a table using a number of different methods. + Which method to use is set up in this call, so that later calls to + info->read_record will invoke the appropriate method through a function + pointer. + + There are five methods that relate entirely to the sort function + filesort. The result of a filesort is retrieved using read_record + calls. The other two methods are used for normal table access. + + The filesort will produce references to the sorted records; these + references can be stored in memory or in a temporary file. + + The temporary file is normally used when the references don't fit into + a properly sized memory buffer. For most small queries the references + are stored in the memory buffer. + + The temporary file is also used when performing an update where a key is + modified. + + Methods used when ref's are in memory (using rr_from_pointers): + rr_unpack_from_buffer: + ---------------------- + This method is used when table->sort.addon_field is allocated. + This is allocated for most SELECT queries not involving any BLOB's. + In this case the records are fetched from a memory buffer. + rr_from_pointers: + ----------------- + Used when the above is not true, i.e. for UPDATE, DELETE and so forth, + and for SELECT's involving BLOB's. It is also used when the addon_field + buffer is not allocated because its size would have exceeded the + session variable max_length_for_sort_data. + In this case the record data is fetched from the handler with the + rnd_pos handler call, using the saved reference. + + Methods used when ref's are in a temporary file (using rr_from_tempfile): + rr_unpack_from_tempfile: + ------------------------ + Same as rr_unpack_from_buffer except that references are fetched from a + temporary file. Should obviously not really happen other than in + strange configurations. + + rr_from_tempfile: + ----------------- + Same as rr_from_pointers except that references are fetched from a + temporary file instead of from memory. + rr_from_cache: + -------------- + This is a special variant of rr_from_tempfile that can be used for + handlers that do not use the HA_FAST_KEY_READ table flag. Instead + of reading the references one by one from the temporary file it reads + a set of them, sorts them, and reads all of them into a buffer which + is then used for a number of subsequent calls to rr_from_cache. + It is only used for SELECT queries, and only when a number of other + conditions on table size hold. + + All other accesses use either index access methods (rr_quick) or a full + table scan (rr_sequential). + rr_quick: + --------- + rr_quick uses one of the QUICK_SELECT classes in opt_range.cc to + perform an index scan. There is a lot of functionality hidden + in these quick classes; they handle all index scans of various kinds. + rr_sequential: + -------------- + This is the most basic access method of a table, using rnd_init, + rnd_next and rnd_end. No indexes are used. +*/ void init_read_record(READ_RECORD *info,THD *thd, TABLE *table, SQL_SELECT *select, int use_record_cache, bool print_error) diff --git a/sql/repl_failsafe.cc b/sql/repl_failsafe.cc index d76be2ec2e4..960e0ac86cc 100644 --- a/sql/repl_failsafe.cc +++ b/sql/repl_failsafe.cc @@ -20,6 +20,7 @@ #include "repl_failsafe.h" #include "sql_repl.h" #include "slave.h" +#include "rpl_filter.h" #include "log_event.h" #include <mysql.h> @@ -732,14 +733,14 @@ static int fetch_db_tables(THD *thd, MYSQL *mysql, const char *db, TABLE_LIST table; const char* table_name= row[0]; int error; - if (table_rules_on) + if (rpl_filter->is_on()) { bzero((char*) &table, sizeof(table)); //just for safety table.db= (char*) db; table.table_name= (char*) table_name; table.updating= 1; - if (!tables_ok(thd, &table)) + if (!rpl_filter->tables_ok(thd->db, &table)) continue; } /* download master's table and overwrite slave's table */ @@ -858,8 +859,8 @@ bool load_master_data(THD* thd) data from master */ - if (!db_ok(db, replicate_do_db, replicate_ignore_db) || - !db_ok_with_wild_table(db) || + if (!rpl_filter->db_ok(db) || + !rpl_filter->db_ok_with_wild_table(db) || !strcmp(db,"mysql")) { *cur_table_res = 0; diff --git a/sql/rpl_filter.cc b/sql/rpl_filter.cc new file mode 100644 index 00000000000..143cd027b5f --- /dev/null +++ b/sql/rpl_filter.cc @@ -0,0 +1,546 @@ +/* Copyright (C) 2000-2003 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details.
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include "mysql_priv.h" +#include "rpl_filter.h" + +#define TABLE_RULE_HASH_SIZE 16 +#define TABLE_RULE_ARR_SIZE 16 + +Rpl_filter::Rpl_filter() : + table_rules_on(0), do_table_inited(0), ignore_table_inited(0), + wild_do_table_inited(0), wild_ignore_table_inited(0) +{ + do_db.empty(); + ignore_db.empty(); + rewrite_db.empty(); +} + + +Rpl_filter::~Rpl_filter() +{ + if (do_table_inited) + hash_free(&do_table); + if (ignore_table_inited) + hash_free(&ignore_table); + if (wild_do_table_inited) + free_string_array(&wild_do_table); + if (wild_ignore_table_inited) + free_string_array(&wild_ignore_table); + free_list(&do_db); + free_list(&ignore_db); + free_list(&rewrite_db); +} + + +/* + Returns true if the table should be logged/replicated + + SYNOPSIS + tables_ok() + db db to use if db in TABLE_LIST is undefined for a table + tables list of tables to check + + NOTES + Changing table order in the list can lead to different results. + + Note also the order of precedence of do/ignore rules (see code). For + that reason, users should not set conflicting rules, because they + may get unpredictable results (the precedence order is explained in the + manual). + + If no table in the list is marked "updating", then we always + return 0, because there is no reason to execute this statement on + the slave if it updates nothing. (Currently, this can only happen if + the statement is a multi-delete (SQLCOM_DELETE_MULTI) and "tables" are + the tables in the FROM.) + + In the case of SQLCOM_DELETE_MULTI, there will be a second call to + tables_ok(), with tables having "updating==TRUE" (those after the + DELETE), so this second call will make the decision (because + all_tables_not_ok() = !tables_ok(1st_list) && + !tables_ok(2nd_list)). + + TODO + "Include all tables like "abc.%" except "%.EFG"". (Can't be done now.) + If we supported Perl regexps, we could do it with the pattern: /^abc\.(?!EFG)/ + (I could not find an equivalent in the regex library MySQL uses). + + RETURN VALUES + 0 should not be logged/replicated + 1 should be logged/replicated +*/ + +bool +Rpl_filter::tables_ok(const char* db, TABLE_LIST* tables) +{ + bool some_tables_updating= 0; + DBUG_ENTER("Rpl_filter::tables_ok"); + + for (; tables; tables= tables->next_global) + { + char hash_key[2*NAME_LEN+2]; + char *end; + uint len; + + if (!tables->updating) + continue; + some_tables_updating= 1; + end= strmov(hash_key, tables->db ? tables->db : db); + *end++= '.'; + len= (uint) (strmov(end, tables->table_name) - hash_key); + if (do_table_inited) // if there are any do's + { + if (hash_search(&do_table, (byte*) hash_key, len)) + DBUG_RETURN(1); + } + if (ignore_table_inited) // if there are any ignores + { + if (hash_search(&ignore_table, (byte*) hash_key, len)) + DBUG_RETURN(0); + } + if (wild_do_table_inited && + find_wild(&wild_do_table, hash_key, len)) + DBUG_RETURN(1); + if (wild_ignore_table_inited && + find_wild(&wild_ignore_table, hash_key, len)) + DBUG_RETURN(0); + } + + /* + If no table was to be updated, ignore the statement (there is no reason + to play it on the slave; the slave is supposed to replicate _changes_ only). + If no explicit rule was found and there was a do list, do not replicate.
+ If there was no do list, go ahead. + */ + DBUG_RETURN(some_tables_updating && + !do_table_inited && !wild_do_table_inited); +} + + +/* + Checks whether a db matches some do_db and ignore_db rules + + SYNOPSIS + db_ok() + db name of the db to check + + RETURN VALUES + 0 should not be logged/replicated + 1 should be logged/replicated +*/ + +bool +Rpl_filter::db_ok(const char* db) +{ + DBUG_ENTER("Rpl_filter::db_ok"); + + if (do_db.is_empty() && ignore_db.is_empty()) + DBUG_RETURN(1); // Ok to replicate if the user puts no constraints + + /* + If the user has specified restrictions on which databases to replicate + and db was not selected, do not replicate. + */ + if (!db) + DBUG_RETURN(0); + + if (!do_db.is_empty()) // if the do's are not empty + { + I_List_iterator<i_string> it(do_db); + i_string* tmp; + + while ((tmp=it++)) + { + if (!strcmp(tmp->ptr, db)) + DBUG_RETURN(1); // match + } + DBUG_RETURN(0); + } + else // there are entries in the ignore list, otherwise we could not get here + { + I_List_iterator<i_string> it(ignore_db); + i_string* tmp; + + while ((tmp=it++)) + { + if (!strcmp(tmp->ptr, db)) + DBUG_RETURN(0); // match + } + DBUG_RETURN(1); + } +} + + +/* + Checks whether a db matches wild_do_table and wild_ignore_table + rules (for replication) + + SYNOPSIS + db_ok_with_wild_table() + db name of the db to check. + Is tested with check_db_name() before calling this function. + + NOTES + Here is the reason for this function. + We advise users who want to exclude a database 'db1' safely to do it + with replicate_wild_ignore_table='db1.%' instead of binlog_ignore_db or + replicate_ignore_db, because the latter two only check the selected db, + which won't work in this case: + USE db2; + UPDATE db1.t SET ... #this will be replicated and should not be + whereas replicate_wild_ignore_table will work in all cases. + With replicate_wild_ignore_table, we only check tables. When + one does 'DROP DATABASE db1', tables are not involved and the + statement will be replicated, while users could expect it would not (as it + roughly means 'DROP db1.first_table, DROP db1.second_table...'). + In other words, we want to interpret 'db1.%' as "everything touching db1". + That is why we want to match 'db1' against 'db1.%' wild table rules. + + RETURN VALUES + 0 should not be logged/replicated + 1 should be logged/replicated +*/ + +bool +Rpl_filter::db_ok_with_wild_table(const char *db) +{ + DBUG_ENTER("Rpl_filter::db_ok_with_wild_table"); + + char hash_key[NAME_LEN+2]; + char *end; + int len; + end= strmov(hash_key, db); + *end++= '.'; + len= end - hash_key; + if (wild_do_table_inited && find_wild(&wild_do_table, hash_key, len)) + { + DBUG_PRINT("return",("1")); + DBUG_RETURN(1); + } + if (wild_ignore_table_inited && find_wild(&wild_ignore_table, hash_key, len)) + { + DBUG_PRINT("return",("0")); + DBUG_RETURN(0); + } + + /* + If no explicit rule was found and there was a do list, do not replicate.
+ If there was no do list, go ahead. + */ + DBUG_PRINT("return",("db=%s,retval=%d", db, !wild_do_table_inited)); + DBUG_RETURN(!wild_do_table_inited); +} + + +bool +Rpl_filter::is_on() +{ + return table_rules_on; +} + + +int +Rpl_filter::add_do_table(const char* table_spec) +{ + DBUG_ENTER("Rpl_filter::add_do_table"); + if (!do_table_inited) + init_table_rule_hash(&do_table, &do_table_inited); + table_rules_on= 1; + DBUG_RETURN(add_table_rule(&do_table, table_spec)); +} + + +int +Rpl_filter::add_ignore_table(const char* table_spec) +{ + DBUG_ENTER("Rpl_filter::add_ignore_table"); + if (!ignore_table_inited) + init_table_rule_hash(&ignore_table, &ignore_table_inited); + table_rules_on= 1; + DBUG_RETURN(add_table_rule(&ignore_table, table_spec)); +} + + +int +Rpl_filter::add_wild_do_table(const char* table_spec) +{ + DBUG_ENTER("Rpl_filter::add_wild_do_table"); + if (!wild_do_table_inited) + init_table_rule_array(&wild_do_table, &wild_do_table_inited); + table_rules_on= 1; + DBUG_RETURN(add_wild_table_rule(&wild_do_table, table_spec)); +} + + +int +Rpl_filter::add_wild_ignore_table(const char* table_spec) +{ + DBUG_ENTER("Rpl_filter::add_wild_ignore_table"); + if (!wild_ignore_table_inited) + init_table_rule_array(&wild_ignore_table, &wild_ignore_table_inited); + table_rules_on= 1; + DBUG_RETURN(add_wild_table_rule(&wild_ignore_table, table_spec)); +} + + +void +Rpl_filter::add_db_rewrite(const char* from_db, const char* to_db) +{ + i_string_pair *db_pair = new i_string_pair(from_db, to_db); + rewrite_db.push_back(db_pair); +} + + +int +Rpl_filter::add_table_rule(HASH* h, const char* table_spec) +{ + const char* dot = strchr(table_spec, '.'); + if (!dot) return 1; + // len is always > 0 because we know there is a '.' + uint len = (uint)strlen(table_spec); + TABLE_RULE_ENT* e = (TABLE_RULE_ENT*)my_malloc(sizeof(TABLE_RULE_ENT) + + len, MYF(MY_WME)); + if (!e) return 1; + e->db= (char*)e + sizeof(TABLE_RULE_ENT); + e->tbl_name= e->db + (dot - table_spec) + 1; + e->key_len= len; + memcpy(e->db, table_spec, len); + + return my_hash_insert(h, (byte*)e); +} + + +/* + Add a table expression with wildcards to a dynamic array +*/ + +int +Rpl_filter::add_wild_table_rule(DYNAMIC_ARRAY* a, const char* table_spec) +{ + const char* dot = strchr(table_spec, '.'); + if (!dot) return 1; + uint len = (uint)strlen(table_spec); + TABLE_RULE_ENT* e = (TABLE_RULE_ENT*)my_malloc(sizeof(TABLE_RULE_ENT) + + len, MYF(MY_WME)); + if (!e) return 1; + e->db= (char*)e + sizeof(TABLE_RULE_ENT); + e->tbl_name= e->db + (dot - table_spec) + 1; + e->key_len= len; + memcpy(e->db, table_spec, len); + insert_dynamic(a, (gptr)&e); + return 0; +} + + +void +Rpl_filter::add_do_db(const char* table_spec) +{ + DBUG_ENTER("Rpl_filter::add_do_db"); + i_string *db = new i_string(table_spec); + do_db.push_back(db); +} + + +void +Rpl_filter::add_ignore_db(const char* table_spec) +{ + DBUG_ENTER("Rpl_filter::add_ignore_db"); + i_string *db = new i_string(table_spec); + ignore_db.push_back(db); +} + + +static byte* get_table_key(const byte* a, uint* len, + my_bool __attribute__((unused))) +{ + TABLE_RULE_ENT *e= (TABLE_RULE_ENT *) a; + + *len= e->key_len; + return (byte*)e->db; +} + + +static void free_table_ent(void* a) +{ + TABLE_RULE_ENT *e= (TABLE_RULE_ENT *) a; + + my_free((gptr) e, MYF(0)); +} + + +void +Rpl_filter::init_table_rule_hash(HASH* h, bool* h_inited) +{ + hash_init(h, system_charset_info,TABLE_RULE_HASH_SIZE,0,0, + get_table_key, free_table_ent, 0); + *h_inited = 1; +} + + +void
+Rpl_filter::init_table_rule_array(DYNAMIC_ARRAY* a, bool* a_inited) +{ + my_init_dynamic_array(a, sizeof(TABLE_RULE_ENT*), TABLE_RULE_ARR_SIZE, + TABLE_RULE_ARR_SIZE); + *a_inited = 1; +} + + +TABLE_RULE_ENT* +Rpl_filter::find_wild(DYNAMIC_ARRAY *a, const char* key, int len) +{ + uint i; + const char* key_end= key + len; + + for (i= 0; i < a->elements; i++) + { + TABLE_RULE_ENT* e ; + get_dynamic(a, (gptr)&e, i); + if (!my_wildcmp(system_charset_info, key, key_end, + (const char*)e->db, + (const char*)(e->db + e->key_len), + '\\',wild_one,wild_many)) + return e; + } + + return 0; +} + + +void +Rpl_filter::free_string_array(DYNAMIC_ARRAY *a) +{ + uint i; + for (i= 0; i < a->elements; i++) + { + char* p; + get_dynamic(a, (gptr) &p, i); + my_free(p, MYF(MY_WME)); + } + delete_dynamic(a); +} + + +/* + Builds a String from a HASH of TABLE_RULE_ENT. Cannot be used for any other + hash, as it assumes that the hash entries are TABLE_RULE_ENT. + + SYNOPSIS + table_rule_ent_hash_to_str() + s pointer to the String to fill + h pointer to the HASH to read + + RETURN VALUES + none +*/ + +void +Rpl_filter::table_rule_ent_hash_to_str(String* s, HASH* h, bool inited) +{ + s->length(0); + if (inited) + { + for (uint i= 0; i < h->records; i++) + { + TABLE_RULE_ENT* e= (TABLE_RULE_ENT*) hash_element(h, i); + if (s->length()) + s->append(','); + s->append(e->db,e->key_len); + } + } +} + + +void +Rpl_filter::table_rule_ent_dynamic_array_to_str(String* s, DYNAMIC_ARRAY* a, + bool inited) +{ + s->length(0); + if (inited) + { + for (uint i= 0; i < a->elements; i++) + { + TABLE_RULE_ENT* e; + get_dynamic(a, (gptr)&e, i); + if (s->length()) + s->append(','); + s->append(e->db,e->key_len); + } + } +} + + +void +Rpl_filter::get_do_table(String* str) +{ + table_rule_ent_hash_to_str(str, &do_table, do_table_inited); +} + + +void +Rpl_filter::get_ignore_table(String* str) +{ + table_rule_ent_hash_to_str(str, &ignore_table, ignore_table_inited); +} + + +void +Rpl_filter::get_wild_do_table(String* str) +{ + table_rule_ent_dynamic_array_to_str(str, &wild_do_table, wild_do_table_inited); +} + + +void +Rpl_filter::get_wild_ignore_table(String* str) +{ + table_rule_ent_dynamic_array_to_str(str, &wild_ignore_table, wild_ignore_table_inited); +} + + +const char* +Rpl_filter::get_rewrite_db(const char* db, uint32 *new_len) +{ + if (rewrite_db.is_empty() || !db) + return db; + I_List_iterator<i_string_pair> it(rewrite_db); + i_string_pair* tmp; + + while ((tmp=it++)) + { + if (!strcmp(tmp->key, db)) + { + *new_len= strlen(tmp->val); + return tmp->val; + } + } + return db; +} + + +I_List<i_string>* +Rpl_filter::get_do_db() +{ + return &do_db; +} + + +I_List<i_string>* +Rpl_filter::get_ignore_db() +{ + return &ignore_db; +} diff --git a/sql/rpl_filter.h b/sql/rpl_filter.h new file mode 100644 index 00000000000..5a766424d19 --- /dev/null +++ b/sql/rpl_filter.h @@ -0,0 +1,118 @@ +/* Copyright (C) 2000-2003 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#ifndef RPL_FILTER_H +#define RPL_FILTER_H + +#include "mysql.h" +#include "my_list.h" + +typedef struct st_table_rule_ent +{ + char* db; + char* tbl_name; + uint key_len; +} TABLE_RULE_ENT; + +/* + Rpl_filter + + Inclusion and exclusion rules of tables and databases. + Also handles rewrites of db. + Used for replication and binlogging. + */ +class Rpl_filter +{ +public: + Rpl_filter(); + ~Rpl_filter(); + Rpl_filter(Rpl_filter const&); + Rpl_filter& operator=(Rpl_filter const&); + + /* Checks - returns true if ok to replicate/log */ + + bool tables_ok(const char* db, TABLE_LIST* tables); + bool db_ok(const char* db); + bool db_ok_with_wild_table(const char *db); + + bool is_on(); + + /* Setters - add filtering rules */ + + int add_do_table(const char* table_spec); + int add_ignore_table(const char* table_spec); + + int add_wild_do_table(const char* table_spec); + int add_wild_ignore_table(const char* table_spec); + + void add_do_db(const char* db_spec); + void add_ignore_db(const char* db_spec); + + void add_db_rewrite(const char* from_db, const char* to_db); + + /* Getters - to get information about current rules */ + + void get_do_table(String* str); + void get_ignore_table(String* str); + + void get_wild_do_table(String* str); + void get_wild_ignore_table(String* str); + + const char* get_rewrite_db(const char* db, uint32 *new_len); + + I_List<i_string>* get_do_db(); + I_List<i_string>* get_ignore_db(); + +private: + bool table_rules_on; + + void init_table_rule_hash(HASH* h, bool* h_inited); + void init_table_rule_array(DYNAMIC_ARRAY* a, bool* a_inited); + + int add_table_rule(HASH* h, const char* table_spec); + int add_wild_table_rule(DYNAMIC_ARRAY* a, const char* table_spec); + + void free_string_array(DYNAMIC_ARRAY *a); + + void table_rule_ent_hash_to_str(String* s, HASH* h, bool inited); + void table_rule_ent_dynamic_array_to_str(String* s, DYNAMIC_ARRAY* a, + bool inited); + TABLE_RULE_ENT* find_wild(DYNAMIC_ARRAY *a, const char* key, int len); + + /* + Those 4 structures below are uninitialized memory unless the + corresponding *_inited variables are "true". 
+ */ + HASH do_table; + HASH ignore_table; + DYNAMIC_ARRAY wild_do_table; + DYNAMIC_ARRAY wild_ignore_table; + + bool do_table_inited; + bool ignore_table_inited; + bool wild_do_table_inited; + bool wild_ignore_table_inited; + + I_List<i_string> do_db; + I_List<i_string> ignore_db; + + I_List<i_string_pair> rewrite_db; +}; + +extern Rpl_filter *rpl_filter; +extern Rpl_filter *binlog_filter; + +#endif // RPL_FILTER_H diff --git a/sql/set_var.cc b/sql/set_var.cc index 632c37d2296..0e57a80e52c 100644 --- a/sql/set_var.cc +++ b/sql/set_var.cc @@ -58,15 +58,50 @@ #include <my_getopt.h> #include <thr_alarm.h> #include <myisam.h> -#ifdef HAVE_BERKELEY_DB -#include "ha_berkeley.h" -#endif -#ifdef HAVE_INNOBASE_DB -#include "ha_innodb.h" -#endif -#ifdef HAVE_NDBCLUSTER_DB -#include "ha_ndbcluster.h" -#endif + +/* WITH_BERKELEY_STORAGE_ENGINE */ +extern bool berkeley_shared_data; +extern ulong berkeley_max_lock, berkeley_log_buffer_size; +extern ulonglong berkeley_cache_size; +extern ulong berkeley_region_size, berkeley_cache_parts; +extern char *berkeley_home, *berkeley_tmpdir, *berkeley_logdir; + +/* WITH_INNOBASE_STORAGE_ENGINE */ +extern uint innobase_flush_log_at_trx_commit; +extern ulong innobase_fast_shutdown; +extern long innobase_mirrored_log_groups, innobase_log_files_in_group; +extern long innobase_log_file_size, innobase_log_buffer_size; +extern long innobase_buffer_pool_size, innobase_additional_mem_pool_size; +extern long innobase_buffer_pool_awe_mem_mb; +extern long innobase_file_io_threads, innobase_lock_wait_timeout; +extern long innobase_force_recovery; +extern long innobase_open_files; +extern char *innobase_data_home_dir, *innobase_data_file_path; +extern char *innobase_log_group_home_dir, *innobase_log_arch_dir; +extern char *innobase_unix_file_flush_method; +/* The following variables have to be my_bool for SHOW VARIABLES to work */ +extern my_bool innobase_log_archive, + innobase_use_doublewrite, + innobase_use_checksums, + innobase_file_per_table, + innobase_locks_unsafe_for_binlog; + +extern "C" { +extern ulong srv_max_buf_pool_modified_pct; +extern ulong srv_max_purge_lag; +extern ulong srv_auto_extend_increment; +extern ulong srv_n_spin_wait_rounds; +extern ulong srv_n_free_tickets_to_enter; +extern ulong srv_thread_sleep_delay; +extern ulong srv_thread_concurrency; +extern ulong srv_commit_concurrency; +} + +/* WITH_NDBCLUSTER_STORAGE_ENGINE */ +extern ulong ndb_cache_check_time; + + + static HASH system_variable_hash; const char *bool_type_names[]= { "OFF", "ON", NullS }; @@ -120,7 +155,6 @@ static KEY_CACHE *create_key_cache(const char *name, uint length); void fix_sql_mode_var(THD *thd, enum_var_type type); static byte *get_error_count(THD *thd); static byte *get_warning_count(THD *thd); -static byte *get_have_innodb(THD *thd); /* Variable definition list @@ -129,6 +163,9 @@ static byte *get_have_innodb(THD *thd); alphabetic order */ +sys_var *sys_var::first= NULL; +uint sys_var::sys_vars= 0; + sys_var_thd_ulong sys_auto_increment_increment("auto_increment_increment", &SV::auto_increment_increment); sys_var_thd_ulong sys_auto_increment_offset("auto_increment_offset", @@ -293,6 +330,8 @@ sys_var_thd_ulong sys_net_retry_count("net_retry_count", &SV::net_retry_count, 0, fix_net_retry_count); sys_var_thd_bool sys_new_mode("new", &SV::new_mode); +sys_var_thd_bool sys_old_alter_table("old_alter_table", + &SV::old_alter_table); sys_var_thd_bool sys_old_passwords("old_passwords", &SV::old_passwords); sys_var_thd_ulong sys_optimizer_prune_level("optimizer_prune_level", 
&SV::optimizer_prune_level); @@ -399,7 +438,6 @@ sys_var_bool_ptr sys_timed_mutexes("timed_mutexes", sys_var_thd_ulong sys_net_wait_timeout("wait_timeout", &SV::net_wait_timeout); -#ifdef HAVE_INNOBASE_DB sys_var_long_ptr sys_innodb_fast_shutdown("innodb_fast_shutdown", &innobase_fast_shutdown); sys_var_long_ptr sys_innodb_max_dirty_pages_pct("innodb_max_dirty_pages_pct", @@ -422,14 +460,12 @@ sys_var_long_ptr sys_innodb_thread_concurrency("innodb_thread_concurrency", &srv_thread_concurrency); sys_var_long_ptr sys_innodb_commit_concurrency("innodb_commit_concurrency", &srv_commit_concurrency); -#endif /* Condition pushdown to storage engine */ sys_var_thd_bool sys_engine_condition_pushdown("engine_condition_pushdown", &SV::engine_condition_pushdown); -#ifdef HAVE_NDBCLUSTER_DB /* ndb thread specific variable settings */ sys_var_thd_ulong sys_ndb_autoincrement_prefetch_sz("ndb_autoincrement_prefetch_sz", @@ -442,7 +478,15 @@ sys_var_thd_bool sys_ndb_use_transactions("ndb_use_transactions", &SV::ndb_use_transactions); sys_var_long_ptr sys_ndb_cache_check_time("ndb_cache_check_time", &ndb_cache_check_time); -#endif +sys_var_thd_bool +sys_ndb_index_stat_enable("ndb_index_stat_enable", + &SV::ndb_index_stat_enable); +sys_var_thd_ulong +sys_ndb_index_stat_cache_entries("ndb_index_stat_cache_entries", + &SV::ndb_index_stat_cache_entries); +sys_var_thd_ulong +sys_ndb_index_stat_update_freq("ndb_index_stat_update_freq", + &SV::ndb_index_stat_update_freq); /* Time/date/datetime formats */ @@ -546,204 +590,34 @@ sys_var_thd_time_zone sys_time_zone("time_zone"); /* Read only variables */ sys_var_const_str sys_os("version_compile_os", SYSTEM_TYPE); -sys_var_readonly sys_have_innodb("have_innodb", OPT_GLOBAL, - SHOW_CHAR, get_have_innodb); +sys_var_have_variable sys_have_archive_db("have_archive", &have_archive_db); +sys_var_have_variable sys_have_berkeley_db("have_bdb", &have_berkeley_db); +sys_var_have_variable sys_have_blackhole_db("have_blackhole_engine", + &have_blackhole_db); +sys_var_have_variable sys_have_compress("have_compress", &have_compress); +sys_var_have_variable sys_have_crypt("have_crypt", &have_crypt); +sys_var_have_variable sys_have_csv_db("have_csv", &have_csv_db); +sys_var_have_variable sys_have_example_db("have_example_engine", + &have_example_db); +sys_var_have_variable sys_have_federated_db("have_federated_engine", + &have_federated_db); +sys_var_have_variable sys_have_geometry("have_geometry", &have_geometry); +sys_var_have_variable sys_have_innodb("have_innodb", &have_innodb); +sys_var_have_variable sys_have_isam("have_isam", &have_isam); +sys_var_have_variable sys_have_ndbcluster("have_ndbcluster", &have_ndbcluster); +sys_var_have_variable sys_have_openssl("have_openssl", &have_openssl); +sys_var_have_variable sys_have_partition_db("have_partition_engine", + &have_partition_db); +sys_var_have_variable sys_have_query_cache("have_query_cache", + &have_query_cache); +sys_var_have_variable sys_have_raid("have_raid", &have_raid); +sys_var_have_variable sys_have_rtree_keys("have_rtree_keys", &have_rtree_keys); +sys_var_have_variable sys_have_symlink("have_symlink", &have_symlink); /* Global read-only variable describing server license */ sys_var_const_str sys_license("license", STRINGIFY_ARG(LICENSE)); /* - List of all variables for initialisation and storage in hash - This is sorted in alphabetical order to make it easy to add new variables - - If the variable is not in this list, it can't be changed with - SET variable_name= -*/ - -sys_var *sys_variables[]= -{ - &sys_auto_is_null, 
- &sys_auto_increment_increment, - &sys_auto_increment_offset, - &sys_autocommit, - &sys_automatic_sp_privileges, - &sys_big_tables, - &sys_big_selects, - &sys_binlog_cache_size, - &sys_buffer_results, - &sys_bulk_insert_buff_size, - &sys_character_set_server, - &sys_character_set_database, - &sys_character_set_client, - &sys_character_set_connection, - &sys_character_set_results, - &sys_charset_system, - &sys_collation_connection, - &sys_collation_database, - &sys_collation_server, - &sys_completion_type, - &sys_concurrent_insert, - &sys_connect_timeout, - &sys_date_format, - &sys_datetime_format, - &sys_div_precincrement, - &sys_default_week_format, - &sys_delay_key_write, - &sys_delayed_insert_limit, - &sys_delayed_insert_timeout, - &sys_delayed_queue_size, - &sys_error_count, - &sys_expire_logs_days, - &sys_flush, - &sys_flush_time, - &sys_ft_boolean_syntax, - &sys_foreign_key_checks, - &sys_group_concat_max_len, - &sys_have_innodb, - &sys_identity, - &sys_init_connect, - &sys_init_slave, - &sys_insert_id, - &sys_interactive_timeout, - &sys_join_buffer_size, - &sys_key_buffer_size, - &sys_key_cache_block_size, - &sys_key_cache_division_limit, - &sys_key_cache_age_threshold, - &sys_last_insert_id, - &sys_license, - &sys_local_infile, - &sys_log_binlog, - &sys_log_off, - &sys_log_update, - &sys_log_warnings, - &sys_long_query_time, - &sys_low_priority_updates, - &sys_max_allowed_packet, - &sys_max_binlog_cache_size, - &sys_max_binlog_size, - &sys_max_connect_errors, - &sys_max_connections, - &sys_max_delayed_threads, - &sys_max_error_count, - &sys_max_insert_delayed_threads, - &sys_max_heap_table_size, - &sys_max_join_size, - &sys_max_length_for_sort_data, - &sys_max_relay_log_size, - &sys_max_seeks_for_key, - &sys_max_sort_length, - &sys_max_sp_recursion_depth, - &sys_max_tmp_tables, - &sys_max_user_connections, - &sys_max_write_lock_count, - &sys_multi_range_count, - &sys_myisam_data_pointer_size, - &sys_myisam_max_sort_file_size, - &sys_myisam_repair_threads, - &sys_myisam_sort_buffer_size, - &sys_myisam_stats_method, - &sys_net_buffer_length, - &sys_net_read_timeout, - &sys_net_retry_count, - &sys_net_wait_timeout, - &sys_net_write_timeout, - &sys_new_mode, - &sys_old_passwords, - &sys_optimizer_prune_level, - &sys_optimizer_search_depth, - &sys_preload_buff_size, - &sys_pseudo_thread_id, - &sys_query_alloc_block_size, - &sys_query_cache_size, - &sys_query_prealloc_size, -#ifdef HAVE_QUERY_CACHE - &sys_query_cache_limit, - &sys_query_cache_min_res_unit, - &sys_query_cache_type, - &sys_query_cache_wlock_invalidate, -#endif /* HAVE_QUERY_CACHE */ - &sys_quote_show_create, - &sys_rand_seed1, - &sys_rand_seed2, - &sys_range_alloc_block_size, - &sys_readonly, - &sys_read_buff_size, - &sys_read_rnd_buff_size, -#ifdef HAVE_REPLICATION - &sys_relay_log_purge, -#endif - &sys_rpl_recovery_rank, - &sys_safe_updates, - &sys_secure_auth, - &sys_select_limit, - &sys_server_id, -#ifdef HAVE_REPLICATION - &sys_slave_compressed_protocol, - &sys_slave_net_timeout, - &sys_slave_trans_retries, - &sys_slave_skip_counter, -#endif - &sys_slow_launch_time, - &sys_sort_buffer, - &sys_sql_big_tables, - &sys_sql_low_priority_updates, - &sys_sql_max_join_size, - &sys_sql_mode, - &sys_sql_warnings, - &sys_sql_notes, - &sys_storage_engine, -#ifdef HAVE_REPLICATION - &sys_sync_binlog_period, - &sys_sync_replication, - &sys_sync_replication_slave_id, - &sys_sync_replication_timeout, -#endif - &sys_sync_frm, - &sys_table_cache_size, - &sys_table_lock_wait_timeout, - &sys_table_type, - &sys_thread_cache_size, - 
&sys_time_format, - &sys_timed_mutexes, - &sys_timestamp, - &sys_time_zone, - &sys_tmp_table_size, - &sys_trans_alloc_block_size, - &sys_trans_prealloc_size, - &sys_tx_isolation, - &sys_os, -#ifdef HAVE_INNOBASE_DB - &sys_innodb_fast_shutdown, - &sys_innodb_max_dirty_pages_pct, - &sys_innodb_max_purge_lag, - &sys_innodb_table_locks, - &sys_innodb_support_xa, - &sys_innodb_max_purge_lag, - &sys_innodb_autoextend_increment, - &sys_innodb_sync_spin_loops, - &sys_innodb_concurrency_tickets, - &sys_innodb_thread_sleep_delay, - &sys_innodb_thread_concurrency, - &sys_innodb_commit_concurrency, -#endif - &sys_trust_routine_creators, - &sys_trust_function_creators, - &sys_engine_condition_pushdown, -#ifdef HAVE_NDBCLUSTER_DB - &sys_ndb_autoincrement_prefetch_sz, - &sys_ndb_cache_check_time, - &sys_ndb_force_send, - &sys_ndb_use_exact_count, - &sys_ndb_use_transactions, -#endif - &sys_unique_checks, - &sys_updatable_views_with_limit, - &sys_warning_count -}; - - -/* Variables shown by SHOW variables in alphabetical order */ @@ -753,15 +627,15 @@ struct show_var_st init_vars[]= { {sys_automatic_sp_privileges.name,(char*) &sys_automatic_sp_privileges, SHOW_SYS}, {"back_log", (char*) &back_log, SHOW_LONG}, {"basedir", mysql_home, SHOW_CHAR}, -#ifdef HAVE_BERKELEY_DB - {"bdb_cache_size", (char*) &berkeley_cache_size, SHOW_LONG}, + {"bdb_cache_parts", (char*) &berkeley_cache_parts, SHOW_LONG}, + {"bdb_cache_size", (char*) &berkeley_cache_size, SHOW_LONGLONG}, {"bdb_home", (char*) &berkeley_home, SHOW_CHAR_PTR}, {"bdb_log_buffer_size", (char*) &berkeley_log_buffer_size, SHOW_LONG}, {"bdb_logdir", (char*) &berkeley_logdir, SHOW_CHAR_PTR}, {"bdb_max_lock", (char*) &berkeley_max_lock, SHOW_LONG}, + {"bdb_region_size", (char*) &berkeley_region_size, SHOW_LONG}, {"bdb_shared_data", (char*) &berkeley_shared_data, SHOW_BOOL}, {"bdb_tmpdir", (char*) &berkeley_tmpdir, SHOW_CHAR_PTR}, -#endif {sys_binlog_cache_size.name,(char*) &sys_binlog_cache_size, SHOW_SYS}, {sys_bulk_insert_buff_size.name,(char*) &sys_bulk_insert_buff_size,SHOW_SYS}, {sys_character_set_client.name,(char*) &sys_character_set_client, SHOW_SYS}, @@ -797,27 +671,27 @@ struct show_var_st init_vars[]= { {"ft_query_expansion_limit",(char*) &ft_query_expansion_limit, SHOW_LONG}, {"ft_stopword_file", (char*) &ft_stopword_file, SHOW_CHAR_PTR}, {sys_group_concat_max_len.name, (char*) &sys_group_concat_max_len, SHOW_SYS}, - {"have_archive", (char*) &have_archive_db, SHOW_HAVE}, - {"have_bdb", (char*) &have_berkeley_db, SHOW_HAVE}, - {"have_blackhole_engine", (char*) &have_blackhole_db, SHOW_HAVE}, - {"have_compress", (char*) &have_compress, SHOW_HAVE}, - {"have_crypt", (char*) &have_crypt, SHOW_HAVE}, - {"have_csv", (char*) &have_csv_db, SHOW_HAVE}, - {"have_example_engine", (char*) &have_example_db, SHOW_HAVE}, - {"have_federated_engine", (char*) &have_federated_db, SHOW_HAVE}, - {"have_geometry", (char*) &have_geometry, SHOW_HAVE}, - {"have_innodb", (char*) &have_innodb, SHOW_HAVE}, - {"have_isam", (char*) &have_isam, SHOW_HAVE}, - {"have_ndbcluster", (char*) &have_ndbcluster, SHOW_HAVE}, - {"have_openssl", (char*) &have_openssl, SHOW_HAVE}, - {"have_query_cache", (char*) &have_query_cache, SHOW_HAVE}, - {"have_raid", (char*) &have_raid, SHOW_HAVE}, - {"have_rtree_keys", (char*) &have_rtree_keys, SHOW_HAVE}, - {"have_symlink", (char*) &have_symlink, SHOW_HAVE}, + {sys_have_archive_db.name, (char*) &have_archive_db, SHOW_HAVE}, + {sys_have_berkeley_db.name, (char*) &have_berkeley_db, SHOW_HAVE}, + {sys_have_blackhole_db.name,(char*) &have_blackhole_db, 
SHOW_HAVE}, + {sys_have_compress.name, (char*) &have_compress, SHOW_HAVE}, + {sys_have_crypt.name, (char*) &have_crypt, SHOW_HAVE}, + {sys_have_csv_db.name, (char*) &have_csv_db, SHOW_HAVE}, + {sys_have_example_db.name, (char*) &have_example_db, SHOW_HAVE}, + {sys_have_federated_db.name,(char*) &have_federated_db, SHOW_HAVE}, + {sys_have_geometry.name, (char*) &have_geometry, SHOW_HAVE}, + {sys_have_innodb.name, (char*) &have_innodb, SHOW_HAVE}, + {sys_have_isam.name, (char*) &have_isam, SHOW_HAVE}, + {sys_have_ndbcluster.name, (char*) &have_ndbcluster, SHOW_HAVE}, + {sys_have_openssl.name, (char*) &have_openssl, SHOW_HAVE}, + {sys_have_partition_db.name,(char*) &have_partition_db, SHOW_HAVE}, + {sys_have_query_cache.name, (char*) &have_query_cache, SHOW_HAVE}, + {sys_have_raid.name, (char*) &have_raid, SHOW_HAVE}, + {sys_have_rtree_keys.name, (char*) &have_rtree_keys, SHOW_HAVE}, + {sys_have_symlink.name, (char*) &have_symlink, SHOW_HAVE}, {"init_connect", (char*) &sys_init_connect, SHOW_SYS}, {"init_file", (char*) &opt_init_file, SHOW_CHAR_PTR}, {"init_slave", (char*) &sys_init_slave, SHOW_SYS}, -#ifdef HAVE_INNOBASE_DB {"innodb_additional_mem_pool_size", (char*) &innobase_additional_mem_pool_size, SHOW_LONG }, {sys_innodb_autoextend_increment.name, (char*) &sys_innodb_autoextend_increment, SHOW_SYS}, {"innodb_buffer_pool_awe_mem_mb", (char*) &innobase_buffer_pool_awe_mem_mb, SHOW_LONG }, @@ -851,7 +725,6 @@ struct show_var_st init_vars[]= { {sys_innodb_table_locks.name, (char*) &sys_innodb_table_locks, SHOW_SYS}, {sys_innodb_thread_concurrency.name, (char*) &sys_innodb_thread_concurrency, SHOW_SYS}, {sys_innodb_thread_sleep_delay.name, (char*) &sys_innodb_thread_sleep_delay, SHOW_SYS}, -#endif {sys_interactive_timeout.name,(char*) &sys_interactive_timeout, SHOW_SYS}, {sys_join_buffer_size.name, (char*) &sys_join_buffer_size, SHOW_SYS}, {sys_key_buffer_size.name, (char*) &sys_key_buffer_size, SHOW_SYS}, @@ -918,19 +791,21 @@ struct show_var_st init_vars[]= { #ifdef __NT__ {"named_pipe", (char*) &opt_enable_named_pipe, SHOW_MY_BOOL}, #endif -#ifdef HAVE_NDBCLUSTER_DB {sys_ndb_autoincrement_prefetch_sz.name, (char*) &sys_ndb_autoincrement_prefetch_sz, SHOW_SYS}, + {sys_ndb_cache_check_time.name,(char*) &sys_ndb_cache_check_time, SHOW_SYS}, {sys_ndb_force_send.name, (char*) &sys_ndb_force_send, SHOW_SYS}, + {sys_ndb_index_stat_cache_entries.name, (char*) &sys_ndb_index_stat_cache_entries, SHOW_SYS}, + {sys_ndb_index_stat_enable.name, (char*) &sys_ndb_index_stat_enable, SHOW_SYS}, + {sys_ndb_index_stat_update_freq.name, (char*) &sys_ndb_index_stat_update_freq, SHOW_SYS}, {sys_ndb_use_exact_count.name,(char*) &sys_ndb_use_exact_count, SHOW_SYS}, {sys_ndb_use_transactions.name,(char*) &sys_ndb_use_transactions, SHOW_SYS}, - {sys_ndb_cache_check_time.name,(char*) &sys_ndb_cache_check_time, SHOW_SYS}, -#endif {sys_net_buffer_length.name,(char*) &sys_net_buffer_length, SHOW_SYS}, {sys_net_read_timeout.name, (char*) &sys_net_read_timeout, SHOW_SYS}, {sys_net_retry_count.name, (char*) &sys_net_retry_count, SHOW_SYS}, {sys_net_write_timeout.name,(char*) &sys_net_write_timeout, SHOW_SYS}, {sys_new_mode.name, (char*) &sys_new_mode, SHOW_SYS}, + {sys_old_alter_table.name, (char*) &sys_old_alter_table, SHOW_SYS}, {sys_old_passwords.name, (char*) &sys_old_passwords, SHOW_SYS}, {"open_files_limit", (char*) &open_files_limit, SHOW_LONG}, {sys_optimizer_prune_level.name, (char*) &sys_optimizer_prune_level, @@ -938,6 +813,7 @@ struct show_var_st init_vars[]= { {sys_optimizer_search_depth.name,(char*) 
&sys_optimizer_search_depth, SHOW_SYS}, {"pid_file", (char*) pidfile_name, SHOW_CHAR}, + {"plugin_dir", (char*) opt_plugin_dir, SHOW_CHAR}, {"port", (char*) &mysqld_port, SHOW_INT}, {sys_preload_buff_size.name, (char*) &sys_preload_buff_size, SHOW_SYS}, {"protocol_version", (char*) &protocol_version, SHOW_INT}, @@ -1021,9 +897,6 @@ struct show_var_st init_vars[]= { {sys_updatable_views_with_limit.name, (char*) &sys_updatable_views_with_limit,SHOW_SYS}, {"version", server_version, SHOW_CHAR}, -#ifdef HAVE_BERKELEY_DB - {"version_bdb", (char*) DB_VERSION_STRING, SHOW_CHAR}, -#endif {"version_comment", (char*) MYSQL_COMPILATION_COMMENT, SHOW_CHAR}, {"version_compile_machine", (char*) MACHINE_TYPE, SHOW_CHAR}, {sys_os.name, (char*) &sys_os, SHOW_SYS}, @@ -2783,12 +2656,6 @@ static byte *get_error_count(THD *thd) } -static byte *get_have_innodb(THD *thd) -{ - return (byte*) show_comp_option_name[have_innodb]; -} - - /**************************************************************************** Main handling of variables: - Initialisation @@ -2847,17 +2714,15 @@ static byte *get_sys_var_length(const sys_var *var, uint *length, void set_var_init() { - hash_init(&system_variable_hash, system_charset_info, - array_elements(sys_variables),0,0, - (hash_get_key) get_sys_var_length,0,0); - sys_var **var, **end; - for (var= sys_variables, end= sys_variables+array_elements(sys_variables) ; - var < end; - var++) - { - (*var)->name_length= strlen((*var)->name); - (*var)->option_limits= find_option(my_long_options, (*var)->name); - my_hash_insert(&system_variable_hash, (byte*) *var); + sys_var *var; + + hash_init(&system_variable_hash, system_charset_info, sys_var::sys_vars, 0, + 0, (hash_get_key) get_sys_var_length, 0, 0); + for (var= sys_var::first; var; var= var->next) + { + var->name_length= strlen(var->name); + var->option_limits= find_option(my_long_options, var->name); + my_hash_insert(&system_variable_hash, (byte*) var); } /* Special cases diff --git a/sql/set_var.h b/sql/set_var.h index 18c3353e8ff..14059f7e9b7 100644 --- a/sql/set_var.h +++ b/sql/set_var.h @@ -39,6 +39,9 @@ typedef byte *(*sys_value_ptr_func)(THD *thd); class sys_var { public: + static sys_var *first; + static uint sys_vars; + sys_var *next; struct my_option *option_limits; /* Updated by by set_var_init() */ uint name_length; /* Updated by by set_var_init() */ const char *name; @@ -48,12 +51,18 @@ public: sys_var(const char *name_arg) :name(name_arg), after_update(0) , no_support_one_shot(1) - {} + { add_sys_var(); } sys_var(const char *name_arg,sys_after_update_func func) :name(name_arg), after_update(func) , no_support_one_shot(1) - {} + { add_sys_var(); } virtual ~sys_var() {} + void add_sys_var() + { + next= first; + first= this; + sys_vars++; + } virtual bool check(THD *thd, set_var *var); bool check_enum(THD *thd, set_var *var, TYPELIB *enum_names); bool check_set(THD *thd, set_var *var, TYPELIB *enum_names); @@ -701,6 +710,30 @@ public: bool is_readonly() const { return 1; } }; + +class sys_var_have_variable: public sys_var +{ + SHOW_COMP_OPTION *have_variable; + +public: + sys_var_have_variable(const char *variable_name, + SHOW_COMP_OPTION *have_variable_arg): + sys_var(variable_name), + have_variable(have_variable_arg) + { } + byte *value_ptr(THD *thd, enum_var_type type, LEX_STRING *base) + { + return (byte*) show_comp_option_name[*have_variable]; + } + bool update(THD *thd, set_var *var) { return 1; } + bool check_default(enum_var_type type) { return 1; } + bool check_type(enum_var_type type) { return type != OPT_GLOBAL; 
} + bool check_update_type(Item_result type) { return 1; } + SHOW_TYPE type() { return SHOW_CHAR; } + bool is_readonly() const { return 1; } +}; + + class sys_var_thd_time_zone :public sys_var_thd { public: @@ -891,6 +924,7 @@ public: /* updated in sql_acl.cc */ +extern sys_var_thd_bool sys_old_alter_table; extern sys_var_thd_bool sys_old_passwords; extern LEX_STRING default_key_cache_base; diff --git a/sql/share/errmsg.txt b/sql/share/errmsg.txt index d0773e8af3b..f922f7a08ba 100644 --- a/sql/share/errmsg.txt +++ b/sql/share/errmsg.txt @@ -3006,7 +3006,7 @@ ER_CANT_FIND_DL_ENTRY cze "Nemohu naj-Bít funkci '%-.64s' v knihovń'" dan "Kan ikke finde funktionen '%-.64s' i bibliotek'" nla "Kan functie '%-.64s' niet in library vinden" - eng "Can't find function '%-.64s' in library'" + eng "Can't find symbol '%-.64s' in library'" jps "function '%-.64s' ‚đƒ‰ƒCƒuƒ‰ƒ[’†‚ÉŒ©•t‚¯‚é–‚ª‚Å‚«‚Ü‚¹‚ñ", est "Ei leia funktsiooni '%-.64s' antud teegis" fre "Impossible de trouver la fonction '%-.64s' dans la bibliothèque'" @@ -3018,7 +3018,7 @@ ER_CANT_FIND_DL_ENTRY kor "¶óÀ̀¹ö·¯¸®¿¡¼ '%-.64s' ÇÔ¼ö¸¦ Ă£À» ¼ö ¾ø½À´Ï´Ù." por "Năo pode encontrar a funçăo '%-.64s' na biblioteca" rum "Nu pot gasi functia '%-.64s' in libraria" - rus "îÅ×ÏÚÍÏÖÎÏ ÏÔÙÓËÁÔØ ÆƠÎËĂÉÀ '%-.64s' × ÂÉẦÉÏÔÅËÅ" + rus "îÅ×ÏÚÍÏÖÎÏ ÏÔÙÓËÁÔØ ÓÉÍ×Ï̀ '%-.64s' × ÂÉẦÉÏÔÅËÅ" serbian "Ne mogu da pronadjem funkciju '%-.64s' u biblioteci" slo "Nemô¾em nájs» funkciu '%-.64s' v kni¾nici'" spa "No puedo encontrar función '%-.64s' en libraria'" @@ -5406,11 +5406,11 @@ ER_PS_NO_RECURSION ER_SP_CANT_SET_AUTOCOMMIT eng "Not allowed to set autocommit from a stored function or trigger" ER_MALFORMED_DEFINER - eng "Definer is not fully qualified" + eng "Definer is not fully qualified" ER_VIEW_FRM_NO_USER eng "View %-.64s.%-.64s has not definer information (old table format). Current user is used as definer. Please recreate view!" ER_VIEW_OTHER_USER - eng "You need the SUPER privilege for creation view with %-.64s@%-.64s definer" + eng "You need the SUPER privilege for creation view with %-.64s@%-.64s definer" ER_NO_SUCH_USER eng "There is no '%-.64s'@'%-.64s' registered" ER_FORBID_SCHEMA_CHANGE @@ -5422,8 +5422,129 @@ ER_NO_REFERENCED_ROW_2 23000 ER_SP_BAD_VAR_SHADOW 42000 eng "Variable '%-.64s' must be quoted with `...`, or renamed" ER_TRG_NO_DEFINER - eng "No definer attribute for trigger '%-.64s'.'%-.64s'. The trigger will be activated under the authorization of the invoker, which may have insufficient privileges. Please recreate the trigger." + eng "No definer attribute for trigger '%-.64s'.'%-.64s'. The trigger will be activated under the authorization of the invoker, which may have insufficient privileges. Please recreate the trigger." 
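The set_var.h hunk above is the heart of this cleanup: instead of a hand-maintained sys_variables[] array, every sys_var constructor now calls add_sys_var(), which pushes the object onto an intrusive global list (sys_var::first) and bumps the sys_var::sys_vars counter, so set_var_init() can size the hash from the counter and walk the list. A minimal standalone sketch of this constructor-time registration pattern; the class and variable names here are illustrative, not the server's:

    #include <cstdio>

    // Minimal sketch of constructor-time registration, as in sys_var:
    // every instance links itself onto a global intrusive list.
    class Registered
    {
    public:
      static Registered *first;   // head of the global list
      static unsigned    count;   // number of registered objects
      Registered *next;
      const char *name;

      Registered(const char *name_arg) : name(name_arg)
      {
        next= first;              // push-front onto the list
        first= this;
        count++;
      }
    };

    Registered *Registered::first= 0;
    unsigned    Registered::count= 0;

    // Objects with static storage duration register before main() runs,
    // so an init function can walk the list instead of a fixed array.
    static Registered var_a("var_a");
    static Registered var_b("var_b");

    int main()
    {
      for (Registered *v= Registered::first; v; v= v->next)
        std::printf("registered: %s\n", v->name);  // var_b, then var_a
      return 0;
    }

The trade-off is ordering: static construction order is only defined within one translation unit, which is presumably why all the sys_var definitions stay together in set_var.cc.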
ER_OLD_FILE_FORMAT eng "'%-.64s' has an old format, you should re-create the '%s' object(s)" ER_SP_RECURSION_LIMIT eng "Recursive limit %d (as set by the max_sp_recursion_depth variable) was exceeded for routine %.64s" +ER_PARTITION_REQUIRES_VALUES_ERROR + eng "%s PARTITIONING requires definition of VALUES %s for each partition" + swe "%s PARTITIONering kräver definition av VALUES %s för varje partition" +ER_PARTITION_WRONG_VALUES_ERROR + eng "Only %s PARTITIONING can use VALUES %s in partition definition" + swe "Endast %s partitionering kan använda VALUES %s i definition av partitionen" +ER_PARTITION_MAXVALUE_ERROR + eng "MAXVALUE can only be used in last partition definition" + swe "MAXVALUE kan bara användas i definitionen av den sista partitionen" +ER_PARTITION_SUBPARTITION_ERROR + eng "Subpartitions can only be hash partitions and by key" + swe "Subpartitioner kan bara vara hash och key partitioner" +ER_PARTITION_WRONG_NO_PART_ERROR + eng "Wrong number of partitions defined, mismatch with previous setting" + swe "Antal partitioner definierade och antal partitioner är inte lika" +ER_PARTITION_WRONG_NO_SUBPART_ERROR + eng "Wrong number of subpartitions defined, mismatch with previous setting" + swe "Antal subpartitioner definierade och antal subpartitioner är inte lika" +ER_CONST_EXPR_IN_PARTITION_FUNC_ERROR + eng "Constant/Random expression in (sub)partitioning function is not allowed" + swe "Konstanta uttryck eller slumpmässiga uttryck är inte tillåtna (sub)partitioneringsfunktioner" +ER_NO_CONST_EXPR_IN_RANGE_OR_LIST_ERROR + eng "Expression in RANGE/LIST VALUES must be constant" + swe "Uttryck i RANGE/LIST VALUES måste vara ett konstant uttryck" +ER_FIELD_NOT_FOUND_PART_ERROR + eng "Field in list of fields for partition function not found in table" + swe "Fält i listan av fält för partitionering med key inte funnen i tabellen" +ER_LIST_OF_FIELDS_ONLY_IN_HASH_ERROR + eng "List of fields is only allowed in KEY partitions" + swe "En lista av fält är endast tillåtet för KEY partitioner" +ER_INCONSISTENT_PARTITION_INFO_ERROR + eng "The partition info in the frm file is not consistent with what can be written into the frm file" + swe "Partitioneringsinformationen i frm-filen är inte konsistent med vad som kan skrivas i frm-filen" +ER_PARTITION_FUNC_NOT_ALLOWED_ERROR + eng "The %s function returns the wrong type" + swe "%s-funktionen returnerar felaktig typ" +ER_PARTITIONS_MUST_BE_DEFINED_ERROR + eng "For %s partitions each partition must be defined" + swe "För %s partitionering så måste varje partition definieras" +ER_RANGE_NOT_INCREASING_ERROR + eng "VALUES LESS THAN value must be strictly increasing for each partition" + swe "Värden i VALUES LESS THAN måste vara strikt växande för varje partition" +ER_INCONSISTENT_TYPE_OF_FUNCTIONS_ERROR + eng "VALUES value must be of same type as partition function" + swe "Värden i VALUES måste vara av samma typ som partitioneringsfunktionen" +ER_MULTIPLE_DEF_CONST_IN_LIST_PART_ERROR + eng "Multiple definition of same constant in list partitioning" + swe "Multipel definition av samma konstant i list partitionering" +ER_PARTITION_ENTRY_ERROR + eng "Partitioning can not be used stand-alone in query" + swe "Partitioneringssyntax kan inte användas på egen hand i en SQL-fråga" +ER_MIX_HANDLER_ERROR + eng "The mix of handlers in the partitions is not allowed in this version of MySQL" + swe "Denna mix av lagringsmotorer är inte tillåten i denna version av MySQL" +ER_PARTITION_NOT_DEFINED_ERROR + eng "For the partitioned engine it is necessary to define all %s" 
+ swe "För partitioneringsmotorn så är det nödvändigt att definiera alla %s" +ER_TOO_MANY_PARTITIONS_ERROR + eng "Too many partitions were defined" + swe "För många partitioner definierades" +ER_SUBPARTITION_ERROR + eng "It is only possible to mix RANGE/LIST partitioning with HASH/KEY partitioning for subpartitioning" + swe "Det är endast möjligt att blanda RANGE/LIST partitionering med HASH/KEY partitionering för subpartitionering" +ER_CANT_CREATE_HANDLER_FILE + eng "Failed to create specific handler file" + swe "Misslyckades med att skapa specifik fil i lagringsmotor" +ER_BLOB_FIELD_IN_PART_FUNC_ERROR + eng "A BLOB field is not allowed in partition function" + swe "Ett BLOB-fält är inte tillåtet i partitioneringsfunktioner" +ER_CHAR_SET_IN_PART_FIELD_ERROR + eng "VARCHAR only allowed if binary collation for partition functions" + swe "VARCHAR endast tillåten med binär collation för partitioneringsfunktion" +ER_UNIQUE_KEY_NEED_ALL_FIELDS_IN_PF + eng "A %s need to include all fields in the partition function" + swe "En %s behöver inkludera alla fält i partitioneringsfunktionen för denna lagringsmotor" +ER_NO_PARTS_ERROR + eng "Number of %s = 0 is not an allowed value" + swe "Antal %s = 0 är inte ett tillåten värde" +ER_PARTITION_MGMT_ON_NONPARTITIONED + eng "Partition management on a not partitioned table is not possible" + swe "Partitioneringskommando på en opartitionerad tabell är inte möjligt" +ER_DROP_PARTITION_NON_EXISTENT + eng "Error in list of partitions to change" + swe "Fel i listan av partitioner att förändra" +ER_DROP_LAST_PARTITION + eng "Cannot remove all partitions, use DROP TABLE instead" + swe "Det är inte tillåtet att ta bort alla partitioner, använd DROP TABLE istället" +ER_COALESCE_ONLY_ON_HASH_PARTITION + eng "COALESCE PARTITION can only be used on HASH/KEY partitions" + swe "COALESCE PARTITION kan bara användas på HASH/KEY partitioner" +ER_ONLY_ON_RANGE_LIST_PARTITION + eng "%s PARTITION can only be used on RANGE/LIST partitions" + swe "%s PARTITION kan bara användas på RANGE/LIST-partitioner" +ER_ADD_PARTITION_SUBPART_ERROR + eng "Trying to Add partition(s) with wrong number of subpartitions" + swe "ADD PARTITION med fel antal subpartitioner" +ER_ADD_PARTITION_NO_NEW_PARTITION + eng "At least one partition must be added" + swe "Åtminstone en partition måste läggas till vid ADD PARTITION" +ER_COALESCE_PARTITION_NO_PARTITION + eng "At least one partition must be coalesced" + swe "Åtminstone en partition måste slås ihop vid COALESCE PARTITION" +ER_REORG_PARTITION_NOT_EXIST + eng "More partitions to reorganise than there are partitions" + swe "Fler partitioner att reorganisera än det finns partitioner" +ER_SAME_NAME_PARTITION + eng "All partitions must have unique names in the table" + swe "Alla partitioner i tabellen måste ha unika namn" +ER_CONSECUTIVE_REORG_PARTITIONS + eng "When reorganising a set of partitions they must be in consecutive order" + swe "När ett antal partitioner omorganiseras måste de vara i konsekutiv ordning" +ER_REORG_OUTSIDE_RANGE + eng "The new partitions cover a bigger range then the reorganised partitions do" + swe "De nya partitionerna täcker ett större intervall än de omorganiserade partitionerna" +ER_DROP_PARTITION_FAILURE + eng "Drop partition not supported in this version for this handler" +ER_DROP_PARTITION_WHEN_FK_DEFINED + eng "Cannot drop a partition when a foreign key constraint is defined on the table" + swe "Kan inte ta bort en partition när en främmande nyckel är definierad på tabellen" +ER_PLUGIN_IS_NOT_LOADED + eng "Plugin 
'%-.64s' is not loaded" diff --git a/sql/slave.cc b/sql/slave.cc index 065d9c787ce..263169a8406 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -22,6 +22,7 @@ #include <myisam.h> #include "slave.h" #include "sql_repl.h" +#include "rpl_filter.h" #include "repl_failsafe.h" #include <thr_alarm.h> #include <my_dir.h> @@ -36,11 +37,6 @@ typedef bool (*CHECK_KILLED_FUNC)(THD*,void*); volatile bool slave_sql_running = 0, slave_io_running = 0; char* slave_load_tmpdir = 0; MASTER_INFO *active_mi; -HASH replicate_do_table, replicate_ignore_table; -DYNAMIC_ARRAY replicate_wild_do_table, replicate_wild_ignore_table; -bool do_table_inited = 0, ignore_table_inited = 0; -bool wild_do_table_inited = 0, wild_ignore_table_inited = 0; -bool table_rules_on= 0; my_bool replicate_same_server_id; ulonglong relay_log_space_limit = 0; @@ -195,20 +191,6 @@ err: } -static void free_table_ent(TABLE_RULE_ENT* e) -{ - my_free((gptr) e, MYF(0)); -} - - -static byte* get_table_key(TABLE_RULE_ENT* e, uint* len, - my_bool not_used __attribute__((unused))) -{ - *len = e->key_len; - return (byte*)e->db; -} - - /* Open the given relay log @@ -824,237 +806,6 @@ int start_slave_threads(bool need_slave_mutex, bool wait_for_start, } -void init_table_rule_hash(HASH* h, bool* h_inited) -{ - hash_init(h, system_charset_info,TABLE_RULE_HASH_SIZE,0,0, - (hash_get_key) get_table_key, - (hash_free_key) free_table_ent, 0); - *h_inited = 1; -} - - -void init_table_rule_array(DYNAMIC_ARRAY* a, bool* a_inited) -{ - my_init_dynamic_array(a, sizeof(TABLE_RULE_ENT*), TABLE_RULE_ARR_SIZE, - TABLE_RULE_ARR_SIZE); - *a_inited = 1; -} - - -static TABLE_RULE_ENT* find_wild(DYNAMIC_ARRAY *a, const char* key, int len) -{ - uint i; - const char* key_end = key + len; - - for (i = 0; i < a->elements; i++) - { - TABLE_RULE_ENT* e ; - get_dynamic(a, (gptr)&e, i); - if (!my_wildcmp(system_charset_info, key, key_end, - (const char*)e->db, - (const char*)(e->db + e->key_len), - '\\',wild_one,wild_many)) - return e; - } - - return 0; -} - - -/* - Checks whether tables match some (wild_)do_table and (wild_)ignore_table - rules (for replication) - - SYNOPSIS - tables_ok() - thd thread (SQL slave thread normally). Mustn't be null. - tables list of tables to check - - NOTES - Note that changing the order of the tables in the list can lead to - different results. Note also the order of precedence of the do/ignore - rules (see code below). For that reason, users should not set conflicting - rules because they may get unpredicted results (precedence order is - explained in the manual). - - Thought which arose from a question of a big customer "I want to include - all tables like "abc.%" except the "%.EFG"". This can't be done now. If we - supported Perl regexps we could do it with this pattern: /^abc\.(?!EFG)/ - (I could not find an equivalent in the regex library MySQL uses). - - RETURN VALUES - 0 should not be logged/replicated - 1 should be logged/replicated -*/ - -bool tables_ok(THD* thd, TABLE_LIST* tables) -{ - bool some_tables_updating= 0; - DBUG_ENTER("tables_ok"); - - /* - In routine, can't reliably pick and choose substatements, so always - replicate. - We can't reliably know if one substatement should be executed or not: - consider the case of this substatement: a SELECT on a non-replicated - constant table; if we don't execute it maybe it was going to fill a - variable which was going to be used by the next substatement to update - a replicated table? 
If we execute it maybe the constant non-replicated - table does not exist (and so we'll fail) while there was no need to - execute this as this SELECT does not influence replicated tables in the - rest of the routine? In other words: users are used to replicate-*-table - specifying how to handle updates to tables, these options don't say - anything about reads to tables; we can't guess. - */ - if (thd->spcont) - DBUG_RETURN(1); - - for (; tables; tables= tables->next_global) - { - char hash_key[2*NAME_LEN+2]; - char *end; - uint len; - - if (!tables->updating) - continue; - some_tables_updating= 1; - end= strmov(hash_key, tables->db ? tables->db : thd->db); - *end++= '.'; - len= (uint) (strmov(end, tables->table_name) - hash_key); - if (do_table_inited) // if there are any do's - { - if (hash_search(&replicate_do_table, (byte*) hash_key, len)) - DBUG_RETURN(1); - } - if (ignore_table_inited) // if there are any ignores - { - if (hash_search(&replicate_ignore_table, (byte*) hash_key, len)) - DBUG_RETURN(0); - } - if (wild_do_table_inited && find_wild(&replicate_wild_do_table, - hash_key, len)) - DBUG_RETURN(1); - if (wild_ignore_table_inited && find_wild(&replicate_wild_ignore_table, - hash_key, len)) - DBUG_RETURN(0); - } - - /* - If no table was to be updated, ignore statement (no reason we play it on - slave, slave is supposed to replicate _changes_ only). - If no explicit rule found and there was a do list, do not replicate. - If there was no do list, go ahead - */ - DBUG_RETURN(some_tables_updating && - !do_table_inited && !wild_do_table_inited); -} - - -/* - Checks whether a db matches wild_do_table and wild_ignore_table - rules (for replication) - - SYNOPSIS - db_ok_with_wild_table() - db name of the db to check. - Is tested with check_db_name() before calling this function. - - NOTES - Here is the reason for this function. - We advise users who want to exclude a database 'db1' safely to do it - with replicate_wild_ignore_table='db1.%' instead of binlog_ignore_db or - replicate_ignore_db because the two lasts only check for the selected db, - which won't work in that case: - USE db2; - UPDATE db1.t SET ... #this will be replicated and should not - whereas replicate_wild_ignore_table will work in all cases. - With replicate_wild_ignore_table, we only check tables. When - one does 'DROP DATABASE db1', tables are not involved and the - statement will be replicated, while users could expect it would not (as it - rougly means 'DROP db1.first_table, DROP db1.second_table...'). - In other words, we want to interpret 'db1.%' as "everything touching db1". - That is why we want to match 'db1' against 'db1.%' wild table rules. - - RETURN VALUES - 0 should not be logged/replicated - 1 should be logged/replicated - */ - -int db_ok_with_wild_table(const char *db) -{ - char hash_key[NAME_LEN+2]; - char *end; - int len; - end= strmov(hash_key, db); - *end++= '.'; - len= end - hash_key ; - if (wild_do_table_inited && find_wild(&replicate_wild_do_table, - hash_key, len)) - return 1; - if (wild_ignore_table_inited && find_wild(&replicate_wild_ignore_table, - hash_key, len)) - return 0; - - /* - If no explicit rule found and there was a do list, do not replicate. - If there was no do list, go ahead - */ - return !wild_do_table_inited; -} - - -int add_table_rule(HASH* h, const char* table_spec) -{ - const char* dot = strchr(table_spec, '.'); - if (!dot) return 1; - // len is always > 0 because we know the there exists a '.' 
- uint len = (uint)strlen(table_spec); - TABLE_RULE_ENT* e = (TABLE_RULE_ENT*)my_malloc(sizeof(TABLE_RULE_ENT) - + len, MYF(MY_WME)); - if (!e) return 1; - e->db = (char*)e + sizeof(TABLE_RULE_ENT); - e->tbl_name = e->db + (dot - table_spec) + 1; - e->key_len = len; - memcpy(e->db, table_spec, len); - (void)my_hash_insert(h, (byte*)e); - return 0; -} - - -/* - Add table expression with wildcards to dynamic array -*/ - -int add_wild_table_rule(DYNAMIC_ARRAY* a, const char* table_spec) -{ - const char* dot = strchr(table_spec, '.'); - if (!dot) return 1; - uint len = (uint)strlen(table_spec); - TABLE_RULE_ENT* e = (TABLE_RULE_ENT*)my_malloc(sizeof(TABLE_RULE_ENT) - + len, MYF(MY_WME)); - if (!e) return 1; - e->db = (char*)e + sizeof(TABLE_RULE_ENT); - e->tbl_name = e->db + (dot - table_spec) + 1; - e->key_len = len; - memcpy(e->db, table_spec, len); - insert_dynamic(a, (gptr)&e); - return 0; -} - - -static void free_string_array(DYNAMIC_ARRAY *a) -{ - uint i; - for (i = 0; i < a->elements; i++) - { - char* p; - get_dynamic(a, (gptr) &p, i); - my_free(p, MYF(MY_WME)); - } - delete_dynamic(a); -} - - #ifdef NOT_USED_YET static int end_slave_on_walk(MASTER_INFO* mi, gptr /*unused*/) { @@ -1090,14 +841,6 @@ void end_slave() */ terminate_slave_threads(active_mi,SLAVE_FORCE_ALL); end_master_info(active_mi); - if (do_table_inited) - hash_free(&replicate_do_table); - if (ignore_table_inited) - hash_free(&replicate_ignore_table); - if (wild_do_table_inited) - free_string_array(&replicate_wild_do_table); - if (wild_ignore_table_inited) - free_string_array(&replicate_wild_ignore_table); delete active_mi; active_mi= 0; } @@ -1177,24 +920,6 @@ bool net_request_file(NET* net, const char* fname) } -const char *rewrite_db(const char* db, uint32 *new_len) -{ - if (replicate_rewrite_db.is_empty() || !db) - return db; - I_List_iterator<i_string_pair> it(replicate_rewrite_db); - i_string_pair* tmp; - - while ((tmp=it++)) - { - if (!strcmp(tmp->key, db)) - { - *new_len= (uint32)strlen(tmp->val); - return tmp->val; - } - } - return db; -} - /* From other comments and tests in code, it looks like sometimes Query_log_event and Load_log_event can have db == 0 @@ -1207,60 +932,6 @@ const char *print_slave_db_safe(const char* db) return (db ? db : ""); } -/* - Checks whether a db matches some do_db and ignore_db rules - (for logging or replication) - - SYNOPSIS - db_ok() - db name of the db to check - do_list either binlog_do_db or replicate_do_db - ignore_list either binlog_ignore_db or replicate_ignore_db - - RETURN VALUES - 0 should not be logged/replicated - 1 should be logged/replicated -*/ - -int db_ok(const char* db, I_List<i_string> &do_list, - I_List<i_string> &ignore_list ) -{ - if (do_list.is_empty() && ignore_list.is_empty()) - return 1; // ok to replicate if the user puts no constraints - - /* - If the user has specified restrictions on which databases to replicate - and db was not selected, do not replicate. 
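The removed add_table_rule()/add_wild_table_rule() above (their logic moves into rpl_filter.cc) rely on a single-allocation layout: one my_malloc() block holds the TABLE_RULE_ENT header followed by the "db.table" text, with db and tbl_name pointing into the trailing bytes. A sketch of that layout trick, using plain malloc and illustrative names:

    #include <cstdlib>
    #include <cstring>

    // One allocation holds both the header and the "db.table" text;
    // db and tbl_name point into the trailing bytes.
    struct TableRule
    {
      char  *db;        // start of "db.table"
      char  *tbl_name;  // points just past the '.'
      size_t key_len;
    };

    static TableRule *make_rule(const char *spec)
    {
      const char *dot= std::strchr(spec, '.');
      if (!dot)
        return 0;                          // a rule must be "db.table"
      size_t len= std::strlen(spec);
      TableRule *e=
        static_cast<TableRule*>(std::malloc(sizeof(TableRule) + len));
      if (!e)
        return 0;
      e->db= reinterpret_cast<char*>(e) + sizeof(TableRule);
      e->tbl_name= e->db + (dot - spec) + 1;
      e->key_len= len;
      std::memcpy(e->db, spec, len);       // note: not NUL-terminated;
      return e;                            // key_len bounds all accesses
    }

One free() releases both the header and the key text, which is exactly why the removed free_table_ent() was a single my_free().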
- */ - if (!db) - return 0; - - if (!do_list.is_empty()) // if the do's are not empty - { - I_List_iterator<i_string> it(do_list); - i_string* tmp; - - while ((tmp=it++)) - { - if (!strcmp(tmp->ptr, db)) - return 1; // match - } - return 0; - } - else // there are some elements in the don't, otherwise we cannot get here - { - I_List_iterator<i_string> it(ignore_list); - i_string* tmp; - - while ((tmp=it++)) - { - if (!strcmp(tmp->ptr, db)) - return 0; // match - } - return 1; - } -} - static int init_strvar_from_file(char *var, int max_size, IO_CACHE *f, const char *default_val) @@ -2337,48 +2008,6 @@ int register_slave_on_master(MYSQL* mysql) } -/* - Builds a String from a HASH of TABLE_RULE_ENT. Cannot be used for any other - hash, as it assumes that the hash entries are TABLE_RULE_ENT. - - SYNOPSIS - table_rule_ent_hash_to_str() - s pointer to the String to fill - h pointer to the HASH to read - - RETURN VALUES - none -*/ - -void table_rule_ent_hash_to_str(String* s, HASH* h) -{ - s->length(0); - for (uint i=0 ; i < h->records ; i++) - { - TABLE_RULE_ENT* e= (TABLE_RULE_ENT*) hash_element(h, i); - if (s->length()) - s->append(','); - s->append(e->db,e->key_len); - } -} - -/* - Mostly the same thing as above -*/ - -void table_rule_ent_dynamic_array_to_str(String* s, DYNAMIC_ARRAY* a) -{ - s->length(0); - for (uint i=0 ; i < a->elements ; i++) - { - TABLE_RULE_ENT* e; - get_dynamic(a, (gptr)&e, i); - if (s->length()) - s->append(','); - s->append(e->db,e->key_len); - } -} - bool show_master_info(THD* thd, MASTER_INFO* mi) { // TODO: fix this for multi-master @@ -2474,23 +2103,18 @@ bool show_master_info(THD* thd, MASTER_INFO* mi) protocol->store(mi->slave_running == MYSQL_SLAVE_RUN_CONNECT ? "Yes" : "No", &my_charset_bin); protocol->store(mi->rli.slave_running ? "Yes":"No", &my_charset_bin); - protocol->store(&replicate_do_db); - protocol->store(&replicate_ignore_db); - /* - We can't directly use some protocol->store for - replicate_*_table, - as Protocol doesn't know the TABLE_RULE_ENT struct. - We first build Strings and then pass them to protocol->store. 
- */ + protocol->store(rpl_filter->get_do_db()); + protocol->store(rpl_filter->get_ignore_db()); + char buf[256]; String tmp(buf, sizeof(buf), &my_charset_bin); - table_rule_ent_hash_to_str(&tmp, &replicate_do_table); + rpl_filter->get_do_table(&tmp); protocol->store(&tmp); - table_rule_ent_hash_to_str(&tmp, &replicate_ignore_table); + rpl_filter->get_ignore_table(&tmp); protocol->store(&tmp); - table_rule_ent_dynamic_array_to_str(&tmp, &replicate_wild_do_table); + rpl_filter->get_wild_do_table(&tmp); protocol->store(&tmp); - table_rule_ent_dynamic_array_to_str(&tmp, &replicate_wild_ignore_table); + rpl_filter->get_wild_ignore_table(&tmp); protocol->store(&tmp); protocol->store((uint32) mi->rli.last_slave_errno); @@ -3966,10 +3590,8 @@ static int process_io_create_file(MASTER_INFO* mi, Create_file_log_event* cev) if (unlikely(!cev->is_valid())) DBUG_RETURN(1); - /* - TODO: fix to honor table rules, not only db rules - */ - if (!db_ok(cev->db, replicate_do_db, replicate_ignore_db)) + + if (!rpl_filter->db_ok(cev->db)) { skip_load_data_infile(net); DBUG_RETURN(0); diff --git a/sql/slave.h b/sql/slave.h index 4d3c338680d..c994bfb2d34 100644 --- a/sql/slave.h +++ b/sql/slave.h @@ -21,6 +21,8 @@ #include "mysql.h" #include "my_list.h" +#include "rpl_filter.h" + #define SLAVE_NET_TIMEOUT 3600 #define MAX_SLAVE_ERRMSG 1024 #define MAX_SLAVE_ERROR 2000 @@ -472,15 +474,6 @@ typedef struct st_master_info int queue_event(MASTER_INFO* mi,const char* buf,ulong event_len); -typedef struct st_table_rule_ent -{ - char* db; - char* tbl_name; - uint key_len; -} TABLE_RULE_ENT; - -#define TABLE_RULE_HASH_SIZE 16 -#define TABLE_RULE_ARR_SIZE 16 #define MAX_SLAVE_ERRMSG 1024 #define RPL_LOG_NAME (rli->group_master_log_name[0] ? rli->group_master_log_name :\ @@ -534,27 +527,9 @@ int mysql_table_dump(THD* thd, const char* db, int fetch_master_table(THD* thd, const char* db_name, const char* table_name, MASTER_INFO* mi, MYSQL* mysql, bool overwrite); -void table_rule_ent_hash_to_str(String* s, HASH* h); -void table_rule_ent_dynamic_array_to_str(String* s, DYNAMIC_ARRAY* a); bool show_master_info(THD* thd, MASTER_INFO* mi); bool show_binlog_info(THD* thd); -/* See if the query uses any tables that should not be replicated */ -bool tables_ok(THD* thd, TABLE_LIST* tables); - -/* - Check to see if the database is ok to operate on with respect to the - do and ignore lists - used in replication -*/ -int db_ok(const char* db, I_List<i_string> &do_list, - I_List<i_string> &ignore_list ); -int db_ok_with_wild_table(const char *db); - -int add_table_rule(HASH* h, const char* table_spec); -int add_wild_table_rule(DYNAMIC_ARRAY* a, const char* table_spec); -void init_table_rule_hash(HASH* h, bool* h_inited); -void init_table_rule_array(DYNAMIC_ARRAY* a, bool* a_inited); -const char *rewrite_db(const char* db, uint32 *new_db_len); const char *print_slave_db_safe(const char *db); int check_expected_error(THD* thd, RELAY_LOG_INFO* rli, int error_code); void skip_load_data_infile(NET* net); @@ -588,11 +563,6 @@ pthread_handler_t handle_slave_sql(void *arg); extern bool volatile abort_loop; extern MASTER_INFO main_mi, *active_mi; /* active_mi for multi-master */ extern LIST master_list; -extern HASH replicate_do_table, replicate_ignore_table; -extern DYNAMIC_ARRAY replicate_wild_do_table, replicate_wild_ignore_table; -extern bool do_table_inited, ignore_table_inited, - wild_do_table_inited, wild_ignore_table_inited; -extern bool table_rules_on; extern my_bool replicate_same_server_id; extern int disconnect_slave_event_count, 
abort_slave_event_count ; @@ -607,8 +577,6 @@ extern my_bool master_ssl; extern my_string master_ssl_ca, master_ssl_capath, master_ssl_cert, master_ssl_cipher, master_ssl_key; -extern I_List<i_string> replicate_do_db, replicate_ignore_db; -extern I_List<i_string_pair> replicate_rewrite_db; extern I_List<THD> threads; #endif diff --git a/sql/sp.cc b/sql/sp.cc index 8991cc78b5e..01b16354c39 100644 --- a/sql/sp.cc +++ b/sql/sp.cc @@ -813,7 +813,7 @@ db_show_routine_status(THD *thd, int type, const char *wild) } } - table->file->ha_index_init(0); + table->file->ha_index_init(0, 1); if ((res= table->file->index_first(table->record[0]))) { res= (res == HA_ERR_END_OF_FILE) ? 0 : SP_INTERNAL_ERROR; @@ -863,7 +863,7 @@ sp_drop_db_routines(THD *thd, char *db) goto err; ret= SP_OK; - table->file->ha_index_init(0); + table->file->ha_index_init(0, 1); if (! table->file->index_read(table->record[0], key, keylen, HA_READ_KEY_EXACT)) { diff --git a/sql/sp_head.cc b/sql/sp_head.cc index a6e88c08789..ab564cef7c1 100644 --- a/sql/sp_head.cc +++ b/sql/sp_head.cc @@ -87,11 +87,11 @@ sp_get_flags_for_command(LEX *lex) case SQLCOM_SHOW_ERRORS: case SQLCOM_SHOW_FIELDS: case SQLCOM_SHOW_GRANTS: - case SQLCOM_SHOW_INNODB_STATUS: + case SQLCOM_SHOW_ENGINE_STATUS: + case SQLCOM_SHOW_ENGINE_LOGS: + case SQLCOM_SHOW_ENGINE_MUTEX: case SQLCOM_SHOW_KEYS: - case SQLCOM_SHOW_LOGS: case SQLCOM_SHOW_MASTER_STAT: - case SQLCOM_SHOW_MUTEX_STATUS: case SQLCOM_SHOW_NEW_MASTER: case SQLCOM_SHOW_OPEN_TABLES: case SQLCOM_SHOW_PRIVILEGES: diff --git a/sql/sql_acl.cc b/sql/sql_acl.cc index a572f03b575..cb46682470f 100644 --- a/sql/sql_acl.cc +++ b/sql/sql_acl.cc @@ -27,9 +27,6 @@ #include "mysql_priv.h" #include "hash_filo.h" -#ifdef HAVE_REPLICATION -#include "sql_repl.h" //for tables_ok() -#endif #include <m_ctype.h> #include <stdarg.h> #include "sp_head.h" @@ -37,6 +34,8 @@ #ifndef NO_EMBEDDED_ACCESS_CHECKS +#define FIRST_NON_YN_FIELD 26 + class acl_entry :public hash_filo_element { public: @@ -1420,7 +1419,7 @@ bool change_password(THD *thd, const char *host, const char *user, GRANT and REVOKE are applied the slave in/exclusion rules as they are some kind of updates to the mysql.% tables. 
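The slave.cc and slave.h hunks above are the main thrust of the replication changes in this commit: the do/ignore HASH and DYNAMIC_ARRAY globals and the free functions tables_ok(), db_ok(), db_ok_with_wild_table() and rewrite_db() become state and methods of a single rpl_filter object. The precedence documented in the removed tables_ok() comment is preserved: exact do-rule first, then exact ignore-rule, then wild do, then wild ignore; if nothing matches, replicate only when no do-lists were configured at all. A condensed model of that decision order for one "db.table" key, substituting std containers and a prefix test for the server's HASH and my_wildcmp() (everything here is illustrative):

    #include <set>
    #include <string>
    #include <vector>

    // Condensed model of the replicate-*-table precedence that the removed
    // tables_ok() implemented and Rpl_filter::tables_ok() preserves.
    // Wildcard matching is reduced to a "db.%" prefix test for brevity.
    struct FilterModel
    {
      std::set<std::string>    do_table, ignore_table;
      std::vector<std::string> wild_do, wild_ignore;

      static bool wild_match(const std::string &pat, const std::string &key)
      { // stand-in for my_wildcmp(): treat a trailing '%' as "any suffix"
        return pat.size() && pat.back() == '%' &&
               key.compare(0, pat.size() - 1, pat, 0, pat.size() - 1) == 0;
      }

      bool tables_ok(const std::string &key) const
      {
        if (do_table.count(key))     return true;   // exact do wins first
        if (ignore_table.count(key)) return false;  // then exact ignore
        for (const std::string &p : wild_do)
          if (wild_match(p, key))    return true;   // then wild do
        for (const std::string &p : wild_ignore)
          if (wild_match(p, key))    return false;  // then wild ignore
        // No rule matched: replicate only if there were no do-lists at all.
        return do_table.empty() && wild_do.empty();
      }
    };

Note also that the stored-routine special case moves to the callers: the old tables_ok() returned 1 whenever thd->spcont was set, while the new call sites in sql_acl.cc spell out thd->spcont || rpl_filter->tables_ok(0, tables), since the filter object no longer sees the THD.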
*/ - if (thd->slave_thread && table_rules_on) + if (thd->slave_thread && rpl_filter->is_on()) { /* The tables must be marked "updating" so that tables_ok() takes them into @@ -1428,7 +1427,7 @@ bool change_password(THD *thd, const char *host, const char *user, */ tables.updating= 1; /* Thanks to bzero, tables.next==0 */ - if (!tables_ok(thd, &tables)) + if (!(thd->spcont || rpl_filter->tables_ok(0, &tables))) DBUG_RETURN(0); } #endif @@ -1643,7 +1642,7 @@ static bool update_user_table(THD *thd, TABLE *table, key_copy((byte *) user_key, table->record[0], table->key_info, table->key_info->key_length); - table->file->extra(HA_EXTRA_RETRIEVE_ALL_COLS); + table->file->ha_retrieve_all_cols(); if (table->file->index_read_idx(table->record[0], 0, (byte *) user_key, table->key_info->key_length, HA_READ_KEY_EXACT)) @@ -1733,7 +1732,7 @@ static int replace_user_table(THD *thd, TABLE *table, const LEX_USER &combo, key_copy(user_key, table->record[0], table->key_info, table->key_info->key_length); - table->file->extra(HA_EXTRA_RETRIEVE_ALL_COLS); + table->file->ha_retrieve_all_cols(); if (table->file->index_read_idx(table->record[0], 0, user_key, table->key_info->key_length, HA_READ_KEY_EXACT)) @@ -1869,7 +1868,7 @@ static int replace_user_table(THD *thd, TABLE *table, const LEX_USER &combo, We should NEVER delete from the user table, as a uses can still use mysqld even if he doesn't have any privileges in the user table! */ - table->file->extra(HA_EXTRA_RETRIEVE_ALL_COLS); + table->file->ha_retrieve_all_cols(); if (cmp_record(table,record[1]) && (error=table->file->update_row(table->record[1],table->record[0]))) { // This should never happen @@ -1951,7 +1950,7 @@ static int replace_db_table(TABLE *table, const char *db, key_copy(user_key, table->record[0], table->key_info, table->key_info->key_length); - table->file->extra(HA_EXTRA_RETRIEVE_ALL_COLS); + table->file->ha_retrieve_all_cols(); if (table->file->index_read_idx(table->record[0],0, user_key, table->key_info->key_length, HA_READ_KEY_EXACT)) @@ -1987,7 +1986,7 @@ static int replace_db_table(TABLE *table, const char *db, /* update old existing row */ if (rights) { - table->file->extra(HA_EXTRA_RETRIEVE_ALL_COLS); + table->file->ha_retrieve_all_cols(); if ((error=table->file->update_row(table->record[1],table->record[0]))) goto table_error; /* purecov: deadcode */ } @@ -2166,7 +2165,7 @@ GRANT_TABLE::GRANT_TABLE(TABLE *form, TABLE *col_privs) key_copy(key, col_privs->record[0], col_privs->key_info, key_prefix_len); col_privs->field[4]->store("",0, &my_charset_latin1); - col_privs->file->ha_index_init(0); + col_privs->file->ha_index_init(0, 1); if (col_privs->file->index_read(col_privs->record[0], (byte*) key, key_prefix_len, HA_READ_KEY_EXACT)) @@ -2311,7 +2310,7 @@ static int replace_column_table(GRANT_TABLE *g_t, List_iterator <LEX_COLUMN> iter(columns); class LEX_COLUMN *column; - table->file->ha_index_init(0); + table->file->ha_index_init(0, 1); while ((column= iter++)) { ulong privileges= column->rights; @@ -2326,7 +2325,7 @@ static int replace_column_table(GRANT_TABLE *g_t, key_copy(user_key, table->record[0], table->key_info, table->key_info->key_length); - table->file->extra(HA_EXTRA_RETRIEVE_ALL_COLS); + table->file->ha_retrieve_all_cols(); if (table->file->index_read(table->record[0], user_key, table->key_info->key_length, HA_READ_KEY_EXACT)) @@ -2404,7 +2403,7 @@ static int replace_column_table(GRANT_TABLE *g_t, key_copy(user_key, table->record[0], table->key_info, key_prefix_length); - table->file->extra(HA_EXTRA_RETRIEVE_ALL_COLS); + 
table->file->ha_retrieve_all_cols();
   if (table->file->index_read(table->record[0], user_key, key_prefix_length, HA_READ_KEY_EXACT))
@@ -2503,7 +2502,7 @@ static int replace_table_table(THD *thd, GRANT_TABLE *grant_table,
   key_copy(user_key, table->record[0], table->key_info, table->key_info->key_length);
-  table->file->extra(HA_EXTRA_RETRIEVE_ALL_COLS);
+  table->file->ha_retrieve_all_cols();
   if (table->file->index_read_idx(table->record[0], 0, user_key, table->key_info->key_length, HA_READ_KEY_EXACT))
@@ -2823,14 +2822,14 @@ bool mysql_table_grant(THD *thd, TABLE_LIST *table_list,
     GRANT and REVOKE are applied the slave in/exclusion rules as they are
     some kind of updates to the mysql.% tables.
   */
-  if (thd->slave_thread && table_rules_on)
+  if (thd->slave_thread && rpl_filter->is_on())
   {
     /*
       The tables must be marked "updating" so that tables_ok() takes them into
       account in tests.
     */
     tables[0].updating= tables[1].updating= tables[2].updating= 1;
-    if (!tables_ok(thd, tables))
+    if (!(thd->spcont || rpl_filter->tables_ok(0, tables)))
       DBUG_RETURN(FALSE);
   }
 #endif
@@ -3030,14 +3030,14 @@ bool mysql_routine_grant(THD *thd, TABLE_LIST *table_list, bool is_proc,
     GRANT and REVOKE are applied the slave in/exclusion rules as they are
     some kind of updates to the mysql.% tables.
   */
-  if (thd->slave_thread && table_rules_on)
+  if (thd->slave_thread && rpl_filter->is_on())
   {
     /*
       The tables must be marked "updating" so that tables_ok() takes them into
       account in tests.
     */
     tables[0].updating= tables[1].updating= 1;
-    if (!tables_ok(thd, tables))
+    if (!(thd->spcont || rpl_filter->tables_ok(0, tables)))
       DBUG_RETURN(FALSE);
   }
 #endif
@@ -3161,14 +3161,14 @@ bool mysql_grant(THD *thd, const char *db, List <LEX_USER> &list,
     GRANT and REVOKE are applied the slave in/exclusion rules as they are
     some kind of updates to the mysql.% tables.
   */
-  if (thd->slave_thread && table_rules_on)
+  if (thd->slave_thread && rpl_filter->is_on())
   {
     /*
       The tables must be marked "updating" so that tables_ok() takes them into
       account in tests.
     */
     tables[0].updating= tables[1].updating= 1;
-    if (!tables_ok(thd, tables))
+    if (!(thd->spcont || rpl_filter->tables_ok(0, tables)))
       DBUG_RETURN(FALSE);
   }
 #endif
@@ -3311,8 +3311,8 @@ static my_bool grant_load(TABLE_LIST *tables)
   t_table = tables[0].table; c_table = tables[1].table;
   p_table= tables[2].table;
-  t_table->file->ha_index_init(0);
-  p_table->file->ha_index_init(0);
+  t_table->file->ha_index_init(0, 1);
+  p_table->file->ha_index_init(0, 1);
   if (!t_table->file->index_first(t_table->record[0]))
   {
     memex_ptr= &memex;
@@ -4520,7 +4520,7 @@ int open_grant_tables(THD *thd, TABLE_LIST *tables)
     GRANT and REVOKE are applied the slave in/exclusion rules as they are
     some kind of updates to the mysql.% tables.
*/ - if (thd->slave_thread && table_rules_on) + if (thd->slave_thread && rpl_filter->is_on()) { /* The tables must be marked "updating" so that tables_ok() takes them into @@ -4528,7 +4528,7 @@ int open_grant_tables(THD *thd, TABLE_LIST *tables) */ tables[0].updating=tables[1].updating=tables[2].updating= tables[3].updating=tables[4].updating=1; - if (!tables_ok(thd, tables)) + if (!(thd->spcont || rpl_filter->tables_ok(0, tables))) DBUG_RETURN(1); tables[0].updating=tables[1].updating=tables[2].updating= tables[3].updating=tables[4].updating=0;; @@ -4690,7 +4690,7 @@ static int handle_grant_table(TABLE_LIST *tables, uint table_no, bool drop, user_key, key_prefix_length, HA_READ_KEY_EXACT))) { - if (error != HA_ERR_KEY_NOT_FOUND) + if (error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE) { table->file->print_error(error, MYF(0)); result= -1; diff --git a/sql/sql_acl.h b/sql/sql_acl.h index 0e50737f84c..0a9c6ba785f 100644 --- a/sql/sql_acl.h +++ b/sql/sql_acl.h @@ -14,6 +14,8 @@ along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#include "slave.h" // for tables_ok(), rpl_filter + #define SELECT_ACL (1L << 0) #define INSERT_ACL (1L << 1) #define UPDATE_ACL (1L << 2) @@ -50,7 +52,6 @@ */ #define EXTRA_ACL (1L << 29) #define NO_ACCESS (1L << 30) - #define DB_ACLS \ (UPDATE_ACL | SELECT_ACL | INSERT_ACL | DELETE_ACL | CREATE_ACL | DROP_ACL | \ GRANT_ACL | REFERENCES_ACL | INDEX_ACL | ALTER_ACL | CREATE_TMP_ACL | \ diff --git a/sql/sql_base.cc b/sql/sql_base.cc index b765f2f53e2..af08d20baaf 100644 --- a/sql/sql_base.cc +++ b/sql/sql_base.cc @@ -566,7 +566,7 @@ bool close_thread_table(THD *thd, TABLE **table_ptr) else { // Free memory and reset for next loop - table->file->reset(); + table->file->ha_reset(); } table->in_use=0; if (unused_tables) @@ -2671,15 +2671,20 @@ static void update_field_dependencies(THD *thd, Field *field, TABLE *table) { if (thd->set_query_id) { + table->file->ha_set_bit_in_rw_set(field->fieldnr, + (bool)(thd->set_query_id-1)); if (field->query_id != thd->query_id) { + if (table->get_fields_in_item_tree) + field->flags|= GET_FIXED_FIELDS_FLAG; field->query_id= thd->query_id; table->used_fields++; table->used_keys.intersect(field->part_of_key); } else thd->dupp_field= field; - } + } else if (table->get_fields_in_item_tree) + field->flags|= GET_FIXED_FIELDS_FLAG; } @@ -3159,6 +3164,42 @@ find_field_in_table_ref(THD *thd, TABLE_LIST *table_list, /* + Find field in table, no side effects, only purpose is to check for field + in table object and get reference to the field if found. + + SYNOPSIS + find_field_in_table_sef() + + table table where to find + name Name of field searched for + + RETURN + 0 field is not found + # pointer to field +*/ + +Field *find_field_in_table_sef(TABLE *table, const char *name) +{ + Field **field_ptr; + if (table->s->name_hash.records) + field_ptr= (Field**)hash_search(&table->s->name_hash,(byte*) name, + strlen(name)); + else + { + if (!(field_ptr= table->field)) + return (Field *)0; + for (; *field_ptr; ++field_ptr) + if (!my_strcasecmp(system_charset_info, (*field_ptr)->field_name, name)) + break; + } + if (field_ptr) + return *field_ptr; + else + return (Field *)0; +} + + +/* Find field in table list. SYNOPSIS @@ -3825,15 +3866,19 @@ mark_common_columns(THD *thd, TABLE_LIST *table_ref_1, TABLE_LIST *table_ref_2, if (field_1) { + TABLE *table_1= nj_col_1->table_ref->table; /* Mark field_1 used for table cache. 
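The update_field_dependencies() hunk above shows how column usage now reaches the storage engine: setup_fields() passes set_query_id through as 1 or 2 (the sql_class.h comment later in this diff spells out the values), and each resolved field flips a bit in the handler's read or write set via ha_set_bit_in_rw_set(). A toy model of the bookkeeping, with illustrative names rather than the real handler API:

    #include <vector>

    // Toy model of per-column read/write sets kept alongside a table:
    // mode 1 marks a column as read, mode 2 as written, mirroring the
    // set_query_id values 1 and 2 that setup_fields() passes through.
    struct ColumnSets
    {
      std::vector<bool> read_set, write_set;

      explicit ColumnSets(size_t fields)
        : read_set(fields, false), write_set(fields, false) {}

      void mark(size_t fieldnr, int mode)
      {
        if (mode == 1)
          read_set[fieldnr]= true;    // SELECT-style access
        else if (mode == 2)
          write_set[fieldnr]= true;   // INSERT/UPDATE target
      }

      void mark_all_writes()          // e.g. INSERT with no column list
      {
        write_set.assign(write_set.size(), true);
      }
    };

The payoff is for engines that can skip columns whose bits are clear; a cluster engine, for instance, can avoid shipping unmarked columns over the network at all.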
*/ field_1->query_id= thd->query_id; - nj_col_1->table_ref->table->used_keys.intersect(field_1->part_of_key); + table_1->file->ha_set_bit_in_read_set(field_1->fieldnr); + table_1->used_keys.intersect(field_1->part_of_key); } if (field_2) { + TABLE *table_2= nj_col_2->table_ref->table; /* Mark field_2 used for table cache. */ field_2->query_id= thd->query_id; - nj_col_2->table_ref->table->used_keys.intersect(field_2->part_of_key); + table_2->file->ha_set_bit_in_read_set(field_2->fieldnr); + table_2->used_keys.intersect(field_2->part_of_key); } if (using_fields != NULL) @@ -4313,11 +4358,11 @@ int setup_wild(THD *thd, TABLE_LIST *tables, List<Item> &fields, ****************************************************************************/ bool setup_fields(THD *thd, Item **ref_pointer_array, - List<Item> &fields, bool set_query_id, + List<Item> &fields, ulong set_query_id, List<Item> *sum_func_list, bool allow_sum_func) { reg2 Item *item; - bool save_set_query_id= thd->set_query_id; + ulong save_set_query_id= thd->set_query_id; List_iterator<Item> it(fields); DBUG_ENTER("setup_fields"); @@ -4699,6 +4744,7 @@ insert_fields(THD *thd, Name_resolution_context *context, const char *db_name, if (field->query_id == thd->query_id) thd->dupp_field= field; field->query_id= thd->query_id; + field->table->file->ha_set_bit_in_read_set(field->fieldnr); if (table) table->used_keys.intersect(field->part_of_key); @@ -4739,7 +4785,10 @@ insert_fields(THD *thd, Name_resolution_context *context, const char *db_name, For NATURAL joins, used_tables is updated in the IF above. */ if (table) + { table->used_fields= table->s->fields; + table->file->ha_set_all_bits_in_read_set(); + } } if (found) DBUG_RETURN(FALSE); diff --git a/sql/sql_bitmap.h b/sql/sql_bitmap.h index 0f5b6dcd35e..35c501ede56 100644 --- a/sql/sql_bitmap.h +++ b/sql/sql_bitmap.h @@ -25,7 +25,7 @@ template <uint default_width> class Bitmap { MY_BITMAP map; - uchar buffer[(default_width+7)/8]; + uint32 buffer[(default_width+31)/32]; public: Bitmap() { init(); } Bitmap(const Bitmap& from) { *this=from; } @@ -48,14 +48,14 @@ public: void intersect(ulonglong map2buff) { MY_BITMAP map2; - bitmap_init(&map2, (uchar *)&map2buff, sizeof(ulonglong)*8, 0); + bitmap_init(&map2, (uint32 *)&map2buff, sizeof(ulonglong)*8, 0); bitmap_intersect(&map, &map2); } /* Use highest bit for all bits above sizeof(ulonglong)*8. 
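In the sql_bitmap.h hunk above, the template's backing store changes from bytes to uint32 words, (default_width+31)/32 of them, to match the reworked MY_BITMAP (bitmap_init() now takes a uint32 pointer, and the size test reads map.n_bits instead of bitmap_size). The intersect_extended() contract stays as the comment describes: intersect the low 64 bits with the mask, and treat the mask's top bit as standing in for every higher bit. A compact standalone sketch of that rule, with illustrative names:

    #include <cstdint>

    // Sketch of the sql_bitmap.h storage change: 32-bit words instead of
    // bytes, plus the intersect_extended() rule that the mask's top bit
    // stands in for every bit above the 64-bit mask.
    template <unsigned WIDTH>
    struct WordBitmap
    {
      static_assert(WIDTH >= 64, "sketch assumes at least 64 bits");
      uint32_t words[(WIDTH + 31) / 32];

      void intersect_extended(uint64_t mask)
      {
        words[0]&= static_cast<uint32_t>(mask);        // bits  0..31
        words[1]&= static_cast<uint32_t>(mask >> 32);  // bits 32..63
        if (!(mask & (uint64_t(1) << 63)))             // top bit clear:
          for (unsigned i= 2; i < (WIDTH + 31) / 32; i++)
            words[i]= 0;                               // clear the rest
      }
    };

Sizing the array in whole 32-bit words is also what fixes the print() and check_view_insertability() call sites elsewhere in this diff, which previously assumed byte granularity.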
*/ void intersect_extended(ulonglong map2buff) { intersect(map2buff); - if (map.bitmap_size > sizeof(ulonglong)) + if (map.n_bits > sizeof(ulonglong) * 8) bitmap_set_above(&map, sizeof(ulonglong), test(map2buff & (LL(1) << (sizeof(ulonglong) * 8 - 1)))); } @@ -70,7 +70,7 @@ public: char *print(char *buf) const { char *s=buf; - const uchar *e=buffer, *b=e+sizeof(buffer)-1; + const uchar *e=(uchar *)buffer, *b=e+sizeof(buffer)-1; while (!*b && b>e) b--; if ((*s=_dig_vec_upper[*b >> 4]) != '0') diff --git a/sql/sql_cache.cc b/sql/sql_cache.cc index 3965079988b..c150489f311 100644 --- a/sql/sql_cache.cc +++ b/sql/sql_cache.cc @@ -303,7 +303,7 @@ TODO list: #ifndef MASTER #include "../srclib/myisammrg/myrg_def.h" #else -#include "../myisammrg/myrg_def.h" +#include "../storage/myisammrg/myrg_def.h" #endif #ifdef EMBEDDED_LIBRARY diff --git a/sql/sql_cache.h b/sql/sql_cache.h index 69a0d6cd05d..b7531696150 100644 --- a/sql/sql_cache.h +++ b/sql/sql_cache.h @@ -215,6 +215,8 @@ struct Query_cache_memory_bin struct Query_cache_memory_bin_step { +public: + Query_cache_memory_bin_step() {} ulong size; ulong increment; uint idx; diff --git a/sql/sql_class.cc b/sql/sql_class.cc index a0d87b05e97..19bbf08353a 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -287,7 +287,7 @@ void THD::init(void) variables.date_format); variables.datetime_format= date_time_format_copy((THD*) 0, variables.datetime_format); -#ifdef HAVE_NDBCLUSTER_DB +#ifdef WITH_NDBCLUSTER_STORAGE_ENGINE variables.ndb_use_transactions= 1; #endif pthread_mutex_unlock(&LOCK_global_system_variables); @@ -902,7 +902,7 @@ bool select_send::send_data(List<Item> &items) return 0; } -#ifdef HAVE_INNOBASE_DB +#ifdef WITH_INNOBASE_STORAGE_ENGINE /* We may be passing the control from mysqld to the client: release the InnoDB adaptive hash S-latch to avoid thread deadlocks if it was reserved @@ -938,7 +938,7 @@ bool select_send::send_data(List<Item> &items) bool select_send::send_eof() { -#ifdef HAVE_INNOBASE_DB +#ifdef WITH_INNOBASE_STORAGE_ENGINE /* We may be passing the control from mysqld to the client: release the InnoDB adaptive hash S-latch to avoid thread deadlocks if it was reserved by thd */ diff --git a/sql/sql_class.h b/sql/sql_class.h index 0346801b0dc..95d4f45fcff 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -409,11 +409,13 @@ public: List<key_part_spec> columns; const char *name; bool generated; + LEX_STRING *parser_name; Key(enum Keytype type_par, const char *name_arg, enum ha_key_alg alg_par, - bool generated_arg, List<key_part_spec> &cols) + bool generated_arg, List<key_part_spec> &cols, + LEX_STRING *parser_arg= 0) :type(type_par), algorithm(alg_par), columns(cols), name(name_arg), - generated(generated_arg) + generated(generated_arg), parser_name(parser_arg) {} ~Key() {} /* Equality comparison of keys (ignoring name) */ @@ -465,19 +467,20 @@ public: class i_string: public ilink { public: - char* ptr; + const char* ptr; i_string():ptr(0) { } - i_string(char* s) : ptr(s) {} + i_string(const char* s) : ptr(s) {} }; /* needed for linked list of two strings for replicate-rewrite-db */ class i_string_pair: public ilink { public: - char* key; - char* val; + const char* key; + const char* val; i_string_pair():key(0),val(0) { } - i_string_pair(char* key_arg, char* val_arg) : key(key_arg),val(val_arg) {} + i_string_pair(const char* key_arg, const char* val_arg) : + key(key_arg),val(val_arg) {} }; @@ -557,16 +560,16 @@ struct system_variables ulong sync_replication_slave_id; ulong sync_replication_timeout; #endif /* 
HAVE_REPLICATION */ -#ifdef HAVE_INNOBASE_DB my_bool innodb_table_locks; my_bool innodb_support_xa; -#endif /* HAVE_INNOBASE_DB */ -#ifdef HAVE_NDBCLUSTER_DB ulong ndb_autoincrement_prefetch_sz; my_bool ndb_force_send; my_bool ndb_use_exact_count; my_bool ndb_use_transactions; -#endif /* HAVE_NDBCLUSTER_DB */ + my_bool ndb_index_stat_enable; + ulong ndb_index_stat_cache_entries; + ulong ndb_index_stat_update_freq; + my_bool old_alter_table; my_bool old_passwords; /* Only charset part of these variables is sensible */ @@ -778,8 +781,15 @@ public: /* - if set_query_id=1, we set field->query_id for all fields. In that case field list can not contain duplicates. + 0: Means query_id is not set and no indicator to handler of fields used + is set + 1: Means query_id is set for fields in list and bit in read set is set + to inform handler of that field is to be read + 2: Means query is set for fields in list and bit is set in update set + to inform handler that it needs to update this field in write_row + and update_row */ - bool set_query_id; + ulong set_query_id; /* This variable is used in post-parse stage to declare that sum-functions, or functions which have sense only if GROUP BY is present, are allowed. diff --git a/sql/sql_delete.cc b/sql/sql_delete.cc index 983f0931ef9..816aba25218 100644 --- a/sql/sql_delete.cc +++ b/sql/sql_delete.cc @@ -30,7 +30,8 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds, SQL_LIST *order, ha_rows limit, ulonglong options, bool reset_auto_increment) { - int error; + bool will_batch; + int error, loc_error; TABLE *table; SQL_SELECT *select=0; READ_RECORD info; @@ -194,6 +195,7 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds, deleted=0L; init_ftfuncs(thd, select_lex, 1); thd->proc_info="updating"; + will_batch= !table->file->start_bulk_delete(); while (!(error=info.read_record(&info)) && !thd->killed && !thd->net.report_error) { @@ -209,7 +211,7 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds, break; } - if (!(error=table->file->delete_row(table->record[0]))) + if (!(error= table->file->delete_row(table->record[0]))) { deleted++; if (table->triggers && @@ -245,7 +247,13 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds, } if (thd->killed && !error) error= 1; // Aborted - thd->proc_info="end"; + if (will_batch && (loc_error= table->file->end_bulk_delete())) + { + if (error != 1) + table->file->print_error(loc_error,MYF(0)); + error=1; + } + thd->proc_info= "end"; end_read_record(&info); free_io_cache(table); // Will not do any harm if (options & OPTION_QUICK) @@ -658,7 +666,8 @@ void multi_delete::send_error(uint errcode,const char *err) int multi_delete::do_deletes() { - int local_error= 0, counter= 0; + int local_error= 0, counter= 0, error; + bool will_batch; DBUG_ENTER("do_deletes"); DBUG_ASSERT(do_delete); @@ -686,6 +695,7 @@ int multi_delete::do_deletes() been deleted by foreign key handling */ info.ignore_not_found_rows= 1; + will_batch= !table->file->start_bulk_delete(); while (!(local_error=info.read_record(&info)) && !thd->killed) { if (table->triggers && @@ -709,6 +719,14 @@ int multi_delete::do_deletes() break; } } + if (will_batch && (error= table->file->end_bulk_delete())) + { + if (!local_error) + { + local_error= error; + table->file->print_error(local_error,MYF(0)); + } + } end_read_record(&info); if (thd->killed && !local_error) local_error= 1; diff --git a/sql/sql_handler.cc b/sql/sql_handler.cc index da72d283259..e9c695419d8 100644 --- a/sql/sql_handler.cc +++ 
b/sql/sql_handler.cc @@ -485,7 +485,7 @@ bool mysql_ha_read(THD *thd, TABLE_LIST *tables, if (keyname) { table->file->ha_index_or_rnd_end(); - table->file->ha_index_init(keyno); + table->file->ha_index_init(keyno, 1); error= table->file->index_first(table->record[0]); } else @@ -507,7 +507,7 @@ bool mysql_ha_read(THD *thd, TABLE_LIST *tables, case RLAST: DBUG_ASSERT(keyname != 0); table->file->ha_index_or_rnd_end(); - table->file->ha_index_init(keyno); + table->file->ha_index_init(keyno, 1); error= table->file->index_last(table->record[0]); mode=RPREV; break; @@ -546,7 +546,7 @@ bool mysql_ha_read(THD *thd, TABLE_LIST *tables, if (!(key= (byte*) thd->calloc(ALIGN_SIZE(key_len)))) goto err; table->file->ha_index_or_rnd_end(); - table->file->ha_index_init(keyno); + table->file->ha_index_init(keyno, 1); key_copy(key, table->record[0], table->key_info + keyno, key_len); error= table->file->index_read(table->record[0], key,key_len,ha_rkey_mode); diff --git a/sql/sql_help.cc b/sql/sql_help.cc index b47412981ea..0715aeeaa5b 100644 --- a/sql/sql_help.cc +++ b/sql/sql_help.cc @@ -286,8 +286,8 @@ int get_topics_for_keyword(THD *thd, TABLE *topics, TABLE *relations, rtopic_id= find_fields[help_relation_help_topic_id].field; rkey_id= find_fields[help_relation_help_keyword_id].field; - topics->file->ha_index_init(iindex_topic); - relations->file->ha_index_init(iindex_relations); + topics->file->ha_index_init(iindex_topic,1); + relations->file->ha_index_init(iindex_relations,1); rkey_id->store((longlong) key_id, TRUE); rkey_id->get_key_image(buff, rkey_id->pack_length(), Field::itRAW); diff --git a/sql/sql_insert.cc b/sql/sql_insert.cc index 088b55c2fd8..8877cc17bf0 100644 --- a/sql/sql_insert.cc +++ b/sql/sql_insert.cc @@ -103,6 +103,11 @@ static int check_insert_fields(THD *thd, TABLE_LIST *table_list, #endif clear_timestamp_auto_bits(table->timestamp_field_type, TIMESTAMP_AUTO_SET_ON_INSERT); + /* + No fields are provided so all fields must be provided in the values. + Thus we set all bits in the write set. + */ + table->file->ha_set_all_bits_in_write_set(); } else { // Part field list @@ -140,7 +145,11 @@ static int check_insert_fields(THD *thd, TABLE_LIST *table_list, */ table_list->next_local= 0; context->resolve_in_table_list_only(table_list); - res= setup_fields(thd, 0, fields, 1, 0, 0); + /* + Indicate fields in list is to be updated by setting set_query_id + parameter to 2. This sets the bit in the write_set for each field. + */ + res= setup_fields(thd, 0, fields, 2, 0, 0); /* Restore the current context. */ table_list->next_local= save_next_local; @@ -239,9 +248,10 @@ static int check_update_fields(THD *thd, TABLE_LIST *insert_table_list, /* Check the fields we are going to modify. This will set the query_id - of all used fields to the threads query_id. + of all used fields to the threads query_id. It will also set all + fields into the write set of this table. 
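All the ha_index_init() calls touched in this diff (sp.cc, sql_acl.cc, sql_help.cc, and the three sql_handler.cc cases above) gain a second argument, a flag saying the caller will iterate in index order via index_first()/index_read() and friends. The likely beneficiary is the new partition handler, which has to merge rows from several underlying partitions when ordered retrieval is requested but can stream them otherwise. A hypothetical engine-side sketch of honoring the flag; this is not the real handler contract:

    // Hypothetical engine-side view of the new flag: callers that will
    // iterate with index_first()/index_next() pass sorted == true.
    class toy_handler
    {
      bool sorted_scan;

    public:
      int ha_index_init(unsigned keyno, bool sorted)
      {
        sorted_scan= sorted;
        // An engine whose native scan order is unordered (say, one that
        // reads several partitions or fragments in parallel) must merge
        // results in key order when sorted is set, and may stream them
        // in arrival order otherwise.
        return index_init(keyno);
      }

    private:
      int index_init(unsigned keyno) { (void) keyno; return 0; }
    };

Passing 1 at every call site here is the conservative choice: these callers all walk the index in order.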
*/ - if (setup_fields(thd, 0, update_fields, 1, 0, 0)) + if (setup_fields(thd, 0, update_fields, 2, 0, 0)) return -1; if (table->timestamp_field) @@ -251,7 +261,10 @@ static int check_update_fields(THD *thd, TABLE_LIST *insert_table_list, clear_timestamp_auto_bits(table->timestamp_field_type, TIMESTAMP_AUTO_SET_ON_UPDATE); else + { table->timestamp_field->query_id= timestamp_query_id; + table->file->ha_set_bit_in_write_set(table->timestamp_field->fieldnr); + } } return 0; @@ -688,7 +701,7 @@ static bool check_view_insertability(THD * thd, TABLE_LIST *view) Field_translator *trans; Field **field_ptr= table->field; uint used_fields_buff_size= (table->s->fields + 7) / 8; - uchar *used_fields_buff= (uchar*)thd->alloc(used_fields_buff_size); + uint32 *used_fields_buff= (uint32*)thd->alloc(used_fields_buff_size); MY_BITMAP used_fields; DBUG_ENTER("check_key_in_view"); @@ -928,7 +941,7 @@ bool mysql_prepare_insert(THD *thd, TABLE_LIST *table_list, select_lex->first_execution= 0; } if (duplic == DUP_UPDATE || duplic == DUP_REPLACE) - table->file->extra(HA_EXTRA_RETRIEVE_PRIMARY_KEY); + table->file->ha_retrieve_all_pk(); DBUG_RETURN(FALSE); } @@ -2303,7 +2316,7 @@ select_insert::~select_insert() if (table) { table->next_number_field=0; - table->file->reset(); + table->file->ha_reset(); } thd->count_cuted_fields= CHECK_FIELD_IGNORE; thd->abort_on_warning= 0; diff --git a/sql/sql_lex.cc b/sql/sql_lex.cc index ac2d7e82fcf..c19efff2d85 100644 --- a/sql/sql_lex.cc +++ b/sql/sql_lex.cc @@ -28,7 +28,8 @@ We are using pointer to this variable for distinguishing between assignment to NEW row field (when parsing trigger definition) and structured variable. */ -sys_var_long_ptr trg_new_row_fake_var(0, 0); + +sys_var *trg_new_row_fake_var= (sys_var*) 0x01; /* Macros to look like lex */ @@ -159,6 +160,7 @@ void lex_start(THD *thd, uchar *buf,uint length) lex->yylineno = 1; lex->in_comment=0; lex->length=0; + lex->part_info= 0; lex->select_lex.in_sum_expr=0; lex->select_lex.expr_list.empty(); lex->select_lex.ftfunc_list_alloc.empty(); diff --git a/sql/sql_lex.h b/sql/sql_lex.h index 372bbc5576b..5126d0ee463 100644 --- a/sql/sql_lex.h +++ b/sql/sql_lex.h @@ -25,6 +25,7 @@ class sp_head; class sp_name; class sp_instr; class sp_pcontext; +class partition_info; /* The following hack is needed because mysql_yacc.cc does not define @@ -52,8 +53,8 @@ enum enum_sql_command { SQLCOM_DELETE, SQLCOM_TRUNCATE, SQLCOM_DROP_TABLE, SQLCOM_DROP_INDEX, SQLCOM_SHOW_DATABASES, SQLCOM_SHOW_TABLES, SQLCOM_SHOW_FIELDS, - SQLCOM_SHOW_KEYS, SQLCOM_SHOW_VARIABLES, SQLCOM_SHOW_LOGS, SQLCOM_SHOW_STATUS, - SQLCOM_SHOW_INNODB_STATUS, SQLCOM_SHOW_NDBCLUSTER_STATUS, SQLCOM_SHOW_MUTEX_STATUS, + SQLCOM_SHOW_KEYS, SQLCOM_SHOW_VARIABLES, SQLCOM_SHOW_STATUS, + SQLCOM_SHOW_ENGINE_LOGS, SQLCOM_SHOW_ENGINE_STATUS, SQLCOM_SHOW_ENGINE_MUTEX, SQLCOM_SHOW_PROCESSLIST, SQLCOM_SHOW_MASTER_STAT, SQLCOM_SHOW_SLAVE_STAT, SQLCOM_SHOW_GRANTS, SQLCOM_SHOW_CREATE, SQLCOM_SHOW_CHARSETS, SQLCOM_SHOW_COLLATIONS, SQLCOM_SHOW_CREATE_DB, SQLCOM_SHOW_TABLE_STATUS, @@ -91,6 +92,8 @@ enum enum_sql_command { SQLCOM_XA_START, SQLCOM_XA_END, SQLCOM_XA_PREPARE, SQLCOM_XA_COMMIT, SQLCOM_XA_ROLLBACK, SQLCOM_XA_RECOVER, SQLCOM_SHOW_PROC_CODE, SQLCOM_SHOW_FUNC_CODE, + SQLCOM_INSTALL_PLUGIN, SQLCOM_UNINSTALL_PLUGIN, + SQLCOM_SHOW_AUTHORS, /* This should be the last !!! 
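Looking back at the sql_delete.cc hunks, both mysql_delete() and multi_delete::do_deletes() now bracket their row loops with start_bulk_delete()/end_bulk_delete(), so an engine may defer the actual deletes and apply them in one batch; a failure from the final flush has to be reported even when every per-row call succeeded. A self-contained sketch of that shape, assuming the convention visible in the diff that start_bulk_delete() returns 0 when the engine batches:

    #include <cstdio>

    // Minimal stand-in for the handler interface; an engine that batches
    // returns 0 from start_bulk_delete() and flushes deferred work in
    // end_bulk_delete().
    struct ToyHandler
    {
      int pending;
      ToyHandler() : pending(0) {}
      int  start_bulk_delete() { return 0; }           // 0: batching on
      int  delete_row()        { pending++; return 0; } // defer the delete
      int  end_bulk_delete()                           // flush the batch
      {
        std::printf("flushing %d deferred deletes\n", pending);
        pending= 0;
        return 0;                        // non-zero would be an error
      }
      void print_error(int err) { std::printf("error %d\n", err); }
    };

    // Shape of the reworked delete loop: errors from the final flush are
    // surfaced even when every per-row call succeeded.
    int delete_all(ToyHandler &h, int rows)
    {
      int error= 0, loc_error;
      bool will_batch= !h.start_bulk_delete();
      for (int i= 0; i < rows && !error; i++)
        if ((error= h.delete_row()))
          h.print_error(error);
      if (will_batch && (loc_error= h.end_bulk_delete()))
      {
        if (!error)
          h.print_error(loc_error);
        error= 1;
      }
      return error;
    }

    int main() { ToyHandler h; return delete_all(h, 3); }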
*/ SQLCOM_END @@ -660,6 +663,11 @@ typedef class st_select_lex SELECT_LEX; #define ALTER_KEYS_ONOFF 512 #define ALTER_CONVERT 1024 #define ALTER_FORCE 2048 +#define ALTER_RECREATE 4096 +#define ALTER_ADD_PARTITION 8192 +#define ALTER_DROP_PARTITION 16384 +#define ALTER_COALESCE_PARTITION 32768 +#define ALTER_REORGANISE_PARTITION 65536 typedef struct st_alter_info { @@ -668,9 +676,17 @@ typedef struct st_alter_info uint flags; enum enum_enable_or_disable keys_onoff; enum tablespace_op_type tablespace_op; + List<char> partition_names; + uint no_parts; st_alter_info(){clear();} - void clear(){keys_onoff= LEAVE_AS_IS;tablespace_op= NO_TABLESPACE_OP;} + void clear() + { + keys_onoff= LEAVE_AS_IS; + tablespace_op= NO_TABLESPACE_OP; + no_parts= 0; + partition_names.empty(); + } void reset(){drop_list.empty();alter_list.empty();clear();} } ALTER_INFO; @@ -689,7 +705,7 @@ struct st_trg_chistics enum trg_event_type event; }; -extern sys_var_long_ptr trg_new_row_fake_var; +extern sys_var *trg_new_row_fake_var; enum xa_option_words {XA_NONE, XA_JOIN, XA_RESUME, XA_ONE_PHASE, XA_SUSPEND, XA_FOR_MIGRATE}; @@ -740,6 +756,8 @@ typedef struct st_lex TABLE_LIST *leaf_tables_insert; char *create_view_start; char *create_view_select_start; + /* Partition info structure filled in by PARTITION BY parse part */ + partition_info *part_info; /* The definer of the object being created (view, trigger, stored routine). diff --git a/sql/sql_load.cc b/sql/sql_load.cc index 2a0145af9c7..911e0c38fc5 100644 --- a/sql/sql_load.cc +++ b/sql/sql_load.cc @@ -176,7 +176,7 @@ bool mysql_load(THD *thd,sql_exchange *ex,TABLE_LIST *table_list, The main thing to fix to remove this restriction is to ensure that the table is marked to be 'used for insert' in which case we should never - mark this table as as 'const table' (ie, one that has only one row). + mark this table as 'const table' (ie, one that has only one row). */ if (unique_table(table_list, table_list->next_global)) { @@ -192,6 +192,10 @@ bool mysql_load(THD *thd,sql_exchange *ex,TABLE_LIST *table_list, Field **field; for (field=table->field; *field ; field++) fields_vars.push_back(new Item_field(*field)); + /* + Since all fields are set we set all bits in the write set + */ + table->file->ha_set_all_bits_in_write_set(); table->timestamp_field_type= TIMESTAMP_NO_AUTO_SET; /* Let us also prepare SET clause, altough it is probably empty @@ -204,8 +208,15 @@ bool mysql_load(THD *thd,sql_exchange *ex,TABLE_LIST *table_list, else { // Part field list /* TODO: use this conds for 'WITH CHECK OPTIONS' */ - if (setup_fields(thd, 0, fields_vars, 1, 0, 0) || - setup_fields(thd, 0, set_fields, 1, 0, 0) || + /* + Indicate that both variables in field list and fields in update_list + is to be included in write set of table. 
We do however set all bits + in write set anyways since it is not allowed to specify NULLs in + LOAD DATA + */ + table->file->ha_set_all_bits_in_write_set(); + if (setup_fields(thd, 0, fields_vars, 2, 0, 0) || + setup_fields(thd, 0, set_fields, 2, 0, 0) || check_that_all_fields_are_given_values(thd, table, table_list)) DBUG_RETURN(TRUE); /* diff --git a/sql/sql_manager.cc b/sql/sql_manager.cc index 1d3acd1696c..f580bcb16d9 100644 --- a/sql/sql_manager.cc +++ b/sql/sql_manager.cc @@ -32,12 +32,43 @@ pthread_t manager_thread; pthread_mutex_t LOCK_manager; pthread_cond_t COND_manager; +struct handler_cb { + struct handler_cb *next; + void (*action)(void); +}; + +static struct handler_cb * volatile cb_list; + +bool mysql_manager_submit(void (*action)()) +{ + bool result= FALSE; + struct handler_cb * volatile *cb; + pthread_mutex_lock(&LOCK_manager); + cb= &cb_list; + while (*cb && (*cb)->action != action) + cb= &(*cb)->next; + if (!*cb) + { + *cb= (struct handler_cb *)my_malloc(sizeof(struct handler_cb), MYF(MY_WME)); + if (!*cb) + result= TRUE; + else + { + (*cb)->next= NULL; + (*cb)->action= action; + } + } + pthread_mutex_unlock(&LOCK_manager); + return result; +} + pthread_handler_t handle_manager(void *arg __attribute__((unused))) { int error = 0; ulong status; struct timespec abstime; bool reset_flush_time = TRUE; + struct handler_cb *cb= NULL; my_thread_init(); DBUG_ENTER("handle_manager"); @@ -68,6 +99,11 @@ pthread_handler_t handle_manager(void *arg __attribute__((unused))) } status = manager_status; manager_status = 0; + if (cb == NULL) + { + cb= cb_list; + cb_list= NULL; + } pthread_mutex_unlock(&LOCK_manager); if (abort_loop) @@ -80,13 +116,13 @@ pthread_handler_t handle_manager(void *arg __attribute__((unused))) reset_flush_time = TRUE; } -#ifdef HAVE_BERKELEY_DB - if (status & MANAGER_BERKELEY_LOG_CLEANUP) + while (cb) { - berkeley_cleanup_log_files(); - status &= ~MANAGER_BERKELEY_LOG_CLEANUP; + struct handler_cb *next= cb->next; + cb->action(); + my_free((gptr)cb, MYF(0)); + cb= next; } -#endif if (status) DBUG_PRINT("error", ("manager did not handle something: %lx", status)); diff --git a/sql/sql_manager.h b/sql/sql_manager.h index 35704705820..d42deb8ff81 100644 --- a/sql/sql_manager.h +++ b/sql/sql_manager.h @@ -14,6 +14,6 @@ along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#ifdef HAVE_BERKELEY_DB +#ifdef WITH_BERKELEY_STORAGE_ENGINE void berkeley_cleanup_log_files(void); -#endif /* HAVE_BERKELEY_DB */ +#endif /* WITH_BERKELEY_STORAGE_ENGINE */ diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc index d06cceba77b..74e4b094bbe 100644 --- a/sql/sql_parse.cc +++ b/sql/sql_parse.cc @@ -16,19 +16,12 @@ #include "mysql_priv.h" #include "sql_repl.h" +#include "rpl_filter.h" #include "repl_failsafe.h" #include <m_ctype.h> #include <myisam.h> #include <my_dir.h> -#ifdef HAVE_INNOBASE_DB -#include "ha_innodb.h" -#endif - -#ifdef HAVE_NDBCLUSTER_DB -#include "ha_ndbcluster.h" -#endif - #include "sp_head.h" #include "sp.h" #include "sp_cache.h" @@ -189,7 +182,8 @@ static bool begin_trans(THD *thd) */ inline bool all_tables_not_ok(THD *thd, TABLE_LIST *tables) { - return table_rules_on && tables && !tables_ok(thd,tables); + return rpl_filter->is_on() && tables && !thd->spcont && + !rpl_filter->tables_ok(thd->db, tables); } #endif @@ -1777,8 +1771,9 @@ bool dispatch_command(enum enum_server_command command, THD *thd, TABLE_LIST table_list; LEX_STRING conv_name; /* Saved variable value */ - my_bool 
old_innodb_table_locks= - IF_INNOBASE_DB(thd->variables.innodb_table_locks, FALSE); + my_bool old_innodb_table_locks= thd->variables.innodb_table_locks; + + /* used as fields initializator */ lex_start(thd, 0, 0); @@ -2687,29 +2682,20 @@ mysql_execute_command(THD *thd) res = load_master_data(thd); break; #endif /* HAVE_REPLICATION */ -#ifdef HAVE_NDBCLUSTER_DB - case SQLCOM_SHOW_NDBCLUSTER_STATUS: - { - res = ndbcluster_show_status(thd); - break; - } -#endif -#ifdef HAVE_INNOBASE_DB - case SQLCOM_SHOW_INNODB_STATUS: + case SQLCOM_SHOW_ENGINE_STATUS: { if (check_global_access(thd, SUPER_ACL)) - goto error; - res = innodb_show_status(thd); + goto error; + res = ha_show_status(thd, lex->create_info.db_type, HA_ENGINE_STATUS); break; } - case SQLCOM_SHOW_MUTEX_STATUS: + case SQLCOM_SHOW_ENGINE_MUTEX: { if (check_global_access(thd, SUPER_ACL)) goto error; - res = innodb_mutex_show_status(thd); + res = ha_show_status(thd, lex->create_info.db_type, HA_ENGINE_MUTEX); break; } -#endif #ifdef HAVE_REPLICATION case SQLCOM_LOAD_MASTER_TABLE: { @@ -3438,13 +3424,16 @@ end_with_restore_list: case SQLCOM_SHOW_STORAGE_ENGINES: res= mysqld_show_storage_engines(thd); break; + case SQLCOM_SHOW_AUTHORS: + res= mysqld_show_authors(thd); + break; case SQLCOM_SHOW_PRIVILEGES: res= mysqld_show_privileges(thd); break; case SQLCOM_SHOW_COLUMN_TYPES: res= mysqld_show_column_types(thd); break; - case SQLCOM_SHOW_LOGS: + case SQLCOM_SHOW_ENGINE_LOGS: #ifdef DONT_ALLOW_SHOW_COMMANDS my_message(ER_NOT_ALLOWED_COMMAND, ER(ER_NOT_ALLOWED_COMMAND), MYF(0)); /* purecov: inspected */ @@ -3453,7 +3442,7 @@ end_with_restore_list: { if (grant_option && check_access(thd, FILE_ACL, any_db,0,0,0,0)) goto error; - res= mysqld_show_logs(thd); + res= ha_show_status(thd, lex->create_info.db_type, HA_ENGINE_LOGS); break; } #endif @@ -3572,9 +3561,9 @@ end_with_restore_list: above was not called. So we have to check rules again here. */ #ifdef HAVE_REPLICATION - if (thd->slave_thread && - (!db_ok(lex->name, replicate_do_db, replicate_ignore_db) || - !db_ok_with_wild_table(lex->name))) + if (thd->slave_thread && + (!rpl_filter->db_ok(lex->name) || + !rpl_filter->db_ok_with_wild_table(lex->name))) { my_message(ER_SLAVE_IGNORED_TABLE, ER(ER_SLAVE_IGNORED_TABLE), MYF(0)); break; @@ -3607,8 +3596,8 @@ end_with_restore_list: */ #ifdef HAVE_REPLICATION if (thd->slave_thread && - (!db_ok(lex->name, replicate_do_db, replicate_ignore_db) || - !db_ok_with_wild_table(lex->name))) + (!rpl_filter->db_ok(lex->name) || + !rpl_filter->db_ok_with_wild_table(lex->name))) { my_message(ER_SLAVE_IGNORED_TABLE, ER(ER_SLAVE_IGNORED_TABLE), MYF(0)); break; @@ -3647,8 +3636,8 @@ end_with_restore_list: */ #ifdef HAVE_REPLICATION if (thd->slave_thread && - (!db_ok(db, replicate_do_db, replicate_ignore_db) || - !db_ok_with_wild_table(db))) + (!rpl_filter->db_ok(lex->name) || + !rpl_filter->db_ok_with_wild_table(lex->name))) { my_message(ER_SLAVE_IGNORED_TABLE, ER(ER_SLAVE_IGNORED_TABLE), MYF(0)); break; @@ -4821,6 +4810,15 @@ end_with_restore_list: case SQLCOM_XA_RECOVER: res= mysql_xa_recover(thd); break; + case SQLCOM_INSTALL_PLUGIN: + if (! (res= mysql_install_plugin(thd, &thd->lex->comment, + &thd->lex->ident))) + send_ok(thd); + break; + case SQLCOM_UNINSTALL_PLUGIN: + if (! 
(res= mysql_uninstall_plugin(thd, &thd->lex->comment)))
+      send_ok(thd);
+    break;
   default:
     DBUG_ASSERT(0);                             /* Impossible */
     send_ok(thd);
diff --git a/sql/sql_partition.cc b/sql/sql_partition.cc
new file mode 100644
index 00000000000..202c747cf8c
--- /dev/null
+++ b/sql/sql_partition.cc
@@ -0,0 +1,3228 @@
+/* Copyright (C) 2005 MySQL AB
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation; either version 2 of the License, or
+  (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, write to the Free Software
+  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
+
+/*
+  This file was introduced as a container for general functionality related
+  to partitioning introduced in MySQL version 5.1. It contains functionality
+  used by all handlers that support partitioning, which in the first version
+  is the partitioning handler itself and the NDB handler.
+
+  The first version was written by Mikael Ronstrom.
+
+  This version supports RANGE partitioning, LIST partitioning, HASH
+  partitioning and composite partitioning (hereafter called
+  subpartitioning), where each RANGE/LIST partition is in turn HASH
+  partitioned. The hash function can either be supplied by the user or be
+  derived from a list of fields (also called KEY partitioning), in which
+  case the MySQL server uses an internal hash function.
+  There are also quite a few defaults that can be used.
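As a rough standalone model of the composite scheme described above (illustrative C++ only; CompositeScheme and the part-major numbering part_id * no_subparts + subpart_id are assumptions made for the sketch, not taken from the patch):

#include <cstdint>

// Two-level id computation for composite partitioning: a RANGE/LIST
// function picks the top-level partition, a HASH/KEY function picks
// the subpartition inside it.
struct CompositeScheme {
  uint32_t no_subparts;
  uint32_t (*top_level_id)(int64_t key);   // e.g. range boundary lookup
  uint32_t (*sub_id)(int64_t key);         // e.g. hash % no_subparts

  uint32_t global_id(int64_t range_key, int64_t hash_key) const {
    // assumed part-major numbering of the physical partitions
    return top_level_id(range_key) * no_subparts + sub_id(hash_key);
  }
};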
+*/ + +/* Some general useful functions */ + +#include "mysql_priv.h" +#include <errno.h> +#include <m_ctype.h> +#include "md5.h" + +#ifdef WITH_PARTITION_STORAGE_ENGINE +/* + Partition related functions declarations and some static constants; +*/ +static const char *hash_str= "HASH"; +static const char *range_str= "RANGE"; +static const char *list_str= "LIST"; +static const char *part_str= "PARTITION"; +static const char *sub_str= "SUB"; +static const char *by_str= "BY"; +static const char *key_str= "KEY"; +static const char *space_str= " "; +static const char *equal_str= "="; +static const char *end_paren_str= ")"; +static const char *begin_paren_str= "("; +static const char *comma_str= ","; +static char buff[22]; + +bool get_partition_id_list(partition_info *part_info, + uint32 *part_id); +bool get_partition_id_range(partition_info *part_info, + uint32 *part_id); +bool get_partition_id_hash_nosub(partition_info *part_info, + uint32 *part_id); +bool get_partition_id_key_nosub(partition_info *part_info, + uint32 *part_id); +bool get_partition_id_linear_hash_nosub(partition_info *part_info, + uint32 *part_id); +bool get_partition_id_linear_key_nosub(partition_info *part_info, + uint32 *part_id); +bool get_partition_id_range_sub_hash(partition_info *part_info, + uint32 *part_id); +bool get_partition_id_range_sub_key(partition_info *part_info, + uint32 *part_id); +bool get_partition_id_range_sub_linear_hash(partition_info *part_info, + uint32 *part_id); +bool get_partition_id_range_sub_linear_key(partition_info *part_info, + uint32 *part_id); +bool get_partition_id_list_sub_hash(partition_info *part_info, + uint32 *part_id); +bool get_partition_id_list_sub_key(partition_info *part_info, + uint32 *part_id); +bool get_partition_id_list_sub_linear_hash(partition_info *part_info, + uint32 *part_id); +bool get_partition_id_list_sub_linear_key(partition_info *part_info, + uint32 *part_id); +uint32 get_partition_id_hash_sub(partition_info *part_info); +uint32 get_partition_id_key_sub(partition_info *part_info); +uint32 get_partition_id_linear_hash_sub(partition_info *part_info); +uint32 get_partition_id_linear_key_sub(partition_info *part_info); +#endif + + +/* + A routine used by the parser to decide whether we are specifying a full + partitioning or if only partitions to add or to split. 
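The long list of get_partition_id_* declarations above hints at the dispatch design: one variant per partitioning scheme, selected once when the table is opened and then reached through a function pointer on every row. A minimal sketch of that idea (illustrative C++; all names are invented):

#include <cstdint>

// One function pointer per opened table; the per-row path is a single
// indirect call, with no per-row branching on the partitioning type.
struct PartitionInfoSketch;
typedef bool (*get_part_id_fn)(PartitionInfoSketch *info, uint32_t *part_id);

struct PartitionInfoSketch {
  get_part_id_fn get_partition_id;  // chosen once at table-open time
};

inline bool compute_part_id(PartitionInfoSketch *info, uint32_t *part_id) {
  return info->get_partition_id(info, part_id);
}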
+ SYNOPSIS + is_partition_management() + lex Reference to the lex object + RETURN VALUE + TRUE Yes, it is part of a management partition command + FALSE No, not a management partition command + DESCRIPTION + This needs to be outside of WITH_PARTITION_STORAGE_ENGINE since it is + used from the sql parser that doesn't have any #ifdef's +*/ + +my_bool is_partition_management(LEX *lex) +{ + return (lex->sql_command == SQLCOM_ALTER_TABLE && + (lex->alter_info.flags == ALTER_ADD_PARTITION || + lex->alter_info.flags == ALTER_REORGANISE_PARTITION)); +} + +#ifdef WITH_PARTITION_STORAGE_ENGINE +/* + A support function to check if a partition name is in a list of strings + SYNOPSIS + is_partition_in_list() + part_name String searched for + list_part_names A list of names searched in + RETURN VALUES + TRUE String found + FALSE String not found +*/ + +bool is_partition_in_list(char *part_name, + List<char> list_part_names) +{ + List_iterator<char> part_names_it(list_part_names); + uint no_names= list_part_names.elements; + uint i= 0; + do + { + char *list_name= part_names_it++; + if (!(my_strcasecmp(system_charset_info, part_name, list_name))) + return TRUE; + } while (++i < no_names); + return FALSE; +} + + +/* + A support function to check partition names for duplication in a + partitioned table + SYNOPSIS + is_partitions_in_table() + new_part_info New partition info + old_part_info Old partition info + RETURN VALUES + TRUE Duplicate names found + FALSE Duplicate names not found + DESCRIPTION + Can handle that the new and old parts are the same in which case it + checks that the list of names in the partitions doesn't contain any + duplicated names. +*/ + +bool is_partitions_in_table(partition_info *new_part_info, + partition_info *old_part_info) +{ + uint no_new_parts= new_part_info->partitions.elements, new_count; + uint no_old_parts= old_part_info->partitions.elements, old_count; + List_iterator<partition_element> new_parts_it(new_part_info->partitions); + bool same_part_info= (new_part_info == old_part_info); + DBUG_ENTER("is_partitions_in_table"); + + new_count= 0; + do + { + List_iterator<partition_element> old_parts_it(old_part_info->partitions); + char *new_name= (new_parts_it++)->partition_name; + new_count++; + old_count= 0; + do + { + char *old_name= (old_parts_it++)->partition_name; + old_count++; + if (same_part_info && old_count == new_count) + break; + if (!(my_strcasecmp(system_charset_info, old_name, new_name))) + { + DBUG_RETURN(TRUE); + } + } while (old_count < no_old_parts); + } while (new_count < no_new_parts); + DBUG_RETURN(FALSE); +} + + +/* + A useful routine used by update_row for partition handlers to calculate + the partition ids of the old and the new record. + SYNOPSIS + get_part_for_update() + old_data Buffer of old record + new_data Buffer of new record + rec0 Reference to table->record[0] + part_info Reference to partition information + part_field_array A NULL-terminated array of fields for partition + function + old_part_id The returned partition id of old record + new_part_id The returned partition id of new record + RETURN VALUE + 0 Success + > 0 Error code + DESCRIPTION + Dependent on whether buf is not record[0] we need to prepare the + fields. Then we call the function pointer get_partition_id to + calculate the partition ids. 
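Before the implementation below, a compact sketch of what get_parts_for_update decides (illustrative C++ with an invented Row type): if the old and new rows map to different partitions, the update must become a delete in one partition plus an insert in another.

#include <cstdint>

struct Row { int64_t part_key; };   // hypothetical stand-in for a record

enum class UpdateKind { IN_PLACE, MOVE_BETWEEN_PARTITIONS };

// part_id plays the role of the get_partition_id function pointer.
UpdateKind classify_update(const Row &old_row, const Row &new_row,
                           uint32_t (*part_id)(const Row &),
                           uint32_t *old_part, uint32_t *new_part) {
  *old_part = part_id(old_row);
  *new_part = part_id(new_row);
  return (*old_part == *new_part) ? UpdateKind::IN_PLACE
                                  : UpdateKind::MOVE_BETWEEN_PARTITIONS;
}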
+*/ + +int get_parts_for_update(const byte *old_data, byte *new_data, + const byte *rec0, partition_info *part_info, + uint32 *old_part_id, uint32 *new_part_id) +{ + Field **part_field_array= part_info->full_part_field_array; + int error; + DBUG_ENTER("get_parts_for_update"); + DBUG_ASSERT(new_data == rec0); + + set_field_ptr(part_field_array, old_data, rec0); + error= part_info->get_partition_id(part_info, old_part_id); + set_field_ptr(part_field_array, rec0, old_data); + if (unlikely(error)) // Should never happen + { + DBUG_ASSERT(0); + DBUG_RETURN(error); + } +#ifdef NOT_NEEDED + if (new_data == rec0) +#endif + { + if (unlikely(error= part_info->get_partition_id(part_info,new_part_id))) + { + DBUG_RETURN(error); + } + } +#ifdef NOT_NEEDED + else + { + /* + This branch should never execute but it is written anyways for + future use. It will be tested by ensuring that the above + condition is false in one test situation before pushing the code. + */ + set_field_ptr(part_field_array, new_data, rec0); + error= part_info->get_partition_id(part_info, new_part_id); + set_field_ptr(part_field_array, rec0, new_data); + if (unlikely(error)) + { + DBUG_RETURN(error); + } + } +#endif + DBUG_RETURN(0); +} + + +/* + A useful routine used by delete_row for partition handlers to calculate + the partition id. + SYNOPSIS + get_part_for_delete() + buf Buffer of old record + rec0 Reference to table->record[0] + part_info Reference to partition information + part_field_array A NULL-terminated array of fields for partition + function + part_id The returned partition id to delete from + RETURN VALUE + 0 Success + > 0 Error code + DESCRIPTION + Dependent on whether buf is not record[0] we need to prepare the + fields. Then we call the function pointer get_partition_id to + calculate the partition id. +*/ + +int get_part_for_delete(const byte *buf, const byte *rec0, + partition_info *part_info, uint32 *part_id) +{ + int error; + DBUG_ENTER("get_part_for_delete"); + + if (likely(buf == rec0)) + { + if (unlikely((error= part_info->get_partition_id(part_info, part_id)))) + { + DBUG_RETURN(error); + } + DBUG_PRINT("info", ("Delete from partition %d", *part_id)); + } + else + { + Field **part_field_array= part_info->full_part_field_array; + set_field_ptr(part_field_array, buf, rec0); + error= part_info->get_partition_id(part_info, part_id); + set_field_ptr(part_field_array, rec0, buf); + if (unlikely(error)) + { + DBUG_RETURN(error); + } + DBUG_PRINT("info", ("Delete from partition %d (path2)", *part_id)); + } + DBUG_RETURN(0); +} + + +/* + This routine allocates an array for all range constants to achieve a fast + check what partition a certain value belongs to. At the same time it does + also check that the range constants are defined in increasing order and + that the expressions are constant integer expressions. + SYNOPSIS + check_range_constants() + part_info + RETURN VALUE + TRUE An error occurred during creation of range constants + FALSE Successful creation of range constant mapping + DESCRIPTION + This routine is called from check_partition_info to get a quick error + before we came too far into the CREATE TABLE process. It is also called + from fix_partition_func every time we open the .frm file. It is only + called for RANGE PARTITIONed tables. 
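The range-array idea that check_range_constants prepares can be sketched standalone (illustrative C++; std::upper_bound plays the role of the later binary search): boundaries must be strictly increasing, and a value's partition is the index of the first boundary greater than the value.

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <vector>

// Boundaries must be strictly increasing for the lookup to be valid.
bool strictly_increasing(const std::vector<int64_t> &bounds) {
  return std::adjacent_find(bounds.begin(), bounds.end(),
                            [](int64_t a, int64_t b) { return a >= b; })
         == bounds.end();
}

// VALUES LESS THAN semantics: partition i holds values < bounds[i].
// Returns bounds.size() when the value is above every boundary.
size_t range_partition(const std::vector<int64_t> &bounds, int64_t v) {
  return std::upper_bound(bounds.begin(), bounds.end(), v) - bounds.begin();
}

With bounds {10, 20, 30}, the value 15 maps to partition 1, and 35 falls off the end, which corresponds to the no-matching-partition error case.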
+*/
+
+static bool check_range_constants(partition_info *part_info)
+{
+  partition_element* part_def;
+  longlong current_largest_int= LONGLONG_MIN, part_range_value_int;
+  uint no_parts= part_info->no_parts, i;
+  List_iterator<partition_element> it(part_info->partitions);
+  bool result= TRUE;
+  DBUG_ENTER("check_range_constants");
+  DBUG_PRINT("enter", ("INT_RESULT with %d parts", no_parts));
+
+  part_info->part_result_type= INT_RESULT;
+  part_info->range_int_array=
+    (longlong*)sql_alloc(no_parts * sizeof(longlong));
+  if (unlikely(part_info->range_int_array == NULL))
+  {
+    my_error(ER_OUTOFMEMORY, MYF(0), no_parts*sizeof(longlong));
+    goto end;
+  }
+  i= 0;
+  do
+  {
+    part_def= it++;
+    if ((i != (no_parts - 1)) || !part_info->defined_max_value)
+      part_range_value_int= part_def->range_value;
+    else
+      part_range_value_int= LONGLONG_MAX;
+    if (likely(current_largest_int < part_range_value_int))
+    {
+      current_largest_int= part_range_value_int;
+      part_info->range_int_array[i]= part_range_value_int;
+    }
+    else
+    {
+      my_error(ER_RANGE_NOT_INCREASING_ERROR, MYF(0));
+      goto end;
+    }
+  } while (++i < no_parts);
+  result= FALSE;
+end:
+  DBUG_RETURN(result);
+}
+
+
+/*
+  A support routine for check_list_constants used by qsort to sort the
+  constant list expressions.
+  SYNOPSIS
+    list_part_cmp()
+      a                  First list constant to compare with
+      b                  Second list constant to compare with
+  RETURN VALUE
+    +1                   a > b
+     0                   a == b
+    -1                   a < b
+*/
+
+static int list_part_cmp(const void* a, const void* b)
+{
+  longlong a1, b1;
+  a1= ((LIST_PART_ENTRY*)a)->list_value;
+  b1= ((LIST_PART_ENTRY*)b)->list_value;
+  if (a1 < b1)
+    return -1;
+  else if (a1 > b1)
+    return +1;
+  else
+    return 0;
+}
+
+
+/*
+  This routine allocates an array for all list constants to achieve a fast
+  check of which partition a certain value belongs to. At the same time it
+  also checks that there are no duplicates among the list constants and
+  that the list expressions are constant integer expressions.
+  SYNOPSIS
+    check_list_constants()
+      part_info
+  RETURN VALUE
+    TRUE                 An error occurred during creation of list constants
+    FALSE                Successful creation of list constant mapping
+  DESCRIPTION
+    This routine is called from check_partition_info to get a quick error
+    before we come too far into the CREATE TABLE process. It is also called
+    from fix_partition_func every time we open the .frm file. It is only
+    called for LIST PARTITIONed tables.
+*/
+
+static bool check_list_constants(partition_info *part_info)
+{
+  uint i, no_list_values= 0, no_parts, list_index= 0;
+  longlong *list_value;
+  bool not_first, result= TRUE;
+  longlong curr_value, prev_value;
+  partition_element* part_def;
+  List_iterator<partition_element> list_func_it(part_info->partitions);
+  DBUG_ENTER("check_list_constants");
+
+  part_info->part_result_type= INT_RESULT;
+
+  /*
+    We begin by calculating the number of list values that have been
+    defined in the first step.
+
+    We use this number to allocate a properly sized array of structs
+    to keep the partition id and the value to use in that partition.
+    In the second traversal we assign them values in the struct array.
+
+    Finally we sort the array of structs in order of values to enable
+    a quick binary search for the proper value to discover the
+    partition id.
+    After sorting the array we check that there are no duplicates in the
+    list.
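check_list_constants builds the analogous structure for LIST partitioning; a standalone sketch (illustrative C++): sort the (value, partition) pairs once, reject duplicates, then answer point lookups by binary search.

#include <algorithm>
#include <cstdint>
#include <vector>

struct ListEntry { int64_t value; uint32_t part_id; };

// Sort once by value and reject duplicate list constants.
bool build_list_array(std::vector<ListEntry> &arr) {
  std::sort(arr.begin(), arr.end(),
            [](const ListEntry &a, const ListEntry &b)
            { return a.value < b.value; });
  for (size_t i = 1; i < arr.size(); i++)
    if (arr[i].value == arr[i - 1].value)
      return false;                       // duplicate list constant
  return true;
}

// Fills *part_id and returns true when the value belongs to a partition.
bool list_partition(const std::vector<ListEntry> &arr, int64_t v,
                    uint32_t *part_id) {
  auto it = std::lower_bound(arr.begin(), arr.end(), v,
                             [](const ListEntry &e, int64_t x)
                             { return e.value < x; });
  if (it == arr.end() || it->value != v)
    return false;                         // value not in any VALUES IN list
  *part_id = it->part_id;
  return true;
}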
+ */ + + no_parts= part_info->no_parts; + i= 0; + do + { + part_def= list_func_it++; + List_iterator<longlong> list_val_it1(part_def->list_val_list); + while (list_val_it1++) + no_list_values++; + } while (++i < no_parts); + list_func_it.rewind(); + part_info->no_list_values= no_list_values; + part_info->list_array= + (LIST_PART_ENTRY*)sql_alloc(no_list_values*sizeof(LIST_PART_ENTRY)); + if (unlikely(part_info->list_array == NULL)) + { + my_error(ER_OUTOFMEMORY, MYF(0), no_list_values*sizeof(LIST_PART_ENTRY)); + goto end; + } + + i= 0; + do + { + part_def= list_func_it++; + List_iterator<longlong> list_val_it2(part_def->list_val_list); + while ((list_value= list_val_it2++)) + { + part_info->list_array[list_index].list_value= *list_value; + part_info->list_array[list_index++].partition_id= i; + } + } while (++i < no_parts); + + qsort((void*)part_info->list_array, no_list_values, + sizeof(LIST_PART_ENTRY), &list_part_cmp); + + not_first= FALSE; + i= prev_value= 0; //prev_value initialised to quiet compiler + do + { + curr_value= part_info->list_array[i].list_value; + if (likely(!not_first || prev_value != curr_value)) + { + prev_value= curr_value; + not_first= TRUE; + } + else + { + my_error(ER_MULTIPLE_DEF_CONST_IN_LIST_PART_ERROR, MYF(0)); + goto end; + } + } while (++i < no_list_values); + result= FALSE; +end: + DBUG_RETURN(result); +} + + +/* + Create a memory area where default partition names are stored and fill it + up with the names. + SYNOPSIS + create_default_partition_names() + no_parts Number of partitions + subpart Is it subpartitions + RETURN VALUE + A pointer to the memory area of the default partition names + DESCRIPTION + A support routine for the partition code where default values are + generated. + The external routine needing this code is check_partition_info +*/ + +#define MAX_PART_NAME_SIZE 8 + +static char *create_default_partition_names(uint no_parts, uint start_no, + bool subpart) +{ + char *ptr= sql_calloc(no_parts*MAX_PART_NAME_SIZE); + char *move_ptr= ptr; + uint i= 0; + DBUG_ENTER("create_default_partition_names"); + if (likely(ptr != 0)) + { + do + { + if (subpart) + my_sprintf(move_ptr, (move_ptr,"sp%u", (start_no + i))); + else + my_sprintf(move_ptr, (move_ptr,"p%u", (start_no + i))); + move_ptr+=MAX_PART_NAME_SIZE; + } while (++i < no_parts); + } + else + { + my_error(ER_OUTOFMEMORY, MYF(0), no_parts*MAX_PART_NAME_SIZE); + } + DBUG_RETURN(ptr); +} + + +/* + Set up all the default partitions not set-up by the user in the SQL + statement. Also perform a number of checks that the user hasn't tried + to use default values where no defaults exists. + SYNOPSIS + set_up_default_partitions() + part_info The reference to all partition information + file A reference to a handler of the table + max_rows Maximum number of rows stored in the table + RETURN VALUE + TRUE Error, attempted default values not possible + FALSE Ok, default partitions set-up + DESCRIPTION + The routine uses the underlying handler of the partitioning to define + the default number of partitions. For some handlers this requires + knowledge of the maximum number of rows to be stored in the table. + This routine only accepts HASH and KEY partitioning and thus there is + no subpartitioning if this routine is successful. 
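The fixed-stride buffer trick used by create_default_partition_names can be shown in isolation (illustrative C++; STRIDE mirrors MAX_PART_NAME_SIZE): every name starts at a multiple of the stride, so the caller steps through the buffer with plain pointer arithmetic, as the code above does with move_ptr and default_name.

#include <cstdio>
#include <vector>

// One allocation holds all default names "p0", "p1", ... (or "sp0",
// "sp1", ... for subpartitions), each at a fixed offset.
std::vector<char> default_partition_names(unsigned no_parts,
                                          unsigned start_no,
                                          bool subpart) {
  const unsigned STRIDE = 8;              // plays the MAX_PART_NAME_SIZE role
  std::vector<char> buf(no_parts * STRIDE, 0);
  for (unsigned i = 0; i < no_parts; i++)
    std::snprintf(&buf[i * STRIDE], STRIDE, subpart ? "sp%u" : "p%u",
                  start_no + i);
  return buf;
}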
+ The external routine needing this code is check_partition_info +*/ + +static bool set_up_default_partitions(partition_info *part_info, + handler *file, ulonglong max_rows, + uint start_no) +{ + uint no_parts, i; + char *default_name; + bool result= TRUE; + DBUG_ENTER("set_up_default_partitions"); + + if (part_info->part_type != HASH_PARTITION) + { + const char *error_string; + if (part_info->part_type == RANGE_PARTITION) + error_string= range_str; + else + error_string= list_str; + my_error(ER_PARTITIONS_MUST_BE_DEFINED_ERROR, MYF(0), error_string); + goto end; + } + if (part_info->no_parts == 0) + part_info->no_parts= file->get_default_no_partitions(max_rows); + no_parts= part_info->no_parts; + part_info->use_default_partitions= FALSE; + if (unlikely(no_parts > MAX_PARTITIONS)) + { + my_error(ER_TOO_MANY_PARTITIONS_ERROR, MYF(0)); + goto end; + } + if (unlikely((!(default_name= create_default_partition_names(no_parts, + start_no, + FALSE))))) + goto end; + i= 0; + do + { + partition_element *part_elem= new partition_element(); + if (likely(part_elem != 0)) + { + part_elem->engine_type= DB_TYPE_UNKNOWN; + part_elem->partition_name= default_name; + default_name+=MAX_PART_NAME_SIZE; + part_info->partitions.push_back(part_elem); + } + else + { + my_error(ER_OUTOFMEMORY, MYF(0), sizeof(partition_element)); + goto end; + } + } while (++i < no_parts); + result= FALSE; +end: + DBUG_RETURN(result); +} + + +/* + Set up all the default subpartitions not set-up by the user in the SQL + statement. Also perform a number of checks that the default partitioning + becomes an allowed partitioning scheme. + SYNOPSIS + set_up_default_subpartitions() + part_info The reference to all partition information + file A reference to a handler of the table + max_rows Maximum number of rows stored in the table + RETURN VALUE + TRUE Error, attempted default values not possible + FALSE Ok, default partitions set-up + DESCRIPTION + The routine uses the underlying handler of the partitioning to define + the default number of partitions. For some handlers this requires + knowledge of the maximum number of rows to be stored in the table. + This routine is only called for RANGE or LIST partitioning and those + need to be specified so only subpartitions are specified. 
+ The external routine needing this code is check_partition_info +*/ + +static bool set_up_default_subpartitions(partition_info *part_info, + handler *file, ulonglong max_rows) +{ + uint i, j, no_parts, no_subparts; + char *default_name, *name_ptr; + bool result= TRUE; + partition_element *part_elem; + List_iterator<partition_element> part_it(part_info->partitions); + DBUG_ENTER("set_up_default_subpartitions"); + + if (part_info->no_subparts == 0) + part_info->no_subparts= file->get_default_no_partitions(max_rows); + no_parts= part_info->no_parts; + no_subparts= part_info->no_subparts; + part_info->use_default_subpartitions= FALSE; + if (unlikely((no_parts * no_subparts) > MAX_PARTITIONS)) + { + my_error(ER_TOO_MANY_PARTITIONS_ERROR, MYF(0)); + goto end; + } + if (unlikely((!(default_name= + create_default_partition_names(no_subparts, (uint)0, TRUE))))) + goto end; + i= 0; + do + { + part_elem= part_it++; + j= 0; + name_ptr= default_name; + do + { + partition_element *subpart_elem= new partition_element(); + if (likely(subpart_elem != 0)) + { + subpart_elem->engine_type= DB_TYPE_UNKNOWN; + subpart_elem->partition_name= name_ptr; + name_ptr+= MAX_PART_NAME_SIZE; + part_elem->subpartitions.push_back(subpart_elem); + } + else + { + my_error(ER_OUTOFMEMORY, MYF(0), sizeof(partition_element)); + goto end; + } + } while (++j < no_subparts); + } while (++i < no_parts); + result= FALSE; +end: + DBUG_RETURN(result); +} + + +/* + Set up defaults for partition or subpartition (cannot set-up for both, + this will return an error. + SYNOPSIS + set_up_defaults_for_partitioning() + part_info The reference to all partition information + file A reference to a handler of the table + max_rows Maximum number of rows stored in the table + RETURN VALUE + TRUE Error, attempted default values not possible + FALSE Ok, default partitions set-up + DESCRIPTION + Support routine for check_partition_info +*/ + +bool set_up_defaults_for_partitioning(partition_info *part_info, + handler *file, + ulonglong max_rows, uint start_no) +{ + DBUG_ENTER("set_up_defaults_for_partitioning"); + + if (part_info->use_default_partitions) + DBUG_RETURN(set_up_default_partitions(part_info, file, max_rows, + start_no)); + if (is_sub_partitioned(part_info) && part_info->use_default_subpartitions) + DBUG_RETURN(set_up_default_subpartitions(part_info, file, max_rows)); + DBUG_RETURN(FALSE); +} + + +/* + Check that all partitions use the same storage engine. + This is currently a limitation in this version. + SYNOPSIS + check_engine_mix() + engine_array An array of engine identifiers + no_parts Total number of partitions + RETURN VALUE + TRUE Error, mixed engines + FALSE Ok, no mixed engines +*/ + +static bool check_engine_mix(u_char *engine_array, uint no_parts) +{ + /* + Current check verifies only that all handlers are the same. + Later this check will be more sophisticated. + */ + uint i= 0; + bool result= FALSE; + DBUG_ENTER("check_engine_mix"); + + do + { + if (engine_array[i] != engine_array[0]) + { + result= TRUE; + break; + } + } while (++i < no_parts); + DBUG_RETURN(result); +} + + +/* + We will check that the partition info requested is possible to set-up in + this version. This routine is an extension of the parser one could say. + If defaults were used we will generate default data structures for all + partitions. + SYNOPSIS + check_partition_info() + part_info The reference to all partition information + db_type Default storage engine if no engine specified per + partition. 
+ file A reference to a handler of the table + max_rows Maximum number of rows stored in the table + RETURN VALUE + TRUE Error, something went wrong + FALSE Ok, full partition data structures are now generated + DESCRIPTION + This code is used early in the CREATE TABLE and ALTER TABLE process. +*/ + +bool check_partition_info(partition_info *part_info,enum db_type eng_type, + handler *file, ulonglong max_rows) +{ + u_char *engine_array= NULL; + uint part_count= 0, i, no_parts, tot_partitions; + bool result= TRUE; + List_iterator<partition_element> part_it(part_info->partitions); + DBUG_ENTER("check_partition_info"); + + if (unlikely(is_sub_partitioned(part_info) && + (!(part_info->part_type == RANGE_PARTITION || + part_info->part_type == LIST_PARTITION)))) + { + /* Only RANGE and LIST partitioning can be subpartitioned */ + my_error(ER_SUBPARTITION_ERROR, MYF(0)); + goto end; + } + if (unlikely(set_up_defaults_for_partitioning(part_info, file, + max_rows, (uint)0))) + goto end; + tot_partitions= get_tot_partitions(part_info); + if (unlikely(tot_partitions > MAX_PARTITIONS)) + { + my_error(ER_TOO_MANY_PARTITIONS_ERROR, MYF(0)); + goto end; + } + if (unlikely(is_partitions_in_table(part_info, part_info))) + { + my_error(ER_SAME_NAME_PARTITION, MYF(0)); + goto end; + } + engine_array= (u_char*)my_malloc(tot_partitions, MYF(MY_WME)); + if (unlikely(!engine_array)) + goto end; + i= 0; + no_parts= part_info->no_parts; + do + { + partition_element *part_elem= part_it++; + if (!is_sub_partitioned(part_info)) + { + if (part_elem->engine_type == DB_TYPE_UNKNOWN) + part_elem->engine_type= eng_type; + DBUG_PRINT("info", ("engine = %u",(uint)part_elem->engine_type)); + engine_array[part_count++]= (u_char)part_elem->engine_type; + } + else + { + uint j= 0, no_subparts= part_info->no_subparts;; + List_iterator<partition_element> sub_it(part_elem->subpartitions); + do + { + part_elem= sub_it++; + if (part_elem->engine_type == DB_TYPE_UNKNOWN) + part_elem->engine_type= eng_type; + DBUG_PRINT("info", ("engine = %u",(uint)part_elem->engine_type)); + engine_array[part_count++]= (u_char)part_elem->engine_type; + } while (++j < no_subparts); + } + } while (++i < part_info->no_parts); + if (unlikely(check_engine_mix(engine_array, part_count))) + { + my_error(ER_MIX_HANDLER_ERROR, MYF(0)); + goto end; + } + + /* + We need to check all constant expressions that they are of the correct + type and that they are increasing for ranges and not overlapping for + list constants. + */ + + if (unlikely((part_info->part_type == RANGE_PARTITION && + check_range_constants(part_info)) || + (part_info->part_type == LIST_PARTITION && + check_list_constants(part_info)))) + goto end; + result= FALSE; +end: + my_free((char*)engine_array,MYF(MY_ALLOW_ZERO_PTR)); + DBUG_RETURN(result); +} + + +/* + A great number of functions below here is part of the fix_partition_func + method. It is used to set up the partition structures for execution from + openfrm. It is called at the end of the openfrm when the table struct has + been set-up apart from the partition information. + It involves: + 1) Setting arrays of fields for the partition functions. + 2) Setting up binary search array for LIST partitioning + 3) Setting up array for binary search for RANGE partitioning + 4) Setting up key_map's to assist in quick evaluation whether one + can deduce anything from a given index of what partition to use + 5) Checking whether a set of partitions can be derived from a range on + a field in the partition function. 
+  As part of doing this a great number of error checks are also performed.
+  This is in fact the place where most of the partition information is
+  checked when creating a table.
+  Things that are checked include:
+  1) No NULLable fields in partition function
+  2) All fields of partition function in Primary keys and unique indexes
+     (if not supported)
+  3) No fields in partition function that are BLOB's or VARCHAR with a
+     collation other than the binary collation.
+
+
+
+  Create an array of partition fields (NULL terminated). Before this method
+  is called fix_fields or find_table_in_sef has been called to set
+  GET_FIXED_FIELDS_FLAG on all fields that are part of the partition
+  function.
+  SYNOPSIS
+    set_up_field_array()
+      table              TABLE object for which partition fields are set-up
+      sub_part           Is the table subpartitioned as well
+  RETURN VALUE
+    TRUE                 Error, some field didn't meet requirements
+    FALSE                Ok, partition field array set-up
+  DESCRIPTION
+    This method is used to set up both the partition and the
+    subpartitioning field array and is used for all types of partitioning.
+    It is part of the logic around fix_partition_func.
+*/
+static bool set_up_field_array(TABLE *table,
+                               bool sub_part)
+{
+  Field **ptr, *field, **field_array;
+  uint no_fields= 0, size_field_array, i= 0;
+  partition_info *part_info= table->s->part_info;
+  int result= FALSE;
+  DBUG_ENTER("set_up_field_array");
+
+  ptr= table->field;
+  while ((field= *(ptr++)))
+  {
+    if (field->flags & GET_FIXED_FIELDS_FLAG)
+      no_fields++;
+  }
+  size_field_array= (no_fields+1)*sizeof(Field*);
+  field_array= (Field**)sql_alloc(size_field_array);
+  if (unlikely(!field_array))
+  {
+    my_error(ER_OUTOFMEMORY, MYF(0), size_field_array);
+    result= TRUE;
+  }
+  ptr= table->field;
+  while ((field= *(ptr++)))
+  {
+    if (field->flags & GET_FIXED_FIELDS_FLAG)
+    {
+      field->flags&= ~GET_FIXED_FIELDS_FLAG;
+      field->flags|= FIELD_IN_PART_FUNC_FLAG;
+      if (likely(!result))
+      {
+        field_array[i++]= field;
+
+        /*
+          We check that the fields are proper. It is required for each
+          field in a partition function to:
+          1) Not be a BLOB of any type
+            A BLOB takes too long time to evaluate so we don't want it for
+            performance reasons.
+          2) Not be a VARCHAR other than VARCHAR with a binary collation
+            A VARCHAR with character sets can have several values being
+            equal with different number of spaces or NULL's. This is not a
+            good ground for a safe and exact partition function. Thus it is
+            not allowed in partition functions.
+        */
+
+        if (unlikely(field->flags & BLOB_FLAG))
+        {
+          my_error(ER_BLOB_FIELD_IN_PART_FUNC_ERROR, MYF(0));
+          result= TRUE;
+        }
+        else if (unlikely(!(field->flags & BINARY_FLAG) &&
+                          field->real_type() == MYSQL_TYPE_VARCHAR))
+        {
+          my_error(ER_CHAR_SET_IN_PART_FIELD_ERROR, MYF(0));
+          result= TRUE;
+        }
+      }
+    }
+  }
+  field_array[no_fields]= 0;
+  if (!sub_part)
+  {
+    part_info->part_field_array= field_array;
+    part_info->no_part_fields= no_fields;
+  }
+  else
+  {
+    part_info->subpart_field_array= field_array;
+    part_info->no_subpart_fields= no_fields;
+  }
+  DBUG_RETURN(result);
+}
+
+
+/*
+  Create a field array including all fields of both the partitioning and the
+  subpartitioning functions.
+  SYNOPSIS
+    create_full_part_field_array()
+      table              TABLE object for which partition fields are set-up
+      part_info          Reference to partitioning data structure
+  RETURN VALUE
+    TRUE                 Memory allocation of field array failed
+    FALSE                Ok
+  DESCRIPTION
+    If there is no subpartitioning then the same array is used as for the
+    partitioning.
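Both set_up_field_array above and create_full_part_field_array below share one pattern: count the flagged fields, allocate one extra slot, copy the pointers, and NULL-terminate so later loops need no separate length. A generic sketch (illustrative C++):

#include <vector>

// Two-pass collection of flagged elements into a NULL-terminated
// pointer array, mirroring the pattern in the surrounding functions.
template <class T, class Pred>
std::vector<T *> collect_null_terminated(T **all, Pred flagged) {
  size_t n = 0;
  for (T **p = all; *p; p++)
    if (flagged(*p)) n++;
  std::vector<T *> out;
  out.reserve(n + 1);
  for (T **p = all; *p; p++)
    if (flagged(*p)) out.push_back(*p);
  out.push_back(nullptr);                 // terminator, as in the code above
  return out;
}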
Otherwise a new array is built up using the flag + FIELD_IN_PART_FUNC in the field object. + This function is called from fix_partition_func +*/ + +static bool create_full_part_field_array(TABLE *table, + partition_info *part_info) +{ + bool result= FALSE; + DBUG_ENTER("create_full_part_field_array"); + + if (!is_sub_partitioned(part_info)) + { + part_info->full_part_field_array= part_info->part_field_array; + part_info->no_full_part_fields= part_info->no_part_fields; + } + else + { + Field **ptr, *field, **field_array; + uint no_part_fields=0, size_field_array; + ptr= table->field; + while ((field= *(ptr++))) + { + if (field->flags & FIELD_IN_PART_FUNC_FLAG) + no_part_fields++; + } + size_field_array= (no_part_fields+1)*sizeof(Field*); + field_array= (Field**)sql_alloc(size_field_array); + if (unlikely(!field_array)) + { + my_error(ER_OUTOFMEMORY, MYF(0), size_field_array); + result= TRUE; + goto end; + } + no_part_fields= 0; + ptr= table->field; + while ((field= *(ptr++))) + { + if (field->flags & FIELD_IN_PART_FUNC_FLAG) + field_array[no_part_fields++]= field; + } + field_array[no_part_fields]=0; + part_info->full_part_field_array= field_array; + part_info->no_full_part_fields= no_part_fields; + } +end: + DBUG_RETURN(result); +} + + +/* + These support routines is used to set/reset an indicator of all fields + in a certain key. It is used in conjunction with another support routine + that traverse all fields in the PF to find if all or some fields in the + PF is part of the key. This is used to check primary keys and unique + keys involve all fields in PF (unless supported) and to derive the + key_map's used to quickly decide whether the index can be used to + derive which partitions are needed to scan. + + + + Clear flag GET_FIXED_FIELDS_FLAG in all fields of a key previously set by + set_indicator_in_key_fields (always used in pairs). + SYNOPSIS + clear_indicator_in_key_fields() + key_info Reference to find the key fields +*/ + +static void clear_indicator_in_key_fields(KEY *key_info) +{ + KEY_PART_INFO *key_part; + uint key_parts= key_info->key_parts, i; + for (i= 0, key_part=key_info->key_part; i < key_parts; i++, key_part++) + key_part->field->flags&= (~GET_FIXED_FIELDS_FLAG); +} + + +/* + Set flag GET_FIXED_FIELDS_FLAG in all fields of a key. + SYNOPSIS + set_indicator_in_key_fields + key_info Reference to find the key fields +*/ + +static void set_indicator_in_key_fields(KEY *key_info) +{ + KEY_PART_INFO *key_part; + uint key_parts= key_info->key_parts, i; + for (i= 0, key_part=key_info->key_part; i < key_parts; i++, key_part++) + key_part->field->flags|= GET_FIXED_FIELDS_FLAG; +} + + +/* + Check if all or some fields in partition field array is part of a key + previously used to tag key fields. + SYNOPSIS + check_fields_in_PF() + ptr Partition field array + all_fields Is all fields of partition field array used in key + some_fields Is some fields of partition field array used in key + RETURN VALUE + all_fields, some_fields +*/ + +static void check_fields_in_PF(Field **ptr, bool *all_fields, + bool *some_fields) +{ + DBUG_ENTER("check_fields_in_PF"); + *all_fields= TRUE; + *some_fields= FALSE; + do + { + /* Check if the field of the PF is part of the current key investigated */ + if ((*ptr)->flags & GET_FIXED_FIELDS_FLAG) + *some_fields= TRUE; + else + *all_fields= FALSE; + } while (*(++ptr)); + DBUG_VOID_RETURN; +} + + +/* + Clear flag GET_FIXED_FIELDS_FLAG in all fields of the table. + This routine is used for error handling purposes. 
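The all_fields/some_fields classification computed by check_fields_in_PF above is easiest to see set-wise (illustrative C++ with column-name sets standing in for the field flags):

#include <set>
#include <string>

// Given the set of key columns, is the partition-function column set
// fully covered (all), partially covered (some), or disjoint (neither)?
void classify_coverage(const std::set<std::string> &key_cols,
                       const std::set<std::string> &pf_cols,
                       bool *all_fields, bool *some_fields) {
  *all_fields = true;
  *some_fields = false;
  for (const auto &col : pf_cols) {
    if (key_cols.count(col))
      *some_fields = true;                // at least one PF column in the key
    else
      *all_fields = false;                // some PF column is missing
  }
}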
+ SYNOPSIS + clear_field_flag() + table TABLE object for which partition fields are set-up +*/ + +static void clear_field_flag(TABLE *table) +{ + Field **ptr; + DBUG_ENTER("clear_field_flag"); + + for (ptr= table->field; *ptr; ptr++) + (*ptr)->flags&= (~GET_FIXED_FIELDS_FLAG); + DBUG_VOID_RETURN; +} + + +/* + This routine sets-up the partition field array for KEY partitioning, it + also verifies that all fields in the list of fields is actually a part of + the table. + SYNOPSIS + handle_list_of_fields() + it A list of field names for the partition function + table TABLE object for which partition fields are set-up + part_info Reference to partitioning data structure + sub_part Is the table subpartitioned as well + RETURN VALUE + TRUE Fields in list of fields not part of table + FALSE All fields ok and array created + DESCRIPTION + find_field_in_table_sef finds the field given its name. All fields get + GET_FIXED_FIELDS_FLAG set. +*/ + +static bool handle_list_of_fields(List_iterator<char> it, + TABLE *table, + partition_info *part_info, + bool sub_part) +{ + Field *field; + bool result; + char *field_name; + DBUG_ENTER("handle_list_of_fields"); + + while ((field_name= it++)) + { + field= find_field_in_table_sef(table, field_name); + if (likely(field != 0)) + field->flags|= GET_FIXED_FIELDS_FLAG; + else + { + my_error(ER_FIELD_NOT_FOUND_PART_ERROR, MYF(0)); + clear_field_flag(table); + result= TRUE; + goto end; + } + } + result= set_up_field_array(table, sub_part); +end: + DBUG_RETURN(result); +} + + +/* + This function is used to build an array of partition fields for the + partitioning function and subpartitioning function. The partitioning + function is an item tree that must reference at least one field in the + table. This is checked first in the parser that the function doesn't + contain non-cacheable parts (like a random function) and by checking + here that the function isn't a constant function. + SYNOPSIS + fix_fields_part_func() + thd The thread object + tables A list of one table, the partitioned table + func_expr The item tree reference of the partition function + part_info Reference to partitioning data structure + sub_part Is the table subpartitioned as well + RETURN VALUE + TRUE An error occurred, something was wrong with the + partition function. + FALSE Ok, a partition field array was created + DESCRIPTION + The function uses a new feature in fix_fields where the flag + GET_FIXED_FIELDS_FLAG is set for all fields in the item tree. + This field must always be reset before returning from the function + since it is used for other purposes as well. +*/ + +static bool fix_fields_part_func(THD *thd, TABLE_LIST *tables, + Item* func_expr, partition_info *part_info, + bool sub_part) +{ + /* + Calculate the number of fields in the partition function. + Use it allocate memory for array of Field pointers. + Initialise array of field pointers. Use information set when + calling fix_fields and reset it immediately after. + The get_fields_in_item_tree activates setting of bit in flags + on the field object. 
+ */ + + bool result= TRUE; + TABLE *table= tables->table; + TABLE_LIST *save_table_list, *save_first_table, *save_last_table; + int error; + Name_resolution_context *context; + DBUG_ENTER("fix_fields_part_func"); + + context= thd->lex->current_context(); + table->map= 1; //To ensure correct calculation of const item + table->get_fields_in_item_tree= TRUE; + save_table_list= context->table_list; + save_first_table= context->first_name_resolution_table; + save_last_table= context->last_name_resolution_table; + context->table_list= tables; + context->first_name_resolution_table= tables; + context->last_name_resolution_table= NULL; + func_expr->walk(&Item::change_context_processor, (byte*) context); + thd->where= "partition function"; + error= func_expr->fix_fields(thd, (Item**)0); + context->table_list= save_table_list; + context->first_name_resolution_table= save_first_table; + context->last_name_resolution_table= save_last_table; + if (unlikely(error)) + { + DBUG_PRINT("info", ("Field in partition function not part of table")); + clear_field_flag(table); + goto end; + } + if (unlikely(func_expr->const_item())) + { + my_error(ER_CONST_EXPR_IN_PARTITION_FUNC_ERROR, MYF(0)); + clear_field_flag(table); + goto end; + } + result= set_up_field_array(table, sub_part); +end: + table->get_fields_in_item_tree= FALSE; + table->map= 0; //Restore old value + DBUG_RETURN(result); +} + + +/* + This function verifies that if there is a primary key that it contains + all the fields of the partition function. + This is a temporary limitation that will hopefully be removed after a + while. + SYNOPSIS + check_primary_key() + table TABLE object for which partition fields are set-up + RETURN VALUES + TRUE Not all fields in partitioning function was part + of primary key + FALSE Ok, all fields of partitioning function were part + of primary key +*/ + +static bool check_primary_key(TABLE *table) +{ + uint primary_key= table->s->primary_key; + bool all_fields, some_fields, result= FALSE; + DBUG_ENTER("check_primary_key"); + + if (primary_key < MAX_KEY) + { + set_indicator_in_key_fields(table->key_info+primary_key); + check_fields_in_PF(table->s->part_info->full_part_field_array, + &all_fields, &some_fields); + clear_indicator_in_key_fields(table->key_info+primary_key); + if (unlikely(!all_fields)) + { + my_error(ER_UNIQUE_KEY_NEED_ALL_FIELDS_IN_PF,MYF(0),"PRIMARY KEY"); + result= TRUE; + } + } + DBUG_RETURN(result); +} + + +/* + This function verifies that if there is a unique index that it contains + all the fields of the partition function. + This is a temporary limitation that will hopefully be removed after a + while. 
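The rule enforced here can be sketched set-wise (illustrative C++): every unique key must contain every partition-function column, because uniqueness is only checked inside one partition, and a duplicate could otherwise hide in a partition that the insert never visits.

#include <set>
#include <string>
#include <vector>

// Returns false if any unique key misses a partition-function column,
// i.e. if enforcing it would require cross-partition lookups.
bool unique_keys_cover_pf(
    const std::vector<std::set<std::string>> &unique_keys,
    const std::set<std::string> &pf_cols) {
  for (const auto &key : unique_keys)
    for (const auto &col : pf_cols)
      if (!key.count(col))
        return false;
  return true;
}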
+ SYNOPSIS + check_unique_keys() + table TABLE object for which partition fields are set-up + RETURN VALUES + TRUE Not all fields in partitioning function was part + of all unique keys + FALSE Ok, all fields of partitioning function were part + of unique keys +*/ + +static bool check_unique_keys(TABLE *table) +{ + bool all_fields, some_fields, result= FALSE; + uint keys= table->s->keys, i; + DBUG_ENTER("check_unique_keys"); + for (i= 0; i < keys; i++) + { + if (table->key_info[i].flags & HA_NOSAME) //Unique index + { + set_indicator_in_key_fields(table->key_info+i); + check_fields_in_PF(table->s->part_info->full_part_field_array, + &all_fields, &some_fields); + clear_indicator_in_key_fields(table->key_info+i); + if (unlikely(!all_fields)) + { + my_error(ER_UNIQUE_KEY_NEED_ALL_FIELDS_IN_PF,MYF(0),"UNIQUE INDEX"); + result= TRUE; + break; + } + } + } + DBUG_RETURN(result); +} + + +/* + An important optimisation is whether a range on a field can select a subset + of the partitions. + A prerequisite for this to happen is that the PF is a growing function OR + a shrinking function. + This can never happen for a multi-dimensional PF. Thus this can only happen + with PF with at most one field involved in the PF. + The idea is that if the function is a growing function and you know that + the field of the PF is 4 <= A <= 6 then we can convert this to a range + in the PF instead by setting the range to PF(4) <= PF(A) <= PF(6). In the + case of RANGE PARTITIONING and LIST PARTITIONING this can be used to + calculate a set of partitions rather than scanning all of them. + Thus the following prerequisites are there to check if sets of partitions + can be found. + 1) Only possible for RANGE and LIST partitioning (not for subpartitioning) + 2) Only possible if PF only contains 1 field + 3) Possible if PF is a growing function of the field + 4) Possible if PF is a shrinking function of the field + OBSERVATION: + 1) IF f1(A) is a growing function AND f2(A) is a growing function THEN + f1(A) + f2(A) is a growing function + f1(A) * f2(A) is a growing function if f1(A) >= 0 and f2(A) >= 0 + 2) IF f1(A) is a growing function and f2(A) is a shrinking function THEN + f1(A) / f2(A) is a growing function if f1(A) >= 0 and f2(A) > 0 + 3) IF A is a growing function then a function f(A) that removes the + least significant portion of A is a growing function + E.g. DATE(datetime) is a growing function + MONTH(datetime) is not a growing/shrinking function + 4) IF f1(A) is a growing function and f2(A) is a growing function THEN + f1(f2(A)) and f2(f1(A)) are also growing functions + 5) IF f1(A) is a shrinking function and f2(A) is a growing function THEN + f1(f2(A)) is a shrinking function and f2(f1(A)) is a shrinking function + 6) f1(A) = A is a growing function + 7) f1(A) = A*a + b (where a and b are constants) is a growing function + + By analysing the item tree of the PF we can use these deducements and + derive whether the PF is a growing function or a shrinking function or + neither of it. + + If the PF is range capable then a flag is set on the table object + indicating this to notify that we can use also ranges on the field + of the PF to deduce a set of partitions if the fields of the PF were + not all fully bound. + SYNOPSIS + check_range_capable_PF() + table TABLE object for which partition fields are set-up + DESCRIPTION + Support for this is not implemented yet. 
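A worked sketch of the pruning described above (illustrative C++; assumes a monotonically non-decreasing partition function f): with bounds {10, 20, 30}, f(A) = A + 1 and the predicate 4 <= A <= 6, the interval [f(4), f(6)] = [5, 7] touches only partition 0, so the other partitions need not be scanned.

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <vector>

struct PartRange { size_t first, last; };  // inclusive partition ids

// If f is non-decreasing and lo <= A <= hi, only partitions
// intersecting [f(lo), f(hi)] can contain matching rows.
PartRange prune_range_partitions(const std::vector<int64_t> &bounds,
                                 int64_t (*f)(int64_t),
                                 int64_t lo, int64_t hi) {
  int64_t flo = f(lo), fhi = f(hi);        // monotonicity keeps the order
  size_t first = std::upper_bound(bounds.begin(), bounds.end(), flo)
                 - bounds.begin();
  size_t last  = std::upper_bound(bounds.begin(), bounds.end(), fhi)
                 - bounds.begin();
  return {first, last};                    // scan only these partitions
}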
+*/ + +void check_range_capable_PF(TABLE *table) +{ + DBUG_ENTER("check_range_capable_PF"); + DBUG_VOID_RETURN; +} + + +/* + Set up partition key maps + SYNOPSIS + set_up_partition_key_maps() + table TABLE object for which partition fields are set-up + part_info Reference to partitioning data structure + RETURN VALUES + None + DESCRIPTION + This function sets up a couple of key maps to be able to quickly check + if an index ever can be used to deduce the partition fields or even + a part of the fields of the partition function. + We set up the following key_map's. + PF = Partition Function + 1) All fields of the PF is set even by equal on the first fields in the + key + 2) All fields of the PF is set if all fields of the key is set + 3) At least one field in the PF is set if all fields is set + 4) At least one field in the PF is part of the key +*/ + +static void set_up_partition_key_maps(TABLE *table, + partition_info *part_info) +{ + uint keys= table->s->keys, i; + bool all_fields, some_fields; + DBUG_ENTER("set_up_partition_key_maps"); + + part_info->all_fields_in_PF.clear_all(); + part_info->all_fields_in_PPF.clear_all(); + part_info->all_fields_in_SPF.clear_all(); + part_info->some_fields_in_PF.clear_all(); + for (i= 0; i < keys; i++) + { + set_indicator_in_key_fields(table->key_info+i); + check_fields_in_PF(part_info->full_part_field_array, + &all_fields, &some_fields); + if (all_fields) + part_info->all_fields_in_PF.set_bit(i); + if (some_fields) + part_info->some_fields_in_PF.set_bit(i); + if (is_sub_partitioned(part_info)) + { + check_fields_in_PF(part_info->part_field_array, + &all_fields, &some_fields); + if (all_fields) + part_info->all_fields_in_PPF.set_bit(i); + check_fields_in_PF(part_info->subpart_field_array, + &all_fields, &some_fields); + if (all_fields) + part_info->all_fields_in_SPF.set_bit(i); + } + clear_indicator_in_key_fields(table->key_info+i); + } + DBUG_VOID_RETURN; +} + + +/* + Set-up all function pointers for calculation of partition id, + subpartition id and the upper part in subpartitioning. This is to speed up + execution of get_partition_id which is executed once every record to be + written and deleted and twice for updates. 
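The four key maps set up above can be modelled as one bit per index (illustrative C++; std::bitset stands in for the server's key_map type):

#include <bitset>
#include <cstddef>

// One bit per index: can this index bind the whole partition function,
// the whole partitioning function, the whole subpartitioning function,
// or at least touch the partition function at all?
struct PartitionKeyMaps {
  static const size_t MAX_INDEXES = 64;
  std::bitset<MAX_INDEXES> all_fields_in_PF;
  std::bitset<MAX_INDEXES> all_fields_in_PPF;
  std::bitset<MAX_INDEXES> all_fields_in_SPF;
  std::bitset<MAX_INDEXES> some_fields_in_PF;

  bool index_decides_partition(size_t keynr) const {
    return all_fields_in_PF.test(keynr);   // O(1) answer for the optimizer
  }
};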
+ SYNOPSIS + set_up_partition_function_pointers() + part_info Reference to partitioning data structure +*/ + +static void set_up_partition_func_pointers(partition_info *part_info) +{ + if (is_sub_partitioned(part_info)) + { + if (part_info->part_type == RANGE_PARTITION) + { + part_info->get_part_partition_id= get_partition_id_range; + if (part_info->list_of_subpart_fields) + { + if (part_info->linear_hash_ind) + { + part_info->get_partition_id= get_partition_id_range_sub_linear_key; + part_info->get_subpartition_id= get_partition_id_linear_key_sub; + } + else + { + part_info->get_partition_id= get_partition_id_range_sub_key; + part_info->get_subpartition_id= get_partition_id_key_sub; + } + } + else + { + if (part_info->linear_hash_ind) + { + part_info->get_partition_id= get_partition_id_range_sub_linear_hash; + part_info->get_subpartition_id= get_partition_id_linear_hash_sub; + } + else + { + part_info->get_partition_id= get_partition_id_range_sub_hash; + part_info->get_subpartition_id= get_partition_id_hash_sub; + } + } + } + else //LIST Partitioning + { + part_info->get_part_partition_id= get_partition_id_list; + if (part_info->list_of_subpart_fields) + { + if (part_info->linear_hash_ind) + { + part_info->get_partition_id= get_partition_id_list_sub_linear_key; + part_info->get_subpartition_id= get_partition_id_linear_key_sub; + } + else + { + part_info->get_partition_id= get_partition_id_list_sub_key; + part_info->get_subpartition_id= get_partition_id_key_sub; + } + } + else + { + if (part_info->linear_hash_ind) + { + part_info->get_partition_id= get_partition_id_list_sub_linear_hash; + part_info->get_subpartition_id= get_partition_id_linear_hash_sub; + } + else + { + part_info->get_partition_id= get_partition_id_list_sub_hash; + part_info->get_subpartition_id= get_partition_id_hash_sub; + } + } + } + } + else //No subpartitioning + { + part_info->get_part_partition_id= NULL; + part_info->get_subpartition_id= NULL; + if (part_info->part_type == RANGE_PARTITION) + part_info->get_partition_id= get_partition_id_range; + else if (part_info->part_type == LIST_PARTITION) + part_info->get_partition_id= get_partition_id_list; + else //HASH partitioning + { + if (part_info->list_of_part_fields) + { + if (part_info->linear_hash_ind) + part_info->get_partition_id= get_partition_id_linear_key_nosub; + else + part_info->get_partition_id= get_partition_id_key_nosub; + } + else + { + if (part_info->linear_hash_ind) + part_info->get_partition_id= get_partition_id_linear_hash_nosub; + else + part_info->get_partition_id= get_partition_id_hash_nosub; + } + } + } +} + + +/* + For linear hashing we need a mask which is on the form 2**n - 1 where + 2**n >= no_parts. Thus if no_parts is 6 then mask is 2**3 - 1 = 8 - 1 = 7. + SYNOPSIS + set_linear_hash_mask() + part_info Reference to partitioning data structure + no_parts Number of parts in linear hash partitioning +*/ + +static void set_linear_hash_mask(partition_info *part_info, uint no_parts) +{ + uint mask; + for (mask= 1; mask < no_parts; mask<<=1) + ; + part_info->linear_hash_mask= mask - 1; +} + + +/* + This function calculates the partition id provided the result of the hash + function using linear hashing parameters, mask and number of partitions. 
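The mask construction just described, combined with the fold-back step of get_part_id_from_linear_hash below, in a runnable standalone form (illustrative C++): for no_parts = 6 the mask is 7; a hash of 14 first maps to 6, which is out of range, and is folded back with mask 3 to partition 2.

#include <cstdint>
#include <cstdio>

// Smallest 2^n - 1 with 2^n >= no_parts.
static uint32_t linear_hash_mask(uint32_t no_parts) {
  uint32_t mask;
  for (mask = 1; mask < no_parts; mask <<= 1)
    ;
  return mask - 1;
}

// Values landing beyond the existing partitions are folded back with
// the next smaller mask, per the linear hashing scheme.
static uint32_t linear_hash_part(int64_t hash_value, uint32_t mask,
                                 uint32_t no_parts) {
  uint32_t part_id = (uint32_t)(hash_value & mask);
  if (part_id >= no_parts)
    part_id = (uint32_t)(hash_value & (((mask + 1) >> 1) - 1));
  return part_id;
}

int main() {
  uint32_t mask = linear_hash_mask(6);                 // 6 parts -> mask 7
  std::printf("%u\n", linear_hash_part(5, mask, 6));   // 5 & 7 = 5
  std::printf("%u\n", linear_hash_part(14, mask, 6));  // 14 & 7 = 6 -> 14 & 3 = 2
  return 0;
}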
+  SYNOPSIS
+    get_part_id_from_linear_hash()
+    hash_value    Hash value calculated by HASH function or KEY function
+    mask          Mask calculated previously by set_linear_hash_mask
+    no_parts      Number of partitions in HASH partitioned part
+  RETURN VALUE
+    part_id       The calculated partition identity (starting at 0)
+  DESCRIPTION
+    The partition is calculated according to the theory of linear hashing.
+    See e.g. Linear hashing: a new tool for file and table addressing,
+    reprinted from VLDB-80 in Readings in Database Systems, 2nd ed,
+    M. Stonebraker (ed.), Morgan Kaufmann 1994.
+*/
+
+static uint32 get_part_id_from_linear_hash(longlong hash_value, uint mask,
+                                           uint no_parts)
+{
+  uint32 part_id= (uint32)(hash_value & mask);
+  if (part_id >= no_parts)
+  {
+    uint new_mask= ((mask + 1) >> 1) - 1;
+    part_id= hash_value & new_mask;
+  }
+  return part_id;
+}
+
+/*
+  This function is called as part of opening the table by opening the .frm
+  file. Since this is also done as part of CREATE TABLE, it is quite
+  permissible that errors due to erroneous syntax are not found until we
+  come here. A user referring to a non-existing field of the table is one
+  example of an error that is not discovered until this point.
+  SYNOPSIS
+    fix_partition_func()
+    thd           The thread object
+    name          The name of the partitioned table
+    table         TABLE object for which partition fields are set up
+  RETURN VALUE
+    TRUE          Error
+    FALSE         Success
+  DESCRIPTION
+    The name parameter contains the full table name and is used to get the
+    database name of the table, which is used to set up a correct
+    TABLE_LIST object for use in fix_fields.
+*/
+
+bool fix_partition_func(THD *thd, const char* name, TABLE *table)
+{
+  bool result= TRUE;
+  uint dir_length, home_dir_length;
+  TABLE_LIST tables;
+  TABLE_SHARE *share= table->s;
+  char db_name_string[FN_REFLEN];
+  char* db_name;
+  partition_info *part_info= share->part_info;
+  ulong save_set_query_id= thd->set_query_id;
+  DBUG_ENTER("fix_partition_func");
+
+  thd->set_query_id= 0;
+  /*
+    Set up the TABLE_LIST object to be a list with a single table.
+    Set the object to zero to create NULL pointers, set alias and real
+    name to the table name and get the database name from the file name.
+  */
+
+  bzero((void*)&tables, sizeof(TABLE_LIST));
+  tables.alias= tables.table_name= (char*)share->table_name;
+  tables.table= table;
+  tables.next_local= 0;
+  tables.next_name_resolution_table= 0;
+  strmov(db_name_string, name);
+  dir_length= dirname_length(db_name_string);
+  db_name_string[dir_length - 1]= 0;
+  home_dir_length= dirname_length(db_name_string);
+  db_name= &db_name_string[home_dir_length];
+  tables.db= db_name;
+
+  if (is_sub_partitioned(part_info))
+  {
+    DBUG_ASSERT(part_info->subpart_type == HASH_PARTITION);
+    /*
+      Subpartition is defined. We need to verify that the subpartitioning
+      function is correct.
+    */
+    if (part_info->linear_hash_ind)
+      set_linear_hash_mask(part_info, part_info->no_subparts);
+    if (part_info->list_of_subpart_fields)
+    {
+      List_iterator<char> it(part_info->subpart_field_list);
+      if (unlikely(handle_list_of_fields(it, table, part_info, TRUE)))
+        goto end;
+    }
+    else
+    {
+      if (unlikely(fix_fields_part_func(thd, &tables,
+                                        part_info->subpart_expr,
+                                        part_info, TRUE)))
+        goto end;
+      if (unlikely(part_info->subpart_expr->result_type() != INT_RESULT))
+      {
+        my_error(ER_PARTITION_FUNC_NOT_ALLOWED_ERROR, MYF(0),
+                 "SUBPARTITION");
+        goto end;
+      }
+    }
+  }
+  DBUG_ASSERT(part_info->part_type != NOT_A_PARTITION);
+  /*
+    Partition is defined. We need to verify that the partitioning
+    function is correct.
+  */
+  if (part_info->part_type == HASH_PARTITION)
+  {
+    if (part_info->linear_hash_ind)
+      set_linear_hash_mask(part_info, part_info->no_parts);
+    if (part_info->list_of_part_fields)
+    {
+      List_iterator<char> it(part_info->part_field_list);
+      if (unlikely(handle_list_of_fields(it, table, part_info, FALSE)))
+        goto end;
+    }
+    else
+    {
+      if (unlikely(fix_fields_part_func(thd, &tables, part_info->part_expr,
+                                        part_info, FALSE)))
+        goto end;
+      if (unlikely(part_info->part_expr->result_type() != INT_RESULT))
+      {
+        my_error(ER_PARTITION_FUNC_NOT_ALLOWED_ERROR, MYF(0), part_str);
+        goto end;
+      }
+      part_info->part_result_type= INT_RESULT;
+    }
+  }
+  else
+  {
+    const char *error_str;
+    if (part_info->part_type == RANGE_PARTITION)
+    {
+      error_str= range_str;
+      if (unlikely(check_range_constants(part_info)))
+        goto end;
+    }
+    else if (part_info->part_type == LIST_PARTITION)
+    {
+      error_str= list_str;
+      if (unlikely(check_list_constants(part_info)))
+        goto end;
+    }
+    else
+    {
+      DBUG_ASSERT(0);
+      my_error(ER_INCONSISTENT_PARTITION_INFO_ERROR, MYF(0));
+      goto end;
+    }
+    if (unlikely(part_info->no_parts < 1))
+    {
+      my_error(ER_PARTITIONS_MUST_BE_DEFINED_ERROR, MYF(0), error_str);
+      goto end;
+    }
+    if (unlikely(fix_fields_part_func(thd, &tables, part_info->part_expr,
+                                      part_info, FALSE)))
+      goto end;
+    if (unlikely(part_info->part_expr->result_type() != INT_RESULT))
+    {
+      my_error(ER_PARTITION_FUNC_NOT_ALLOWED_ERROR, MYF(0), part_str);
+      goto end;
+    }
+  }
+  if (unlikely(create_full_part_field_array(table, part_info)))
+    goto end;
+  if (unlikely(check_primary_key(table)))
+    goto end;
+  if (unlikely(!(table->file->partition_flags() & HA_CAN_PARTITION_UNIQUE) &&
+               check_unique_keys(table)))
+    goto end;
+  check_range_capable_PF(table);
+  set_up_partition_key_maps(table, part_info);
+  set_up_partition_func_pointers(part_info);
+  result= FALSE;
+end:
+  thd->set_query_id= save_set_query_id;
+  DBUG_RETURN(result);
+}
+
+
+/*
+  The code below consists of support routines for the reverse parsing of
+  the partitioning syntax. This feature is very useful to generate syntax
+  for all default values, to avoid all default checking when opening the
+  frm file. It is also used when altering the partitioning through various
+  ALTER TABLE commands. Finally it is used for SHOW CREATE TABLE.
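+  For example, add_partition_by followed by add_part_key_word(fptr,
+  range_str) emits the leading "PARTITION BY RANGE (" of the clause, after
+  which the saved text of the partition function and the list of partition
+  definitions are appended.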
+*/
+
+static int add_write(File fptr, const char *buf, uint len)
+{
+  uint len_written= my_write(fptr, (const byte*)buf, len, MYF(0));
+  if (likely(len == len_written))
+    return 0;
+  else
+    return 1;
+}
+
+static int add_string(File fptr, const char *string)
+{
+  return add_write(fptr, string, strlen(string));
+}
+
+static int add_string_len(File fptr, const char *string, uint len)
+{
+  return add_write(fptr, string, len);
+}
+
+static int add_space(File fptr)
+{
+  return add_string(fptr, space_str);
+}
+
+static int add_comma(File fptr)
+{
+  return add_string(fptr, comma_str);
+}
+
+static int add_equal(File fptr)
+{
+  return add_string(fptr, equal_str);
+}
+
+static int add_end_parenthesis(File fptr)
+{
+  return add_string(fptr, end_paren_str);
+}
+
+static int add_begin_parenthesis(File fptr)
+{
+  return add_string(fptr, begin_paren_str);
+}
+
+static int add_part_key_word(File fptr, const char *key_string)
+{
+  int err= add_string(fptr, key_string);
+  err+= add_space(fptr);
+  return err + add_begin_parenthesis(fptr);
+}
+
+static int add_hash(File fptr)
+{
+  return add_part_key_word(fptr, hash_str);
+}
+
+static int add_partition(File fptr)
+{
+  strxmov(buff, part_str, space_str, NullS);
+  return add_string(fptr, buff);
+}
+
+static int add_subpartition(File fptr)
+{
+  int err= add_string(fptr, sub_str);
+  return err + add_partition(fptr);
+}
+
+static int add_partition_by(File fptr)
+{
+  strxmov(buff, part_str, space_str, by_str, space_str, NullS);
+  return add_string(fptr, buff);
+}
+
+static int add_subpartition_by(File fptr)
+{
+  int err= add_string(fptr, sub_str);
+  return err + add_partition_by(fptr);
+}
+
+static int add_key_partition(File fptr, List<char> field_list)
+{
+  uint i, no_fields;
+  int err;
+  List_iterator<char> part_it(field_list);
+  err= add_part_key_word(fptr, key_str);
+  no_fields= field_list.elements;
+  i= 0;
+  do
+  {
+    const char *field_str= part_it++;
+    err+= add_string(fptr, field_str);
+    if (i != (no_fields-1))
+      err+= add_comma(fptr);
+  } while (++i < no_fields);
+  return err;
+}
+
+static int add_int(File fptr, longlong number)
+{
+  llstr(number, buff);
+  return add_string(fptr, buff);
+}
+
+static int add_keyword_string(File fptr, const char *keyword,
+                              const char *keystr)
+{
+  int err= add_string(fptr, keyword);
+  err+= add_space(fptr);
+  err+= add_equal(fptr);
+  err+= add_space(fptr);
+  err+= add_string(fptr, keystr);
+  return err + add_space(fptr);
+}
+
+static int add_keyword_int(File fptr, const char *keyword, longlong num)
+{
+  int err= add_string(fptr, keyword);
+  err+= add_space(fptr);
+  err+= add_equal(fptr);
+  err+= add_space(fptr);
+  err+= add_int(fptr, num);
+  return err + add_space(fptr);
+}
+
+static int add_engine(File fptr, enum db_type engine_type)
+{
+  const char *engine_str= ha_get_storage_engine(engine_type);
+  int err= add_string(fptr, "ENGINE = ");
+  return err + add_string(fptr, engine_str);
+}
+
+static int add_partition_options(File fptr, partition_element *p_elem)
+{
+  int err= 0;
+  if (p_elem->tablespace_name)
+    err+= add_keyword_string(fptr,"TABLESPACE",p_elem->tablespace_name);
+  if (p_elem->nodegroup_id != UNDEF_NODEGROUP)
+    err+= add_keyword_int(fptr,"NODEGROUP",(longlong)p_elem->nodegroup_id);
+  if (p_elem->part_max_rows)
+    err+= add_keyword_int(fptr,"MAX_ROWS",(longlong)p_elem->part_max_rows);
+  if (p_elem->part_min_rows)
+    err+= add_keyword_int(fptr,"MIN_ROWS",(longlong)p_elem->part_min_rows);
+  if (p_elem->data_file_name)
+    err+= add_keyword_string(fptr,"DATA DIRECTORY",p_elem->data_file_name);
+  if (p_elem->index_file_name)
+    err+= add_keyword_string(fptr,"INDEX DIRECTORY",p_elem->index_file_name);
+  if (p_elem->part_comment)
+    err+= add_keyword_string(fptr, "COMMENT",p_elem->part_comment);
+  return err + add_engine(fptr,p_elem->engine_type);
+}
+
+static int add_partition_values(File fptr, partition_info *part_info,
+                                partition_element *p_elem)
+{
+  int err= 0;
+  if (part_info->part_type == RANGE_PARTITION)
+  {
+    err+= add_string(fptr, "VALUES LESS THAN ");
+    if (p_elem->range_value != LONGLONG_MAX)
+    {
+      err+= add_begin_parenthesis(fptr);
+      err+= add_int(fptr, p_elem->range_value);
+      err+= add_end_parenthesis(fptr);
+    }
+    else
+      err+= add_string(fptr, "MAXVALUE");
+  }
+  else if (part_info->part_type == LIST_PARTITION)
+  {
+    uint i;
+    List_iterator<longlong> list_val_it(p_elem->list_val_list);
+    err+= add_string(fptr, "VALUES IN ");
+    uint no_items= p_elem->list_val_list.elements;
+    err+= add_begin_parenthesis(fptr);
+    i= 0;
+    do
+    {
+      longlong *list_value= list_val_it++;
+      err+= add_int(fptr, *list_value);
+      if (i != (no_items-1))
+        err+= add_comma(fptr);
+    } while (++i < no_items);
+    err+= add_end_parenthesis(fptr);
+  }
+  return err + add_space(fptr);
+}
+
+/*
+  Generate the partition syntax from the partition data structure.
+  Useful for the support of generated defaults, SHOW CREATE TABLE
+  and easy partition management.
+  SYNOPSIS
+    generate_partition_syntax()
+    part_info         The partitioning data structure
+    buf_length        A pointer to the returned buffer length
+    use_sql_alloc     Allocate buffer from sql_alloc if true,
+                      otherwise use my_malloc
+    add_default_info  Add info generated by default
+  RETURN VALUES
+    NULL              error
+    buf, buf_length   Buffer and its length
+  DESCRIPTION
+    Here we will generate the full syntax for the given command where all
+    defaults have been expanded. In doing so, it is also possible to
+    perform many correctness checks along the way.
+    This code will also be reused for SHOW CREATE TABLE and for all types
+    of ALTER TABLE commands that change the PARTITION structure in any
+    fashion.
+
+    The implementation writes the syntax to a temporary file (essentially
+    an abstraction of a dynamic array) and if all writes go well it
+    allocates a buffer, writes the syntax into it and returns it.
+
+    As a security precaution the file is deleted right after it is opened.
+    This means that no other processes on the machine can open and read
+    the file while this processing is ongoing.
+
+    The code is optimised for minimal code size since it is not used in
+    any common queries.
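+    For an illustrative RANGE partitioned table, the generated syntax can
+    look like
+      PARTITION BY RANGE (a)
+      (PARTITION p0 VALUES LESS THAN (10) ENGINE = MyISAM,
+       PARTITION p1 VALUES LESS THAN MAXVALUE ENGINE = MyISAM)
+    where the partition function, names, values and engine are examples
+    and not fixed by this code.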
+*/
+
+char *generate_partition_syntax(partition_info *part_info,
+                                uint *buf_length,
+                                bool use_sql_alloc,
+                                bool add_default_info)
+{
+  uint i,j, no_parts, no_subparts;
+  partition_element *part_elem;
+  ulonglong buffer_length;
+  char path[FN_REFLEN];
+  int err= 0;
+  DBUG_ENTER("generate_partition_syntax");
+  File fptr;
+  char *buf= NULL; //Return buffer
+  const char *file_name;
+  sprintf(path, "%s_%lx_%lx", "part_syntax", current_pid,
+          current_thd->thread_id);
+  fn_format(path,path,mysql_tmpdir,".psy", MY_REPLACE_EXT);
+  file_name= &path[0];
+  DBUG_PRINT("info", ("File name = %s", file_name));
+  if (unlikely(((fptr= my_open(file_name,O_CREAT|O_RDWR, MYF(MY_WME))) == -1)))
+    DBUG_RETURN(NULL);
+#if defined(MSDOS) || defined(__WIN__) || defined(__EMX__) || defined(OS2)
+#else
+  my_delete(file_name, MYF(0));
+#endif
+  err+= add_space(fptr);
+  err+= add_partition_by(fptr);
+  switch (part_info->part_type)
+  {
+  case RANGE_PARTITION:
+    add_default_info= TRUE;
+    err+= add_part_key_word(fptr, range_str);
+    break;
+  case LIST_PARTITION:
+    add_default_info= TRUE;
+    err+= add_part_key_word(fptr, list_str);
+    break;
+  case HASH_PARTITION:
+    if (part_info->linear_hash_ind)
+      err+= add_string(fptr, "LINEAR ");
+    if (part_info->list_of_part_fields)
+      err+= add_key_partition(fptr, part_info->part_field_list);
+    else
+      err+= add_hash(fptr);
+    break;
+  default:
+    DBUG_ASSERT(0);
+    /* We really shouldn't get here, no use in continuing from here */
+    current_thd->fatal_error();
+    DBUG_RETURN(NULL);
+  }
+  if (part_info->part_expr)
+    err+= add_string_len(fptr, part_info->part_func_string,
+                         part_info->part_func_len);
+  err+= add_end_parenthesis(fptr);
+  err+= add_space(fptr);
+  if (is_sub_partitioned(part_info))
+  {
+    err+= add_subpartition_by(fptr);
+    /* Must be hash partitioning for subpartitioning */
+    if (part_info->list_of_subpart_fields)
+      err+= add_key_partition(fptr, part_info->subpart_field_list);
+    else
+      err+= add_hash(fptr);
+    if (part_info->subpart_expr)
+      err+= add_string_len(fptr, part_info->subpart_func_string,
+                           part_info->subpart_func_len);
+    err+= add_end_parenthesis(fptr);
+    err+= add_space(fptr);
+  }
+  if (add_default_info)
+  {
+    err+= add_begin_parenthesis(fptr);
+    List_iterator<partition_element> part_it(part_info->partitions);
+    no_parts= part_info->no_parts;
+    no_subparts= part_info->no_subparts;
+    i= 0;
+    do
+    {
+      part_elem= part_it++;
+      err+= add_partition(fptr);
+      err+= add_string(fptr, part_elem->partition_name);
+      err+= add_space(fptr);
+      err+= add_partition_values(fptr, part_info, part_elem);
+      if (!is_sub_partitioned(part_info))
+        err+= add_partition_options(fptr, part_elem);
+      if (is_sub_partitioned(part_info))
+      {
+        err+= add_space(fptr);
+        err+= add_begin_parenthesis(fptr);
+        List_iterator<partition_element> sub_it(part_elem->subpartitions);
+        j= 0;
+        do
+        {
+          part_elem= sub_it++;
+          err+= add_subpartition(fptr);
+          err+= add_string(fptr, part_elem->partition_name);
+          err+= add_space(fptr);
+          err+= add_partition_options(fptr, part_elem);
+          if (j != (no_subparts-1))
+          {
+            err+= add_comma(fptr);
+            err+= add_space(fptr);
+          }
+          else
+            err+= add_end_parenthesis(fptr);
+        } while (++j < no_subparts);
+      }
+      if (i != (no_parts-1))
+      {
+        err+= add_comma(fptr);
+        err+= add_space(fptr);
+      }
+      else
+        err+= add_end_parenthesis(fptr);
+    } while (++i < no_parts);
+  }
+  if (err)
+    goto close_file;
+  buffer_length= my_seek(fptr, 0L,MY_SEEK_END,MYF(0));
+  if (unlikely(buffer_length == MY_FILEPOS_ERROR))
+    goto close_file;
+  if (unlikely(my_seek(fptr, 0L, MY_SEEK_SET, MYF(0)) == MY_FILEPOS_ERROR))
+    goto close_file;
+  *buf_length= (uint)buffer_length;
+  if (use_sql_alloc)
+    buf= sql_alloc(*buf_length+1);
+  else
+    buf= my_malloc(*buf_length+1, MYF(MY_WME));
+  if (!buf)
+    goto close_file;
+
+  if (unlikely(my_read(fptr, (byte*)buf, *buf_length, MYF(MY_FNABP))))
+  {
+    if (!use_sql_alloc)
+      my_free(buf, MYF(0));
+    else
+      buf= NULL;
+  }
+  else
+    buf[*buf_length]= 0;
+
+close_file:
+  /*
+    Delete the file before closing to ensure the file doesn't get synched
+    to disk unnecessarily. We only used the file system as a dynamic array
+    implementation, so we are not really interested in getting the file
+    present on disk. This is not possible on Windows, so there it has to
+    be done after closing the file. On Unix we instead deleted the file
+    immediately after opening it, which also ensures that no other process
+    can read the information written into the file.
+  */
+  my_close(fptr, MYF(0));
+#if defined(MSDOS) || defined(__WIN__) || defined(__EMX__) || defined(OS2)
+  my_delete(file_name, MYF(0));
+#endif
+  DBUG_RETURN(buf);
+}
+
+
+/*
+  Check if partition key fields are modified and if it can be handled by
+  the underlying storage engine.
+  SYNOPSIS
+    partition_key_modified
+    table         TABLE object for which partition fields are set up
+    fields        A list of the fields to be modified
+  RETURN VALUES
+    TRUE          Need special handling of UPDATE
+    FALSE         Normal UPDATE handling is ok
+*/
+
+bool partition_key_modified(TABLE *table, List<Item> &fields)
+{
+  List_iterator_fast<Item> f(fields);
+  partition_info *part_info= table->s->part_info;
+  Item_field *item_field;
+  DBUG_ENTER("partition_key_modified");
+  if (!part_info)
+    DBUG_RETURN(FALSE);
+  if (table->file->partition_flags() & HA_CAN_UPDATE_PARTITION_KEY)
+    DBUG_RETURN(FALSE);
+  f.rewind();
+  while ((item_field=(Item_field*) f++))
+    if (item_field->field->flags & FIELD_IN_PART_FUNC_FLAG)
+      DBUG_RETURN(TRUE);
+  DBUG_RETURN(FALSE);
+}
+
+
+/*
+  The next set of functions are used to calculate the partition identity.
+  A handler sets up a variable that corresponds to one of these functions
+  to be able to quickly call it whenever the partition id needs to be
+  calculated based on the record in table->record[0] (or set up to fake
+  that).
+  There are 4 functions for hash partitioning and 2 for RANGE/LIST
+  partitions. In addition there are 4 variants for RANGE subpartitioning
+  and 4 variants for LIST subpartitioning, thus in total there are 14
+  variants of this function.
+
+  We have a set of support functions for these 14 variants. There are 4
+  variants of hash functions and there is a function for each. The KEY
+  partitioning uses the function calculate_key_value to calculate the hash
+  value based on an array of fields. The linear hash variants use the
+  method get_part_id_from_linear_hash to get the partition id using the
+  hash value and some parameters calculated from the number of partitions.
+*/
+
+/*
+  Calculate hash value for KEY partitioning using an array of fields.
+  SYNOPSIS
+    calculate_key_value()
+    field_array   An array of the fields in KEY partitioning
+  RETURN VALUE
+    hash_value calculated
+  DESCRIPTION
+    Uses the hash function on the character set of the field. Integer and
+    floating point fields use the binary character set by default.
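+    A NULL value contributes no field bytes; instead the accumulated hash
+    value is folded with hashnr^= (hashnr << 1) | 1, so NULLs still
+    influence the resulting partition id.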
+*/
+
+static uint32 calculate_key_value(Field **field_array)
+{
+  uint32 hashnr= 0;
+  ulong nr2= 4;
+  do
+  {
+    Field *field= *field_array;
+    if (field->is_null())
+    {
+      hashnr^= (hashnr << 1) | 1;
+    }
+    else
+    {
+      uint len= field->pack_length();
+      ulong nr1= 1;
+      CHARSET_INFO *cs= field->charset();
+      cs->coll->hash_sort(cs, (uchar*)field->ptr, len, &nr1, &nr2);
+      hashnr^= (uint32)nr1;
+    }
+  } while (*(++field_array));
+  return hashnr;
+}
+
+
+/*
+  A simple support function to calculate part_id given local part and
+  sub part.
+  SYNOPSIS
+    get_part_id_for_sub()
+    loc_part_id   Local partition id
+    sub_part_id   Subpartition id
+    no_subparts   Number of subparts
+*/
+
+inline
+static uint32 get_part_id_for_sub(uint32 loc_part_id, uint32 sub_part_id,
+                                  uint no_subparts)
+{
+  return (uint32)((loc_part_id * no_subparts) + sub_part_id);
+}
+
+
+/*
+  Calculate part_id for (SUB)PARTITION BY HASH
+  SYNOPSIS
+    get_part_id_hash()
+    no_parts      Number of hash partitions
+    part_expr     Item tree of hash function
+  RETURN VALUE
+    Calculated partition id
+*/
+
+inline
+static uint32 get_part_id_hash(uint no_parts,
+                               Item *part_expr)
+{
+  DBUG_ENTER("get_part_id_hash");
+  DBUG_RETURN((uint32)(part_expr->val_int() % no_parts));
+}
+
+
+/*
+  Calculate part_id for (SUB)PARTITION BY LINEAR HASH
+  SYNOPSIS
+    get_part_id_linear_hash()
+    part_info     A reference to the partition_info struct where all the
+                  desired information is given
+    no_parts      Number of hash partitions
+    part_expr     Item tree of hash function
+  RETURN VALUE
+    Calculated partition id
+*/
+
+inline
+static uint32 get_part_id_linear_hash(partition_info *part_info,
+                                      uint no_parts,
+                                      Item *part_expr)
+{
+  DBUG_ENTER("get_part_id_linear_hash");
+  DBUG_RETURN(get_part_id_from_linear_hash(part_expr->val_int(),
+                                           part_info->linear_hash_mask,
+                                           no_parts));
+}
+
+
+/*
+  Calculate part_id for (SUB)PARTITION BY KEY
+  SYNOPSIS
+    get_part_id_key()
+    field_array   Array of fields for PARTITION KEY
+    no_parts      Number of KEY partitions
+  RETURN VALUE
+    Calculated partition id
+*/
+
+inline
+static uint32 get_part_id_key(Field **field_array,
+                              uint no_parts)
+{
+  DBUG_ENTER("get_part_id_key");
+  DBUG_RETURN(calculate_key_value(field_array) % no_parts);
+}
+
+
+/*
+  Calculate part_id for (SUB)PARTITION BY LINEAR KEY
+  SYNOPSIS
+    get_part_id_linear_key()
+    part_info     A reference to the partition_info struct where all the
+                  desired information is given
+    field_array   Array of fields for PARTITION KEY
+    no_parts      Number of KEY partitions
+  RETURN VALUE
+    Calculated partition id
+*/
+
+inline
+static uint32 get_part_id_linear_key(partition_info *part_info,
+                                     Field **field_array,
+                                     uint no_parts)
+{
+  DBUG_ENTER("get_part_id_linear_key");
+  DBUG_RETURN(get_part_id_from_linear_hash(calculate_key_value(field_array),
+                                           part_info->linear_hash_mask,
+                                           no_parts));
+}
+
+/*
+  This function is used to calculate the partition id where all partition
+  fields have been prepared to point to a record where the partition field
+  values are bound.
+  SYNOPSIS
+    get_partition_id()
+    part_info     A reference to the partition_info struct where all the
+                  desired information is given
+    part_id       The partition id is returned through this pointer
+  RETURN VALUE
+    part_id
+    A return value of TRUE means that the fields of the partition function
+    didn't fit into any partition and thus the values of the PF-fields are
+    not allowed.
+  DESCRIPTION
+    A routine used from write_row, update_row and delete_row from any
+    handler supporting partitioning.
+    It is also a support routine for get_partition_set used to find the
+    set of partitions needed to scan for a certain index scan or full
+    table scan.
+
+    There are actually 14 different variants of this function, called
+    through a function pointer.
+
+    get_partition_id_list
+    get_partition_id_range
+    get_partition_id_hash_nosub
+    get_partition_id_key_nosub
+    get_partition_id_linear_hash_nosub
+    get_partition_id_linear_key_nosub
+    get_partition_id_range_sub_hash
+    get_partition_id_range_sub_key
+    get_partition_id_range_sub_linear_hash
+    get_partition_id_range_sub_linear_key
+    get_partition_id_list_sub_hash
+    get_partition_id_list_sub_key
+    get_partition_id_list_sub_linear_hash
+    get_partition_id_list_sub_linear_key
+*/
+
+/*
+  This function is used to calculate the main partition to use in the case
+  of subpartitioning, when we don't know enough to get the partition
+  identity in total.
+  SYNOPSIS
+    get_part_partition_id()
+    part_info     A reference to the partition_info struct where all the
+                  desired information is given
+    part_id       The partition id is returned through this pointer
+  RETURN VALUE
+    part_id
+    A return value of TRUE means that the fields of the partition function
+    didn't fit into any partition and thus the values of the PF-fields are
+    not allowed.
+  DESCRIPTION
+
+    There are actually 6 different variants of this function, called
+    through a function pointer.
+
+    get_partition_id_list
+    get_partition_id_range
+    get_partition_id_hash_nosub
+    get_partition_id_key_nosub
+    get_partition_id_linear_hash_nosub
+    get_partition_id_linear_key_nosub
+*/
+
+
+bool get_partition_id_list(partition_info *part_info,
+                           uint32 *part_id)
+{
+  DBUG_ENTER("get_partition_id_list");
+  LIST_PART_ENTRY *list_array= part_info->list_array;
+  int list_index;
+  longlong list_value;
+  int min_list_index= 0, max_list_index= part_info->no_list_values - 1;
+  longlong part_func_value= part_info->part_expr->val_int();
+  while (max_list_index >= min_list_index)
+  {
+    list_index= (max_list_index + min_list_index) >> 1;
+    list_value= list_array[list_index].list_value;
+    if (list_value < part_func_value)
+      min_list_index= list_index + 1;
+    else if (list_value > part_func_value)
+      max_list_index= list_index - 1;
+    else
+    {
+      *part_id= (uint32)list_array[list_index].partition_id;
+      DBUG_RETURN(FALSE);
+    }
+  }
+  *part_id= 0;
+  DBUG_RETURN(TRUE);
+}
+
+
+bool get_partition_id_range(partition_info *part_info,
+                            uint32 *part_id)
+{
+  DBUG_ENTER("get_partition_id_range");
+  longlong *range_array= part_info->range_int_array;
+  uint max_partition= part_info->no_parts - 1;
+  uint min_part_id= 0, max_part_id= max_partition, loc_part_id;
+  longlong part_func_value= part_info->part_expr->val_int();
+  while (max_part_id > min_part_id)
+  {
+    loc_part_id= (max_part_id + min_part_id + 1) >> 1;
+    if (range_array[loc_part_id] < part_func_value)
+      min_part_id= loc_part_id + 1;
+    else
+      max_part_id= loc_part_id - 1;
+  }
+  loc_part_id= max_part_id;
+  if (part_func_value >= range_array[loc_part_id])
+    if (loc_part_id != max_partition)
+      loc_part_id++;
+  *part_id= (uint32)loc_part_id;
+  if (loc_part_id == max_partition)
+    if (range_array[loc_part_id] != LONGLONG_MAX)
+      if (part_func_value >= range_array[loc_part_id])
+        DBUG_RETURN(TRUE);
+  DBUG_RETURN(FALSE);
+}
+
+bool get_partition_id_hash_nosub(partition_info *part_info,
+                                 uint32 *part_id)
+{
+  *part_id= get_part_id_hash(part_info->no_parts, part_info->part_expr);
+  return FALSE;
+}
+
+
+bool get_partition_id_linear_hash_nosub(partition_info *part_info,
+                                        uint32 *part_id)
+{
+  *part_id= get_part_id_linear_hash(part_info, part_info->no_parts,
+                                    part_info->part_expr);
+  return FALSE;
+}
+
+
+bool get_partition_id_key_nosub(partition_info *part_info,
+                                uint32 *part_id)
+{
+  *part_id= get_part_id_key(part_info->part_field_array, part_info->no_parts);
+  return FALSE;
+}
+
+
+bool get_partition_id_linear_key_nosub(partition_info *part_info,
+                                       uint32 *part_id)
+{
+  *part_id= get_part_id_linear_key(part_info,
+                                   part_info->part_field_array,
+                                   part_info->no_parts);
+  return FALSE;
+}
+
+
+bool get_partition_id_range_sub_hash(partition_info *part_info,
+                                     uint32 *part_id)
+{
+  uint32 loc_part_id, sub_part_id;
+  uint no_subparts;
+  DBUG_ENTER("get_partition_id_range_sub_hash");
+  if (unlikely(get_partition_id_range(part_info, &loc_part_id)))
+  {
+    DBUG_RETURN(TRUE);
+  }
+  no_subparts= part_info->no_subparts;
+  sub_part_id= get_part_id_hash(no_subparts, part_info->subpart_expr);
+  *part_id= get_part_id_for_sub(loc_part_id, sub_part_id, no_subparts);
+  DBUG_RETURN(FALSE);
+}
+
+
+bool get_partition_id_range_sub_linear_hash(partition_info *part_info,
+                                            uint32 *part_id)
+{
+  uint32 loc_part_id, sub_part_id;
+  uint no_subparts;
+  DBUG_ENTER("get_partition_id_range_sub_linear_hash");
+  if (unlikely(get_partition_id_range(part_info, &loc_part_id)))
+  {
+    DBUG_RETURN(TRUE);
+  }
+  no_subparts= part_info->no_subparts;
+  sub_part_id= get_part_id_linear_hash(part_info, no_subparts,
+                                       part_info->subpart_expr);
+  *part_id= get_part_id_for_sub(loc_part_id, sub_part_id, no_subparts);
+  DBUG_RETURN(FALSE);
+}
+
+
+bool get_partition_id_range_sub_key(partition_info *part_info,
+                                    uint32 *part_id)
+{
+  uint32 loc_part_id, sub_part_id;
+  uint no_subparts;
+  DBUG_ENTER("get_partition_id_range_sub_key");
+  if (unlikely(get_partition_id_range(part_info, &loc_part_id)))
+  {
+    DBUG_RETURN(TRUE);
+  }
+  no_subparts= part_info->no_subparts;
+  sub_part_id= get_part_id_key(part_info->subpart_field_array, no_subparts);
+  *part_id= get_part_id_for_sub(loc_part_id, sub_part_id, no_subparts);
+  DBUG_RETURN(FALSE);
+}
+
+
+bool get_partition_id_range_sub_linear_key(partition_info *part_info,
+                                           uint32 *part_id)
+{
+  uint32 loc_part_id, sub_part_id;
+  uint no_subparts;
+  DBUG_ENTER("get_partition_id_range_sub_linear_key");
+  if (unlikely(get_partition_id_range(part_info, &loc_part_id)))
+  {
+    DBUG_RETURN(TRUE);
+  }
+  no_subparts= part_info->no_subparts;
+  sub_part_id= get_part_id_linear_key(part_info,
+                                      part_info->subpart_field_array,
+                                      no_subparts);
+  *part_id= get_part_id_for_sub(loc_part_id, sub_part_id, no_subparts);
+  DBUG_RETURN(FALSE);
+}
+
+
+bool get_partition_id_list_sub_hash(partition_info *part_info,
+                                    uint32 *part_id)
+{
+  uint32 loc_part_id, sub_part_id;
+  uint no_subparts;
+  DBUG_ENTER("get_partition_id_list_sub_hash");
+  if (unlikely(get_partition_id_list(part_info, &loc_part_id)))
+  {
+    DBUG_RETURN(TRUE);
+  }
+  no_subparts= part_info->no_subparts;
+  sub_part_id= get_part_id_hash(no_subparts, part_info->subpart_expr);
+  *part_id= get_part_id_for_sub(loc_part_id, sub_part_id, no_subparts);
+  DBUG_RETURN(FALSE);
+}
+
+
+bool get_partition_id_list_sub_linear_hash(partition_info *part_info,
+                                           uint32 *part_id)
+{
+  uint32 loc_part_id, sub_part_id;
+  uint no_subparts;
+  DBUG_ENTER("get_partition_id_list_sub_linear_hash");
+  if (unlikely(get_partition_id_list(part_info, &loc_part_id)))
+  {
+    DBUG_RETURN(TRUE);
+  }
+  no_subparts= part_info->no_subparts;
+  sub_part_id= get_part_id_linear_hash(part_info, no_subparts,
+                                       part_info->subpart_expr);
+  *part_id= get_part_id_for_sub(loc_part_id, sub_part_id, no_subparts);
+  DBUG_RETURN(FALSE);
+}
+
+
+bool get_partition_id_list_sub_key(partition_info *part_info,
+                                   uint32 *part_id)
+{
+  uint32 loc_part_id, sub_part_id;
+  uint no_subparts;
+  DBUG_ENTER("get_partition_id_list_sub_key");
+  if (unlikely(get_partition_id_list(part_info, &loc_part_id)))
+  {
+    DBUG_RETURN(TRUE);
+  }
+  no_subparts= part_info->no_subparts;
+  sub_part_id= get_part_id_key(part_info->subpart_field_array, no_subparts);
+  *part_id= get_part_id_for_sub(loc_part_id, sub_part_id, no_subparts);
+  DBUG_RETURN(FALSE);
+}
+
+
+bool get_partition_id_list_sub_linear_key(partition_info *part_info,
+                                          uint32 *part_id)
+{
+  uint32 loc_part_id, sub_part_id;
+  uint no_subparts;
+  DBUG_ENTER("get_partition_id_list_sub_linear_key");
+  if (unlikely(get_partition_id_list(part_info, &loc_part_id)))
+  {
+    DBUG_RETURN(TRUE);
+  }
+  no_subparts= part_info->no_subparts;
+  sub_part_id= get_part_id_linear_key(part_info,
+                                      part_info->subpart_field_array,
+                                      no_subparts);
+  *part_id= get_part_id_for_sub(loc_part_id, sub_part_id, no_subparts);
+  DBUG_RETURN(FALSE);
+}
+
+
+/*
+  This function is used to calculate the subpartition id
+  SYNOPSIS
+    get_subpartition_id()
+    part_info     A reference to the partition_info struct where all the
+                  desired information is given
+  RETURN VALUE
+    part_id       The subpartition identity
+  DESCRIPTION
+    A routine used in some SELECTs when only partial knowledge of the
+    partitions is known.
+
+    There are actually 4 different variants of this function, called
+    through a function pointer.
+
+    get_partition_id_hash_sub
+    get_partition_id_key_sub
+    get_partition_id_linear_hash_sub
+    get_partition_id_linear_key_sub
+*/
+
+uint32 get_partition_id_hash_sub(partition_info *part_info)
+{
+  return get_part_id_hash(part_info->no_subparts, part_info->subpart_expr);
+}
+
+
+uint32 get_partition_id_linear_hash_sub(partition_info *part_info)
+{
+  return get_part_id_linear_hash(part_info, part_info->no_subparts,
+                                 part_info->subpart_expr);
+}
+
+
+uint32 get_partition_id_key_sub(partition_info *part_info)
+{
+  return get_part_id_key(part_info->subpart_field_array,
+                         part_info->no_subparts);
+}
+
+
+uint32 get_partition_id_linear_key_sub(partition_info *part_info)
+{
+  return get_part_id_linear_key(part_info,
+                                part_info->subpart_field_array,
+                                part_info->no_subparts);
+}
+
+
+/*
+  Set an indicator on all partition fields that are set by the key
+  SYNOPSIS
+    set_PF_fields_in_key()
+    key_info      Information about the index
+    key_length    Length of key
+  RETURN VALUE
+    TRUE          Found partition field set by key
+    FALSE         No partition field set by key
+*/
+
+static bool set_PF_fields_in_key(KEY *key_info, uint key_length)
+{
+  KEY_PART_INFO *key_part;
+  bool found_part_field= FALSE;
+  DBUG_ENTER("set_PF_fields_in_key");
+
+  for (key_part= key_info->key_part; (int)key_length > 0; key_part++)
+  {
+    if (key_part->null_bit)
+      key_length--;
+    if (key_part->type == HA_KEYTYPE_BIT)
+    {
+      if (((Field_bit*)key_part->field)->bit_len)
+        key_length--;
+    }
+    if (key_part->key_part_flag & (HA_BLOB_PART | HA_VAR_LENGTH_PART))
+    {
+      key_length-= HA_KEY_BLOB_LENGTH;
+    }
+    if (key_length < key_part->length)
+      break;
+    key_length-= key_part->length;
+    if (key_part->field->flags & FIELD_IN_PART_FUNC_FLAG)
+    {
+      found_part_field= TRUE;
+      key_part->field->flags|= GET_FIXED_FIELDS_FLAG;
+    }
+  }
+  DBUG_RETURN(found_part_field);
+}
+
+
+/*
+  We have found that at least one partition field was set by a key, now
+  check if a partition function has all its fields bound or not.
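+  For example, with PARTITION BY HASH (a) on a table that has an index on
+  (a, b), a key prefix covering only column a already binds all fields of
+  the partition function, so the partition id can be computed from the
+  prefix alone.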
+  SYNOPSIS
+    check_part_func_bound()
+    ptr           NULL-terminated array of fields (partition fields)
+  RETURN VALUE
+    TRUE          All fields in partition function are set
+    FALSE         Not all fields in partition function are set
+*/
+
+static bool check_part_func_bound(Field **ptr)
+{
+  bool result= TRUE;
+  DBUG_ENTER("check_part_func_bound");
+
+  for (; *ptr; ptr++)
+  {
+    if (!((*ptr)->flags & GET_FIXED_FIELDS_FLAG))
+    {
+      result= FALSE;
+      break;
+    }
+  }
+  DBUG_RETURN(result);
+}
+
+
+/*
+  Get the id of the subpartitioning part by using the key buffer of the
+  index scan.
+  SYNOPSIS
+    get_sub_part_id_from_key()
+    table         The table object
+    buf           A buffer that can be used to evaluate the partition
+                  function
+    key_info      The index object
+    key_spec      A key_range containing key and key length
+  RETURN VALUES
+    part_id       Subpartition id to use
+  DESCRIPTION
+    Use the key buffer to set up the record in buf, move field pointers
+    and get the partition identity, and restore field pointers afterwards.
+*/
+
+static uint32 get_sub_part_id_from_key(const TABLE *table, byte *buf,
+                                       KEY *key_info,
+                                       const key_range *key_spec)
+{
+  byte *rec0= table->record[0];
+  partition_info *part_info= table->s->part_info;
+  uint32 part_id;
+  DBUG_ENTER("get_sub_part_id_from_key");
+
+  key_restore(buf, (byte*)key_spec->key, key_info, key_spec->length);
+  if (likely(rec0 == buf))
+    part_id= part_info->get_subpartition_id(part_info);
+  else
+  {
+    Field **part_field_array= part_info->subpart_field_array;
+    set_field_ptr(part_field_array, buf, rec0);
+    part_id= part_info->get_subpartition_id(part_info);
+    set_field_ptr(part_field_array, rec0, buf);
+  }
+  DBUG_RETURN(part_id);
+}
+
+/*
+  Get the id of the partitioning part by using the key buffer of the
+  index scan.
+  SYNOPSIS
+    get_part_id_from_key()
+    table         The table object
+    buf           A buffer that can be used to evaluate the partition
+                  function
+    key_info      The index object
+    key_spec      A key_range containing key and key length
+    part_id       Partition to use
+  RETURN VALUES
+    TRUE          Partition to use not found
+    FALSE         Ok, part_id indicates partition to use
+  DESCRIPTION
+    Use the key buffer to set up the record in buf, move field pointers
+    and get the partition identity, and restore field pointers afterwards.
+*/
+bool get_part_id_from_key(const TABLE *table, byte *buf, KEY *key_info,
+                          const key_range *key_spec, uint32 *part_id)
+{
+  bool result;
+  byte *rec0= table->record[0];
+  partition_info *part_info= table->s->part_info;
+  DBUG_ENTER("get_part_id_from_key");
+
+  key_restore(buf, (byte*)key_spec->key, key_info, key_spec->length);
+  if (likely(rec0 == buf))
+    result= part_info->get_part_partition_id(part_info, part_id);
+  else
+  {
+    Field **part_field_array= part_info->part_field_array;
+    set_field_ptr(part_field_array, buf, rec0);
+    result= part_info->get_part_partition_id(part_info, part_id);
+    set_field_ptr(part_field_array, rec0, buf);
+  }
+  DBUG_RETURN(result);
+}
+
+/*
+  Get the partitioning id of the full PF by using the key buffer of the
+  index scan.
+  SYNOPSIS
+    get_full_part_id_from_key()
+    table         The table object
+    buf           A buffer that is used to evaluate the partition function
+    key_info      The index object
+    key_spec      A key_range containing key and key length
+    part_spec     A partition id containing start part and end part
+  RETURN VALUES
+    part_spec
+    No partitions to scan is indicated by end_part > start_part when
+    returning
+  DESCRIPTION
+    Use the key buffer to set up the record in buf, move field pointers if
+    needed and get the partition identity, and restore field pointers
+    afterwards.
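+    On return, start_part == end_part identifies the single partition to
+    scan; if the partition function value falls outside all partitions
+    (possible for RANGE and LIST), start_part is incremented past
+    end_part, signalling an empty scan range.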
+*/
+
+void get_full_part_id_from_key(const TABLE *table, byte *buf,
+                               KEY *key_info,
+                               const key_range *key_spec,
+                               part_id_range *part_spec)
+{
+  bool result;
+  partition_info *part_info= table->s->part_info;
+  byte *rec0= table->record[0];
+  DBUG_ENTER("get_full_part_id_from_key");
+
+  key_restore(buf, (byte*)key_spec->key, key_info, key_spec->length);
+  if (likely(rec0 == buf))
+    result= part_info->get_partition_id(part_info, &part_spec->start_part);
+  else
+  {
+    Field **part_field_array= part_info->full_part_field_array;
+    set_field_ptr(part_field_array, buf, rec0);
+    result= part_info->get_partition_id(part_info, &part_spec->start_part);
+    set_field_ptr(part_field_array, rec0, buf);
+  }
+  part_spec->end_part= part_spec->start_part;
+  if (unlikely(result))
+    part_spec->start_part++;
+  DBUG_VOID_RETURN;
+}
+
+/*
+  Get the set of partitions to use in query.
+  SYNOPSIS
+    get_partition_set()
+    table         The table object
+    buf           A buffer that can be used to evaluate the partition
+                  function
+    index         The index of the key used, if MAX_KEY no index used
+    key_spec      A key_range containing key and key length
+    part_spec     Contains start part, end part and indicator if bitmap is
+                  used for which partitions to scan
+  DESCRIPTION
+    This function is called to discover which partitions to use in an
+    index scan or a full table scan.
+    It returns a range of partitions to scan. If there are holes in this
+    range with partitions that are not needed to scan, a bit array is used
+    to signal which partitions to use and which not to use.
+    If start_part > end_part at return it means no partition needs to be
+    scanned. If start_part == end_part it always means a single partition
+    needs to be scanned.
+  RETURN VALUE
+    part_spec
+*/
+void get_partition_set(const TABLE *table, byte *buf, const uint index,
+                       const key_range *key_spec, part_id_range *part_spec)
+{
+  partition_info *part_info= table->s->part_info;
+  uint no_parts= get_tot_partitions(part_info), i, part_id;
+  uint sub_part= no_parts;
+  uint32 part_part= no_parts;
+  KEY *key_info= NULL;
+  bool found_part_field= FALSE;
+  DBUG_ENTER("get_partition_set");
+
+  part_spec->use_bit_array= FALSE;
+  part_spec->start_part= 0;
+  part_spec->end_part= no_parts - 1;
+  if ((index < MAX_KEY) &&
+      key_spec->flag == (uint)HA_READ_KEY_EXACT &&
+      part_info->some_fields_in_PF.is_set(index))
+  {
+    key_info= table->key_info+index;
+    /*
+      The index can potentially provide at least one PF-field (field in the
+      partition function). Thus it is interesting to continue our probe.
+    */
+    if (key_spec->length == key_info->key_length)
+    {
+      /*
+        The entire key is set so we can check whether we can immediately
+        derive either the complete PF or if we can derive either
+        the top PF or the subpartitioning PF. This can be established by
+        checking precalculated bits on each index.
+      */
+      if (part_info->all_fields_in_PF.is_set(index))
+      {
+        /*
+          We can derive the exact partition to use, no more than this one
+          is needed.
+        */
+        get_full_part_id_from_key(table,buf,key_info,key_spec,part_spec);
+        DBUG_VOID_RETURN;
+      }
+      else if (is_sub_partitioned(part_info))
+      {
+        if (part_info->all_fields_in_SPF.is_set(index))
+          sub_part= get_sub_part_id_from_key(table, buf, key_info, key_spec);
+        else if (part_info->all_fields_in_PPF.is_set(index))
+        {
+          if (get_part_id_from_key(table,buf,key_info,key_spec,(uint32*)&part_part))
+          {
+            /*
+              The value of the RANGE or LIST partitioning was outside of
+              allowed values. Thus it is certain that the result of this
+              scan will be empty.
+            */
+            part_spec->start_part= no_parts;
+            DBUG_VOID_RETURN;
+          }
+        }
+      }
+    }
+    else
+    {
+      /*
+        Set an indicator on all partition fields that are bound.
+        If at least one PF-field was bound it pays off to check whether
+        the PF or PPF or SPF has been bound.
+        (PF = Partition Function, SPF = Subpartition Function and
+         PPF = Partition Function part of subpartitioning)
+      */
+      if ((found_part_field= set_PF_fields_in_key(key_info,
+                                                  key_spec->length)))
+      {
+        if (check_part_func_bound(part_info->full_part_field_array))
+        {
+          /*
+            We were able to bind all fields in the partition function even
+            by using only a part of the key. Calculate the partition to use.
+          */
+          get_full_part_id_from_key(table,buf,key_info,key_spec,part_spec);
+          clear_indicator_in_key_fields(key_info);
+          DBUG_VOID_RETURN;
+        }
+        else if (check_part_func_bound(part_info->subpart_field_array))
+          sub_part= get_sub_part_id_from_key(table, buf, key_info, key_spec);
+        else if (check_part_func_bound(part_info->part_field_array))
+        {
+          if (get_part_id_from_key(table,buf,key_info,key_spec,(uint32*)&part_part))
+          {
+            part_spec->start_part= no_parts;
+            clear_indicator_in_key_fields(key_info);
+            DBUG_VOID_RETURN;
+          }
+        }
+      }
+    }
+  }
+  {
+    /*
+      The next step is to analyse the table condition to see whether any
+      information about which partitions to scan can be derived from there.
+      Currently not implemented.
+    */
+  }
+  /*
+    If we come here, we have either discovered nothing or a range of
+    partitions with possible holes in it. We need a bit vector to take
+    the work further here.
+  */
+  if (!(part_part == no_parts && sub_part == no_parts))
+  {
+    /*
+      We can only arrive here if we are using subpartitioning.
+    */
+    if (part_part != no_parts)
+    {
+      /*
+        We know the top partition and need to scan all underlying
+        subpartitions. This is a range without holes.
+      */
+      DBUG_ASSERT(sub_part == no_parts);
+      part_spec->start_part= part_part * part_info->no_subparts;
+      part_spec->end_part= part_spec->start_part + part_info->no_subparts - 1;
+    }
+    else
+    {
+      DBUG_ASSERT(sub_part != no_parts);
+      part_spec->use_bit_array= TRUE;
+      part_spec->start_part= sub_part;
+      part_spec->end_part= sub_part +
+                           (part_info->no_subparts*(part_info->no_parts-1));
+      for (i= 0, part_id= sub_part; i < part_info->no_parts;
+           i++, part_id+= part_info->no_subparts)
+        ; //Set bit part_id in bit array
+    }
+  }
+  if (found_part_field)
+    clear_indicator_in_key_fields(key_info);
+  DBUG_VOID_RETURN;
+}
+
+
+/*
+  If the table is partitioned we will read the partition info from the
+  .frm file here.
+  -------------------------------
+  | Fileinfo     64 bytes       |
+  -------------------------------
+  | Formnames    7 bytes        |
+  -------------------------------
+  | Not used    4021 bytes      |
+  -------------------------------
+  | Keyinfo + record            |
+  -------------------------------
+  | Padded to next multiple     |
+  | of IO_SIZE                  |
+  -------------------------------
+  | Forminfo     288 bytes      |
+  -------------------------------
+  | Screen buffer, to make      |
+  | field names readable        |
+  -------------------------------
+  | Packed field info           |
+  | 17 + 1 + strlen(field_name) |
+  | + 1 end of file character   |
+  -------------------------------
+  | Partition info              |
+  -------------------------------
+  We provide the length of the partition info in Fileinfo[55-58].
+
+  Read the partition syntax from the frm file and parse it to get the
+  data structures of the partitioning.
+  SYNOPSIS
+    mysql_unpack_partition()
+    file          File reference of frm file
+    thd           Thread object
+    part_info_len Length of partition syntax
+    table         Table object of partitioned table
+  RETURN VALUE
+    TRUE          Error
+    FALSE         Success
+  DESCRIPTION
+    Read the partition syntax from the current position in the frm file.
+    Initiate a LEX object, save the list of item tree objects to free
+    after the query is done. Set up the partition info object such that
+    the parser knows it is called internally. Call the parser to create
+    the data structures (the best possible recreation of the item trees
+    and so forth, since there is no serialisation of these objects other
+    than in parseable text format).
+    We need to save the text of the partition functions since it is not
+    possible to retrace this given an item tree.
+*/
+
+bool mysql_unpack_partition(THD *thd, uchar *part_buf, uint part_info_len,
+                            TABLE* table, enum db_type default_db_type)
+{
+  Item *thd_free_list= thd->free_list;
+  bool result= TRUE;
+  partition_info *part_info;
+  LEX *old_lex= thd->lex, lex;
+  DBUG_ENTER("mysql_unpack_partition");
+  thd->lex= &lex;
+  lex_start(thd, part_buf, part_info_len);
+  /*
+    We need to use the current SELECT_LEX since we need to keep the
+    Name_resolution_context object which is referenced from the
+    Item_field objects.
+    This is not a nice solution since if the parser uses current_select
+    for anything else it will corrupt the current LEX object.
+  */
+  thd->lex->current_select= old_lex->current_select;
+  /*
+    All Items created are put into a free list on the THD object. This
+    list is used to free all Item objects after completing a query. We
+    don't want that to happen with the Item tree created as part of the
+    partition info. This should be attached to the table object and
+    remain so until the table object is released.
+    Thus we move away the current list temporarily and start a new list
+    that we then save in the partition info structure.
+  */
+  thd->free_list= NULL;
+  lex.part_info= (partition_info*)1; //Indicate yyparse from this place
+  if (yyparse((void*)thd) || thd->is_fatal_error)
+  {
+    free_items(thd->free_list);
+    goto end;
+  }
+  part_info= lex.part_info;
+  table->s->part_info= part_info;
+  if (part_info->default_engine_type == DB_TYPE_UNKNOWN)
+    part_info->default_engine_type= default_db_type;
+  else
+  {
+    DBUG_ASSERT(part_info->default_engine_type == default_db_type);
+  }
+  part_info->item_free_list= thd->free_list;
+
+  {
+    /*
+      This code part allocates memory for the serialised item information
+      for the partition functions. In most cases this is not needed, but
+      if the table is used for SHOW CREATE TABLE or an ALTER TABLE that
+      modifies partition information it is needed, and the info is lost
+      if we don't save it here, so unfortunately we have to do it here
+      even if in most cases it is not needed. This is a consequence of
+      the fact that item trees are not serialisable.
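+      For example, the text of a function such as "HASH (a + b)" is kept
+      verbatim in part_func_string, since only re-parsing that text can
+      recreate the corresponding Item tree ("a + b" being an illustrative
+      function, not one required by this code).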
+    */
+    uint part_func_len= part_info->part_func_len;
+    uint subpart_func_len= part_info->subpart_func_len;
+    char *part_func_string, *subpart_func_string= NULL;
+    if (!((part_func_string= sql_alloc(part_func_len))) ||
+        (subpart_func_len &&
+         !((subpart_func_string= sql_alloc(subpart_func_len)))))
+    {
+      my_error(ER_OUTOFMEMORY, MYF(0), part_func_len);
+      free_items(thd->free_list);
+      part_info->item_free_list= 0;
+      goto end;
+    }
+    memcpy(part_func_string, part_info->part_func_string, part_func_len);
+    if (subpart_func_len)
+      memcpy(subpart_func_string, part_info->subpart_func_string,
+             subpart_func_len);
+    part_info->part_func_string= part_func_string;
+    part_info->subpart_func_string= subpart_func_string;
+  }
+
+  result= FALSE;
+end:
+  thd->free_list= thd_free_list;
+  thd->lex= old_lex;
+  DBUG_RETURN(result);
+}
+#endif
+
+/*
+  Prepare for calling val_int on partition function by setting fields to
+  point to the record where the values of the PF-fields are stored.
+  SYNOPSIS
+    set_field_ptr()
+    ptr           Array of fields to change ptr
+    new_buf       New record pointer
+    old_buf       Old record pointer
+  DESCRIPTION
+    Set ptr in field objects of field array to refer to the new_buf record
+    instead of the previous old_buf. Used before calling val_int, and
+    after that it is used to restore the pointers to table->record[0].
+    This routine is placed outside of partition code since it can be
+    useful also for other programs.
+*/
+
+void set_field_ptr(Field **ptr, const byte *new_buf,
+                   const byte *old_buf)
+{
+  my_ptrdiff_t diff= (new_buf - old_buf);
+  DBUG_ENTER("set_field_ptr");
+
+  do
+  {
+    (*ptr)->move_field(diff);
+  } while (*(++ptr));
+  DBUG_VOID_RETURN;
+}
+
+
+/*
+  Prepare for calling val_int on partition function by setting fields to
+  point to the record where the values of the PF-fields are stored.
+  This variant works on a key_part reference.
+  It is not required that all fields are NOT NULL fields.
+  SYNOPSIS
+    set_key_field_ptr()
+    key_info      key info with a set of fields to change ptr
+    new_buf       New record pointer
+    old_buf       Old record pointer
+  DESCRIPTION
+    Set ptr in field objects of field array to refer to the new_buf record
+    instead of the previous old_buf. Used before calling val_int, and
+    after that it is used to restore the pointers to table->record[0].
+    This routine is placed outside of partition code since it can be
+    useful also for other programs.
+*/
+
+void set_key_field_ptr(KEY *key_info, const byte *new_buf,
+                       const byte *old_buf)
+{
+  KEY_PART_INFO *key_part= key_info->key_part;
+  uint key_parts= key_info->key_parts, i= 0;
+  my_ptrdiff_t diff= (new_buf - old_buf);
+  DBUG_ENTER("set_key_field_ptr");
+
+  do
+  {
+    key_part->field->move_field(diff);
+    key_part++;
+  } while (++i < key_parts);
+  DBUG_VOID_RETURN;
+}
diff --git a/sql/sql_plugin.cc b/sql/sql_plugin.cc
new file mode 100644
index 00000000000..efe8f256af5
--- /dev/null
+++ b/sql/sql_plugin.cc
@@ -0,0 +1,622 @@
+/* Copyright (C) 2005 MySQL AB
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#include "mysql_priv.h"
+#include <my_pthread.h>
+#define REPORT_TO_LOG 1
+#define REPORT_TO_USER 2
+
+char *opt_plugin_dir_ptr;
+char opt_plugin_dir[FN_REFLEN];
+
+static const char *plugin_interface_version_sym=
+  "_mysql_plugin_interface_version_";
+static const char *plugin_declarations_sym= "_mysql_plugin_declarations_";
+static int min_plugin_interface_version= 0x0000;
+
+static DYNAMIC_ARRAY plugin_dl_array;
+static DYNAMIC_ARRAY plugin_array;
+static HASH plugin_hash[MYSQL_MAX_PLUGIN_TYPE_NUM];
+static rw_lock_t THR_LOCK_plugin;
+static bool initialized= 0;
+
+
+static struct st_plugin_dl *plugin_dl_find(LEX_STRING *dl)
+{
+  uint i;
+  DBUG_ENTER("plugin_dl_find");
+  for (i= 0; i < plugin_dl_array.elements; i++)
+  {
+    struct st_plugin_dl *tmp= dynamic_element(&plugin_dl_array, i,
+                                              struct st_plugin_dl *);
+    if (tmp->ref_count &&
+        ! my_strnncoll(files_charset_info,
+                       (const uchar *)dl->str, dl->length,
+                       (const uchar *)tmp->dl.str, tmp->dl.length))
+      DBUG_RETURN(tmp);
+  }
+  DBUG_RETURN(0);
+}
+
+
+static st_plugin_dl *plugin_dl_add(LEX_STRING *dl, int report)
+{
+#ifdef HAVE_DLOPEN
+  char dlpath[FN_REFLEN];
+  uint plugin_dir_len, dummy_errors;
+  struct st_plugin_dl *tmp, plugin_dl;
+  void *sym;
+  DBUG_ENTER("plugin_dl_add");
+  plugin_dir_len= strlen(opt_plugin_dir);
+  /*
+    Ensure that the dll doesn't have a path.
+    This is done to ensure that only approved libraries from the
+    plugin directory are used (to make this even remotely secure).
+  */
+  if (my_strchr(files_charset_info, dl->str, dl->str + dl->length, FN_LIBCHAR) ||
+      dl->length > NAME_LEN ||
+      plugin_dir_len + dl->length + 1 >= FN_REFLEN)
+  {
+    if (report & REPORT_TO_USER)
+      my_error(ER_UDF_NO_PATHS, MYF(0));
+    if (report & REPORT_TO_LOG)
+      sql_print_error(ER(ER_UDF_NO_PATHS));
+    DBUG_RETURN(0);
+  }
+  /* If this dll is already loaded just increase ref_count. */
+  if ((tmp= plugin_dl_find(dl)))
+  {
+    tmp->ref_count++;
+    DBUG_RETURN(tmp);
+  }
+  /* Compile dll path */
+  strxnmov(dlpath, sizeof(dlpath) - 1, opt_plugin_dir, "/", dl->str, NullS);
+  plugin_dl.ref_count= 1;
+  /* Open new dll handle */
+  if (!(plugin_dl.handle= dlopen(dlpath, RTLD_NOW)))
+  {
+    if (report & REPORT_TO_USER)
+      my_error(ER_CANT_OPEN_LIBRARY, MYF(0), dlpath, errno, dlerror());
+    if (report & REPORT_TO_LOG)
+      sql_print_error(ER(ER_CANT_OPEN_LIBRARY), dlpath, errno, dlerror());
+    DBUG_RETURN(0);
+  }
+  /* Determine interface version */
+  if (!(sym= dlsym(plugin_dl.handle, plugin_interface_version_sym)))
+  {
+    dlclose(plugin_dl.handle);
+    if (report & REPORT_TO_USER)
+      my_error(ER_CANT_FIND_DL_ENTRY, MYF(0), plugin_interface_version_sym);
+    if (report & REPORT_TO_LOG)
+      sql_print_error(ER(ER_CANT_FIND_DL_ENTRY), plugin_interface_version_sym);
+    DBUG_RETURN(0);
+  }
+  plugin_dl.version= *(int *)sym;
+  /* Versioning */
+  if (plugin_dl.version < min_plugin_interface_version ||
+      (plugin_dl.version >> 8) > (MYSQL_PLUGIN_INTERFACE_VERSION >> 8))
+  {
+    dlclose(plugin_dl.handle);
+    if (report & REPORT_TO_USER)
+      my_error(ER_CANT_OPEN_LIBRARY, MYF(0), dlpath, 0,
+               "plugin interface version mismatch");
+    if (report & REPORT_TO_LOG)
+      sql_print_error(ER(ER_CANT_OPEN_LIBRARY), dlpath, 0,
+                      "plugin interface version mismatch");
+    DBUG_RETURN(0);
+  }
+  /* Find plugin declarations */
+  if (!(sym= dlsym(plugin_dl.handle, plugin_declarations_sym)))
+  {
+    dlclose(plugin_dl.handle);
+    if (report & REPORT_TO_USER)
+      my_error(ER_CANT_FIND_DL_ENTRY, MYF(0), plugin_declarations_sym);
+    if (report & REPORT_TO_LOG)
+      sql_print_error(ER(ER_CANT_FIND_DL_ENTRY), plugin_declarations_sym);
+    DBUG_RETURN(0);
+  }
+  plugin_dl.plugins= (struct st_mysql_plugin *)sym;
+  /* Duplicate and convert dll name */
+  plugin_dl.dl.length= dl->length * files_charset_info->mbmaxlen + 1;
+  if (! (plugin_dl.dl.str= my_malloc(plugin_dl.dl.length, MYF(0))))
+  {
+    dlclose(plugin_dl.handle);
+    if (report & REPORT_TO_USER)
+      my_error(ER_OUTOFMEMORY, MYF(0), plugin_dl.dl.length);
+    if (report & REPORT_TO_LOG)
+      sql_print_error(ER(ER_OUTOFMEMORY), plugin_dl.dl.length);
+    DBUG_RETURN(0);
+  }
+  plugin_dl.dl.length= copy_and_convert(plugin_dl.dl.str, plugin_dl.dl.length,
+                                        files_charset_info, dl->str,
+                                        dl->length, system_charset_info,
+                                        &dummy_errors);
+  plugin_dl.dl.str[plugin_dl.dl.length]= 0;
+  /* Add this dll to array */
+  if (insert_dynamic(&plugin_dl_array, (gptr)&plugin_dl))
+  {
+    dlclose(plugin_dl.handle);
+    my_free(plugin_dl.dl.str, MYF(0));
+    if (report & REPORT_TO_USER)
+      my_error(ER_OUTOFMEMORY, MYF(0), sizeof(struct st_plugin_dl));
+    if (report & REPORT_TO_LOG)
+      sql_print_error(ER(ER_OUTOFMEMORY), sizeof(struct st_plugin_dl));
+    DBUG_RETURN(0);
+  }
+  DBUG_RETURN(dynamic_element(&plugin_dl_array, plugin_dl_array.elements - 1,
+                              struct st_plugin_dl *));
+#else
+  DBUG_ENTER("plugin_dl_add");
+  if (report & REPORT_TO_USER)
+    my_error(ER_FEATURE_DISABLED, MYF(0), "plugin", "HAVE_DLOPEN");
+  if (report & REPORT_TO_LOG)
+    sql_print_error(ER(ER_FEATURE_DISABLED), "plugin", "HAVE_DLOPEN");
+  DBUG_RETURN(0);
+#endif
+}
+
+
+static void plugin_dl_del(LEX_STRING *dl)
+{
+#ifdef HAVE_DLOPEN
+  uint i;
+  DBUG_ENTER("plugin_dl_del");
+  for (i= 0; i < plugin_dl_array.elements; i++)
+  {
+    struct st_plugin_dl *tmp= dynamic_element(&plugin_dl_array, i,
+                                              struct st_plugin_dl *);
+    if (tmp->ref_count &&
+        ! my_strnncoll(files_charset_info,
+                       (const uchar *)dl->str, dl->length,
+                       (const uchar *)tmp->dl.str, tmp->dl.length))
+    {
+      /* Do not remove this element, unless no other plugin uses this dll. */
+      if (! --tmp->ref_count)
+      {
+        dlclose(tmp->handle);
+        my_free(tmp->dl.str, MYF(0));
+        bzero(tmp, sizeof(struct st_plugin_dl));
+      }
+      break;
+    }
+  }
+  DBUG_VOID_RETURN;
+#endif
+}
+
+
+static struct st_plugin_int *plugin_find_internal(LEX_STRING *name, int type)
+{
+  uint i;
+  DBUG_ENTER("plugin_find_internal");
+  if (! initialized)
+    DBUG_RETURN(0);
+  if (type == MYSQL_ANY_PLUGIN)
+  {
+    for (i= 0; i < MYSQL_MAX_PLUGIN_TYPE_NUM; i++)
+    {
+      struct st_plugin_int *plugin= (st_plugin_int *)
+        hash_search(&plugin_hash[i], name->str, name->length);
+      if (plugin)
+        DBUG_RETURN(plugin);
+    }
+  }
+  else
+    DBUG_RETURN((st_plugin_int *)
+      hash_search(&plugin_hash[type], name->str, name->length));
+  DBUG_RETURN(0);
+}
+
+
+my_bool plugin_is_ready(LEX_STRING *name, int type)
+{
+  my_bool rc= FALSE;
+  struct st_plugin_int *plugin;
+  DBUG_ENTER("plugin_is_ready");
+  rw_rdlock(&THR_LOCK_plugin);
+  if ((plugin= plugin_find_internal(name, type)) &&
+      plugin->state == PLUGIN_IS_READY)
+    rc= TRUE;
+  rw_unlock(&THR_LOCK_plugin);
+  DBUG_RETURN(rc);
+}
+
+
+struct st_plugin_int *plugin_lock(LEX_STRING *name, int type)
+{
+  struct st_plugin_int *rc;
+  DBUG_ENTER("plugin_lock");
+  rw_wrlock(&THR_LOCK_plugin);
+  if ((rc= plugin_find_internal(name, type)))
+  {
+    if (rc->state == PLUGIN_IS_READY)
+      rc->ref_count++;
+    else
+      rc= 0;
+  }
+  rw_unlock(&THR_LOCK_plugin);
+  DBUG_RETURN(rc);
+}
+
+
+static my_bool plugin_add(LEX_STRING *name, LEX_STRING *dl, int report)
+{
+  struct st_plugin_int tmp;
+  struct st_mysql_plugin *plugin;
+  DBUG_ENTER("plugin_add");
+  if (plugin_find_internal(name, MYSQL_ANY_PLUGIN))
+  {
+    if (report & REPORT_TO_USER)
+      my_error(ER_UDF_EXISTS, MYF(0), name->str);
+    if (report & REPORT_TO_LOG)
+      sql_print_error(ER(ER_UDF_EXISTS), name->str);
+    DBUG_RETURN(TRUE);
+  }
+  if (! (tmp.plugin_dl= plugin_dl_add(dl, report)))
+    DBUG_RETURN(TRUE);
+  /* Find plugin by name */
+  for (plugin= tmp.plugin_dl->plugins; plugin->info; plugin++)
+  {
+    uint name_len= strlen(plugin->name);
+    if (plugin->type >= 0 && plugin->type < MYSQL_MAX_PLUGIN_TYPE_NUM &&
+        ! my_strnncoll(system_charset_info,
my_strnncoll(system_charset_info, + (const uchar *)name->str, name->length, + (const uchar *)plugin->name, + name_len)) + { + tmp.plugin= plugin; + tmp.name.str= (char *)plugin->name; + tmp.name.length= name_len; + tmp.ref_count= 0; + tmp.state= PLUGIN_IS_UNINITIALIZED; + if (insert_dynamic(&plugin_array, (gptr)&tmp)) + { + if (report & REPORT_TO_USER) + my_error(ER_OUTOFMEMORY, MYF(0), sizeof(struct st_plugin_int)); + if (report & REPORT_TO_LOG) + sql_print_error(ER(ER_OUTOFMEMORY), sizeof(struct st_plugin_int)); + goto err; + } + if (my_hash_insert(&plugin_hash[plugin->type], + (byte*)dynamic_element(&plugin_array, + plugin_array.elements - 1, + struct st_plugin_int *))) + { + struct st_plugin_int *tmp_plugin= dynamic_element(&plugin_array, + plugin_array.elements - 1, struct st_plugin_int *); + tmp_plugin->state= PLUGIN_IS_FREED; + if (report & REPORT_TO_USER) + my_error(ER_OUTOFMEMORY, MYF(0), sizeof(struct st_plugin_int)); + if (report & REPORT_TO_LOG) + sql_print_error(ER(ER_OUTOFMEMORY), sizeof(struct st_plugin_int)); + goto err; + } + DBUG_RETURN(FALSE); + } + } + if (report & REPORT_TO_USER) + my_error(ER_CANT_FIND_DL_ENTRY, MYF(0), name->str); + if (report & REPORT_TO_LOG) + sql_print_error(ER(ER_CANT_FIND_DL_ENTRY), name->str); +err: + plugin_dl_del(dl); + DBUG_RETURN(TRUE); +} + + +static void plugin_del(LEX_STRING *name) +{ + uint i; + struct st_plugin_int *plugin; + DBUG_ENTER("plugin_del"); + if ((plugin= plugin_find_internal(name, MYSQL_ANY_PLUGIN))) + { + hash_delete(&plugin_hash[plugin->plugin->type], (byte*)plugin); + plugin_dl_del(&plugin->plugin_dl->dl); + plugin->state= PLUGIN_IS_FREED; + } + DBUG_VOID_RETURN; +} + + +void plugin_unlock(struct st_plugin_int *plugin) +{ + DBUG_ENTER("plugin_release"); + rw_wrlock(&THR_LOCK_plugin); + DBUG_ASSERT(plugin && plugin->ref_count); + plugin->ref_count--; + if (plugin->state == PLUGIN_IS_DELETED && ! 
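/*
  Illustrative aside, not part of the patch: plugin_add() above resolves a
  name by walking the library's declaration array, which ends at the first
  entry whose info pointer is null; the real code compares names with
  my_strnncoll() under system_charset_info. That scan in isolation, with an
  invented descriptor type standing in for st_mysql_plugin and a plain
  case-insensitive compare standing in for the collation-aware one:
*/
#include <strings.h>

struct example_decl
{
  const char *name;
  const void *info;               /* info == 0 terminates the array */
  int type;
};

static const example_decl *example_find_decl(const example_decl *decls,
                                             const char *name,
                                             int max_type)
{
  for (const example_decl *d= decls; d->info; d++)
    if (d->type >= 0 && d->type < max_type &&
        strcasecmp(d->name, name) == 0)
      return d;
  return 0;
}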
plugin->ref_count) + { + if (plugin->plugin->deinit) + plugin->plugin->deinit(); + plugin_del(&plugin->name); + } + rw_unlock(&THR_LOCK_plugin); + DBUG_VOID_RETURN; +} + + +static void plugin_call_initializer(void) +{ + uint i; + DBUG_ENTER("plugin_call_initializer"); + for (i= 0; i < plugin_array.elements; i++) + { + struct st_plugin_int *tmp= dynamic_element(&plugin_array, i, + struct st_plugin_int *); + if (tmp->state == PLUGIN_IS_UNINITIALIZED && tmp->plugin->init) + { + DBUG_PRINT("info", ("Initializing plugin: '%s'", tmp->name.str)); + if (tmp->plugin->init()) + { + sql_print_error("Plugin '%s' init function returned error.", + tmp->name.str); + DBUG_PRINT("warning", ("Plugin '%s' init function returned error.", + tmp->name.str)) + plugin_del(&tmp->name); + } + } + if (tmp->state == PLUGIN_IS_UNINITIALIZED) + tmp->state= PLUGIN_IS_READY; + } + DBUG_VOID_RETURN; +} + + +static void plugin_call_deinitializer(void) +{ + uint i; + DBUG_ENTER("plugin_call_deinitializer"); + for (i= 0; i < plugin_array.elements; i++) + { + struct st_plugin_int *tmp= dynamic_element(&plugin_array, i, + struct st_plugin_int *); + if (tmp->state == PLUGIN_IS_READY) + { + if (tmp->plugin->deinit) + { + DBUG_PRINT("info", ("Deinitializing plugin: '%s'", tmp->name.str)); + if (tmp->plugin->deinit()) + { + DBUG_PRINT("warning", ("Plugin '%s' deinit function returned error.", + tmp->name.str)) + } + } + tmp->state= PLUGIN_IS_UNINITIALIZED; + } + } + DBUG_VOID_RETURN; +} + + +static byte *get_hash_key(const byte *buff, uint *length, + my_bool not_used __attribute__((unused))) +{ + struct st_plugin_int *plugin= (st_plugin_int *)buff; + *length= (uint)plugin->name.length; + return((byte *)plugin->name.str); +} + + +void plugin_init(void) +{ + TABLE_LIST tables; + TABLE *table; + READ_RECORD read_record_info; + int error, i; + MEM_ROOT mem; + DBUG_ENTER("plugin_init"); + if (initialized) + DBUG_VOID_RETURN; + my_rwlock_init(&THR_LOCK_plugin, NULL); + THD *new_thd = new THD; + if (!new_thd || + my_init_dynamic_array(&plugin_dl_array,sizeof(struct st_plugin_dl),16,16) || + my_init_dynamic_array(&plugin_array,sizeof(struct st_plugin_int),16,16)) + { + sql_print_error("Can't allocate memory for plugin structures"); + delete new_thd; + delete_dynamic(&plugin_dl_array); + delete_dynamic(&plugin_array); + DBUG_VOID_RETURN; + } + for (i= 0; i < MYSQL_MAX_PLUGIN_TYPE_NUM; i++) + { + if (hash_init(&plugin_hash[i], system_charset_info, 16, 0, 0, + get_hash_key, NULL, 0)) + { + sql_print_error("Can't allocate memory for plugin structures"); + delete new_thd; + delete_dynamic(&plugin_dl_array); + delete_dynamic(&plugin_array); + DBUG_VOID_RETURN; + } + } + init_sql_alloc(&mem, 1024, 0); + initialized= 1; + new_thd->store_globals(); + new_thd->db= my_strdup("mysql", MYF(0)); + new_thd->db_length= 5; + bzero((gptr)&tables, sizeof(tables)); + tables.alias= tables.table_name= (char*)"plugin"; + tables.lock_type= TL_READ; + tables.db= new_thd->db; + if (simple_open_n_lock_tables(new_thd, &tables)) + { + DBUG_PRINT("error",("Can't open plugin table")); + sql_print_error("Can't open the mysql.plugin table. 
Please run the mysql_install_db script to create it."); + delete_dynamic(&plugin_dl_array); + delete_dynamic(&plugin_array); + for (i= 0; i < MYSQL_MAX_PLUGIN_TYPE_NUM; i++) + hash_free(&plugin_hash[i]); + goto end; + } + table= tables.table; + init_read_record(&read_record_info, new_thd, table, NULL, 1, 0); + while (!(error= read_record_info.read_record(&read_record_info))) + { + DBUG_PRINT("info", ("init plugin record")); + LEX_STRING name, dl; + name.str= get_field(&mem, table->field[0]); + name.length= strlen(name.str); + dl.str= get_field(&mem, table->field[1]); + dl.length= strlen(dl.str); + if (plugin_add(&name, &dl, REPORT_TO_LOG)) + DBUG_PRINT("warning", ("Couldn't load plugin named '%s' with soname '%s'.", + name.str, dl.str)); + } + plugin_call_initializer(); + if (error > 0) + sql_print_error(ER(ER_GET_ERRNO), my_errno); + end_read_record(&read_record_info); + new_thd->version--; // Force close to free memory +end: + free_root(&mem, MYF(0)); + close_thread_tables(new_thd); + delete new_thd; + /* Remember that we don't have a THD */ + my_pthread_setspecific_ptr(THR_THD, 0); + DBUG_VOID_RETURN; +} + + +void plugin_free(void) +{ + uint i; + DBUG_ENTER("plugin_free"); + plugin_call_deinitializer(); + for (i= 0; i < MYSQL_MAX_PLUGIN_TYPE_NUM; i++) + hash_free(&plugin_hash[i]); + delete_dynamic(&plugin_array); + for (i= 0; i < plugin_dl_array.elements; i++) + { + struct st_plugin_dl *tmp= dynamic_element(&plugin_dl_array, i, + struct st_plugin_dl *); +#ifdef HAVE_DLOPEN + if (tmp->handle) + { + dlclose(tmp->handle); + my_free(tmp->dl.str, MYF(0)); + } +#endif + } + delete_dynamic(&plugin_dl_array); + if (initialized) + { + initialized= 0; + rwlock_destroy(&THR_LOCK_plugin); + } + DBUG_VOID_RETURN; +} + + +my_bool mysql_install_plugin(THD *thd, LEX_STRING *name, LEX_STRING *dl) +{ + TABLE_LIST tables; + TABLE *table; + int error; + struct st_plugin_int *tmp; + DBUG_ENTER("mysql_install_plugin"); + bzero(&tables, sizeof(tables)); + tables.db= (char *)"mysql"; + tables.table_name= tables.alias= (char *)"plugin"; + if (check_table_access(thd, INSERT_ACL, &tables, 0)) + DBUG_RETURN(TRUE); + rw_wrlock(&THR_LOCK_plugin); + if (plugin_add(name, dl, REPORT_TO_USER)) + goto err; + tmp= plugin_find_internal(name, MYSQL_ANY_PLUGIN); + if (tmp->plugin->init) + { + if (tmp->plugin->init()) + { + my_error(ER_CANT_INITIALIZE_UDF, MYF(0), name->str, + "Plugin initialization function failed."); + goto err; + } + tmp->state= PLUGIN_IS_READY; + } + if (! (table = open_ltable(thd, &tables, TL_WRITE))) + goto deinit; + restore_record(table, s->default_values); + table->field[0]->store(name->str, name->length, system_charset_info); + table->field[1]->store(dl->str, dl->length, files_charset_info); + error= table->file->write_row(table->record[0]); + if (error) + { + table->file->print_error(error, MYF(0)); + goto deinit; + } + rw_unlock(&THR_LOCK_plugin); + DBUG_RETURN(FALSE); +deinit: + if (tmp->plugin->deinit) + tmp->plugin->deinit(); +err: + plugin_del(name); + rw_unlock(&THR_LOCK_plugin); + DBUG_RETURN(TRUE); +} + + +my_bool mysql_uninstall_plugin(THD *thd, LEX_STRING *name) +{ + TABLE *table; + TABLE_LIST tables; + struct st_plugin_int *plugin; + DBUG_ENTER("mysql_uninstall_plugin"); + rw_wrlock(&THR_LOCK_plugin); + if (! 
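/*
  Illustrative aside, not part of the patch: mysql_install_plugin() above
  unwinds in reverse order of what succeeded -- a failed catalog write runs
  the deinit hook and then plugin_del(), while a failed init skips straight
  to plugin_del(). The same shape as a standalone sketch with invented stub
  steps:
*/
static bool example_add_step(void)     { return false; }  /* load + register */
static bool example_init_step(void)    { return false; }  /* init hook */
static bool example_persist_step(void) { return false; }  /* catalog INSERT */
static void example_deinit_step(void)  {}
static void example_del_step(void)     {}

static bool example_install(void)
{
  if (example_add_step())
    return true;                  /* nothing to undo yet */
  if (example_init_step())
    goto err;
  if (example_persist_step())
    goto deinit;
  return false;                   /* success: all three steps held */

deinit:
  example_deinit_step();          /* undo init */
err:
  example_del_step();             /* undo add */
  return true;
}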
(plugin= plugin_find_internal(name, MYSQL_ANY_PLUGIN))) + { + my_error(ER_SP_DOES_NOT_EXIST, MYF(0), "PLUGIN", name->str); + goto err; + } + if (plugin->ref_count) + { + plugin->state= PLUGIN_IS_DELETED; + push_warning(thd, MYSQL_ERROR::WARN_LEVEL_WARN, 0, + "Plugin is not deleted, waiting on tables."); + } + else + { + if (plugin->plugin->deinit) + plugin->plugin->deinit(); + plugin_del(name); + } + bzero(&tables, sizeof(tables)); + tables.db= (char *)"mysql"; + tables.table_name= tables.alias= (char *)"plugin"; + if (! (table= open_ltable(thd, &tables, TL_WRITE))) + goto err; + table->field[0]->store(name->str, name->length, system_charset_info); + table->file->extra(HA_EXTRA_RETRIEVE_ALL_COLS); + if (! table->file->index_read_idx(table->record[0], 0, + (byte *)table->field[0]->ptr, + table->key_info[0].key_length, + HA_READ_KEY_EXACT)) + { + int error; + if ((error= table->file->delete_row(table->record[0]))) + { + table->file->print_error(error, MYF(0)); + goto err; + } + } + rw_unlock(&THR_LOCK_plugin); + DBUG_RETURN(FALSE); +err: + rw_unlock(&THR_LOCK_plugin); + DBUG_RETURN(TRUE); +} diff --git a/sql/sql_plugin.h b/sql/sql_plugin.h new file mode 100644 index 00000000000..8fb186b62de --- /dev/null +++ b/sql/sql_plugin.h @@ -0,0 +1,63 @@ +/* Copyright (C) 2005 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#ifndef _sql_plugin_h +#define _sql_plugin_h + +#include <plugin.h> + +#define MYSQL_ANY_PLUGIN -1 + +enum enum_plugin_state +{ + PLUGIN_IS_FREED= 0, + PLUGIN_IS_DELETED, + PLUGIN_IS_UNINITIALIZED, + PLUGIN_IS_READY +}; + +/* A handle for the dynamic library containing a plugin or plugins. 
*/ + +struct st_plugin_dl +{ + LEX_STRING dl; + void *handle; + struct st_mysql_plugin *plugins; + int version; + uint ref_count; /* number of plugins loaded from the library */ +}; + +/* A handle of a plugin */ + +struct st_plugin_int +{ + LEX_STRING name; + struct st_mysql_plugin *plugin; + struct st_plugin_dl *plugin_dl; + enum enum_plugin_state state; + uint ref_count; /* number of threads using the plugin */ +}; + +extern char *opt_plugin_dir_ptr; +extern char opt_plugin_dir[FN_REFLEN]; +extern void plugin_init(void); +extern void plugin_free(void); +extern my_bool plugin_is_ready(LEX_STRING *name, int type); +extern st_plugin_int *plugin_lock(LEX_STRING *name, int type); +extern void plugin_unlock(struct st_plugin_int *plugin); +extern my_bool mysql_install_plugin(THD *thd, LEX_STRING *name, LEX_STRING *dl); +extern my_bool mysql_uninstall_plugin(THD *thd, LEX_STRING *name); +#endif diff --git a/sql/sql_prepare.cc b/sql/sql_prepare.cc index d405f0791fb..2f08583d974 100644 --- a/sql/sql_prepare.cc +++ b/sql/sql_prepare.cc @@ -1726,7 +1726,9 @@ static bool check_prepared_statement(Prepared_statement *stmt, case SQLCOM_SHOW_COLUMN_TYPES: case SQLCOM_SHOW_STATUS: case SQLCOM_SHOW_VARIABLES: - case SQLCOM_SHOW_LOGS: + case SQLCOM_SHOW_ENGINE_LOGS: + case SQLCOM_SHOW_ENGINE_STATUS: + case SQLCOM_SHOW_ENGINE_MUTEX: case SQLCOM_SHOW_TABLES: case SQLCOM_SHOW_OPEN_TABLES: case SQLCOM_SHOW_CHARSETS: @@ -2739,11 +2741,11 @@ bool Prepared_statement::prepare(const char *packet, uint packet_len) old_stmt_arena= thd->stmt_arena; thd->stmt_arena= this; lex_start(thd, (uchar*) thd->query, thd->query_length); - lex->safe_to_cache_query= FALSE; lex->stmt_prepare_mode= TRUE; error= yyparse((void *)thd) || thd->is_fatal_error || thd->net.report_error || init_param_array(this); + lex->safe_to_cache_query= FALSE; /* While doing context analysis of the query (in check_prepared_statement) we allocate a lot of additional memory: for open tables, JOINs, derived diff --git a/sql/sql_repl.cc b/sql/sql_repl.cc index cd293fc21c7..dd70f90b3da 100644 --- a/sql/sql_repl.cc +++ b/sql/sql_repl.cc @@ -19,6 +19,7 @@ #include "sql_repl.h" #include "log_event.h" +#include "rpl_filter.h" #include <my_dir.h> int max_binlog_dump_events = 0; // unlimited @@ -1457,8 +1458,8 @@ bool show_binlog_info(THD* thd) int dir_len = dirname_length(li.log_file_name); protocol->store(li.log_file_name + dir_len, &my_charset_bin); protocol->store((ulonglong) li.pos); - protocol->store(&binlog_do_db); - protocol->store(&binlog_ignore_db); + protocol->store(binlog_filter->get_do_db()); + protocol->store(binlog_filter->get_ignore_db()); if (protocol->write()) DBUG_RETURN(TRUE); } diff --git a/sql/sql_repl.h b/sql/sql_repl.h index 9eb6456ee20..ba64e626adc 100644 --- a/sql/sql_repl.h +++ b/sql/sql_repl.h @@ -31,7 +31,6 @@ typedef struct st_slave_info extern my_bool opt_show_slave_auth_info; extern char *master_host, *master_info_file; extern bool server_id_supplied; -extern I_List<i_string> binlog_do_db, binlog_ignore_db; extern int max_binlog_dump_events; extern my_bool opt_sporadic_binlog_dump_fail; diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 470015f8869..6ea9231c87c 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -949,23 +949,19 @@ JOIN::optimize() } /* - Need to tell Innobase that to play it safe, it should fetch all - columns of the tables: this is because MySQL may build row - pointers for the rows, and for all columns of the primary key the - field->query_id has not necessarily been set to thd->query_id by - MySQL. 
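/*
  Illustrative aside, not part of the patch: sql_plugin.h above keeps two
  reference counts on purpose -- st_plugin_dl::ref_count counts plugins
  resolved from one shared library, st_plugin_int::ref_count counts threads
  using one plugin. The library is dlclose()d only when the first reaches
  zero, a plugin torn down only when the second does. Reduced to a sketch
  with invented names:
*/
struct ex_library
{
  unsigned ref_count;             /* plugins loaded from this library */
};

struct ex_plugin_handle
{
  ex_library *lib;
  unsigned ref_count;             /* threads currently using the plugin */
};

static void ex_drop_plugin(ex_plugin_handle *p)
{
  /* Runs once per plugin teardown, after its last user has unlocked it. */
  if (--p->lib->ref_count == 0)
  {
    /* last plugin from this library is gone: dlclose() would happen here */
  }
}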
+ Need to tell handlers that to play it safe, it should fetch all + columns of the primary key of the tables: this is because MySQL may + build row pointers for the rows, and for all columns of the primary key + the read set has not necessarily been set by the server code. */ - -#ifdef HAVE_INNOBASE_DB if (need_tmp || select_distinct || group_list || order) { for (uint i_h = const_tables; i_h < tables; i_h++) { TABLE* table_h = join_tab[i_h].table; - table_h->file->extra(HA_EXTRA_RETRIEVE_PRIMARY_KEY); + table_h->file->ha_retrieve_all_pk(); } } -#endif DBUG_EXECUTE("info",TEST_join(this);); @@ -1313,6 +1309,9 @@ JOIN::exec() /* Copy data to the temporary table */ thd->proc_info= "Copying to tmp table"; DBUG_PRINT("info", ("%s", thd->proc_info)); + if (!curr_join->sort_and_group && + curr_join->const_tables != curr_join->tables) + curr_join->join_tab[curr_join->const_tables].sorted= 0; if ((tmp_error= do_select(curr_join, (List<Item> *) 0, curr_tmp_table, 0))) { error= tmp_error; @@ -1459,6 +1458,9 @@ JOIN::exec() 1, TRUE)) DBUG_VOID_RETURN; curr_join->group_list= 0; + if (!curr_join->sort_and_group && + curr_join->const_tables != curr_join->tables) + curr_join->join_tab[curr_join->const_tables].sorted= 0; if (setup_sum_funcs(curr_join->thd, curr_join->sum_funcs) || (tmp_error= do_select(curr_join, (List<Item> *) 0, curr_tmp_table, 0))) @@ -1645,6 +1647,16 @@ JOIN::exec() (select_options & OPTION_FOUND_ROWS ? HA_POS_ERROR : unit->select_limit_cnt))) DBUG_VOID_RETURN; + if (curr_join->const_tables != curr_join->tables && + !curr_join->join_tab[curr_join->const_tables].table->sort.io_cache) + { + /* + If no IO cache exists for the first table then we are using an + INDEX SCAN and no filesort. Thus we should not remove the sorted + attribute on the INDEX SCAN. + */ + skip_sort_order= 1; + } } } /* XXX: When can we have here thd->net.report_error not zero? 
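/*
  Illustrative aside, not part of the patch: the JOIN::exec() hunks above and
  make_join_readinfo() just below cooperate on one rule -- only the first
  non-const table can contribute ordering to the join output, so only it
  gets sorted= 1 and a sorted ha_index_init(); every later table may deliver
  rows in any order. The assignment rule alone, with an invented JOIN_TAB
  stand-in:
*/
struct ex_join_tab { bool sorted; };

static void ex_assign_sorted(ex_join_tab *tabs, unsigned const_tables,
                             unsigned tables)
{
  bool sorted= true;
  for (unsigned i= const_tables; i < tables; i++)
  {
    tabs[i].sorted= sorted;
    sorted= false;                /* only the first table must be sorted */
  }
}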
*/ @@ -5518,6 +5530,7 @@ make_join_readinfo(JOIN *join, uint options) uint i; bool statistics= test(!(join->select_options & SELECT_DESCRIBE)); + bool sorted= 1; DBUG_ENTER("make_join_readinfo"); for (i=join->const_tables ; i < join->tables ; i++) @@ -5527,6 +5540,8 @@ make_join_readinfo(JOIN *join, uint options) tab->read_record.table= table; tab->read_record.file=table->file; tab->next_select=sub_select; /* normal select */ + tab->sorted= sorted; + sorted= 0; // only first must be sorted switch (tab->type) { case JT_SYSTEM: // Only happens with left join table->status=STATUS_NO_RECORD; @@ -8282,7 +8297,7 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields, uint hidden_null_count, hidden_null_pack_length, hidden_field_count; uint blob_count,group_null_items, string_count; uint temp_pool_slot=MY_BIT_NONE; - ulong reclength, string_total_length; + ulong reclength, string_total_length, fieldnr= 0; bool using_unique_constraint= 0; bool use_packed_rows= 0; bool not_all_columns= !(select_options & TMP_TABLE_ALL_COLUMNS); @@ -8305,7 +8320,7 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields, statistic_increment(thd->status_var.created_tmp_tables, &LOCK_status); if (use_temp_pool && !(test_flags & TEST_KEEP_TMP_TABLES)) - temp_pool_slot = bitmap_set_next(&temp_pool); + temp_pool_slot = bitmap_lock_set_next(&temp_pool); if (temp_pool_slot != MY_BIT_NONE) // we got a slot sprintf(path, "%s_%lx_%i", tmp_file_prefix, @@ -8360,13 +8375,13 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields, param->group_length : 0, NullS)) { - bitmap_clear_bit(&temp_pool, temp_pool_slot); + bitmap_lock_clear_bit(&temp_pool, temp_pool_slot); DBUG_RETURN(NULL); /* purecov: inspected */ } /* Copy_field belongs to TMP_TABLE_PARAM, allocate it in THD mem_root */ if (!(param->copy_field= copy= new (thd->mem_root) Copy_field[field_count])) { - bitmap_clear_bit(&temp_pool, temp_pool_slot); + bitmap_lock_clear_bit(&temp_pool, temp_pool_slot); free_root(&own_root, MYF(0)); /* purecov: inspected */ DBUG_RETURN(NULL); /* purecov: inspected */ } @@ -8402,6 +8417,7 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields, table->s->tmp_table= TMP_TABLE; table->s->db_low_byte_first=1; // True for HEAP and MyISAM table->s->table_charset= param->table_charset; + table->s->primary_key= MAX_KEY; //Indicate no primary key table->s->keys_for_keyread.init(); table->s->keys_in_use.init(); /* For easier error reporting */ @@ -8481,6 +8497,7 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields, (*argp)->maybe_null=1; } new_field->query_id= thd->query_id; + new_field->fieldnr= ++fieldnr; } } } @@ -8532,6 +8549,7 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields, new_field->flags|= GROUP_FLAG; } new_field->query_id= thd->query_id; + new_field->fieldnr= ++fieldnr; new_field->field_index= (uint) (reg_field - table->field); *(reg_field++) =new_field; } @@ -8548,6 +8566,7 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields, DBUG_ASSERT(field_count >= (uint) (reg_field - table->field)); field_count= (uint) (reg_field - table->field); *blob_field= 0; // End marker + table->s->fields= field_count; /* If result table is small; use a heap */ if (blob_count || using_unique_constraint || @@ -8566,7 +8585,11 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields, table->file= get_new_handler(table, &table->mem_root, table->s->db_type= DB_TYPE_HEAP); } - + if (table->s->fields) + { + 
table->file->ha_set_all_bits_in_read_set(); + table->file->ha_set_all_bits_in_write_set(); + } if (!using_unique_constraint) reclength+= group_null_items; // null flag is stored separately @@ -8592,7 +8615,6 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields, string_total_length / string_count >= AVG_STRING_LENGTH_TO_PACK_ROWS)) use_packed_rows= 1; - table->s->fields= field_count; table->s->reclength= reclength; { uint alloc_length=ALIGN_SIZE(reclength+MI_UNIQUE_HASH_LENGTH+1); @@ -8845,7 +8867,7 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields, err: thd->mem_root= mem_root_save; free_tmp_table(thd,table); /* purecov: inspected */ - bitmap_clear_bit(&temp_pool, temp_pool_slot); + bitmap_lock_clear_bit(&temp_pool, temp_pool_slot); DBUG_RETURN(NULL); /* purecov: inspected */ } @@ -9101,16 +9123,8 @@ free_tmp_table(THD *thd, TABLE *entry) if (entry->file) { if (entry->db_stat) - { - (void) entry->file->close(); - } - /* - We can't call ha_delete_table here as the table may created in mixed case - here and we have to ensure that delete_table gets the table name in - the original case. - */ - if (!(test_flags & TEST_KEEP_TMP_TABLES) || - entry->s->db_type == DB_TYPE_HEAP) + entry->file->drop_table(entry->s->table_name); + else entry->file->delete_table(entry->s->table_name); delete entry->file; } @@ -9120,7 +9134,7 @@ free_tmp_table(THD *thd, TABLE *entry) (*ptr)->free(); free_io_cache(entry); - bitmap_clear_bit(&temp_pool, entry->temp_pool_slot); + bitmap_lock_clear_bit(&temp_pool, entry->temp_pool_slot); free_root(&own_root, MYF(0)); /* the table is allocated in its own root */ thd->proc_info=save_proc_info; @@ -9182,7 +9196,12 @@ bool create_myisam_from_heap(THD *thd, TABLE *table, TMP_TABLE_PARAM *param, new_table.file->extra(HA_EXTRA_WRITE_CACHE); #endif - /* copy all old rows */ + /* + copy all old rows from heap table to MyISAM table + This is the only code that uses record[1] to read/write but this + is safe as this is a temporary MyISAM table without timestamp/autoincrement + or partitioning. 
+ */ while (!table->file->rnd_next(new_table.record[1])) { if ((write_err=new_table.file->write_row(new_table.record[1]))) @@ -9217,8 +9236,8 @@ bool create_myisam_from_heap(THD *thd, TABLE *table, TMP_TABLE_PARAM *param, (void) new_table.file->close(); err1: new_table.file->delete_table(new_table.s->table_name); - delete new_table.file; err2: + delete new_table.file; thd->proc_info=save_proc_info; DBUG_RETURN(1); } @@ -9313,7 +9332,7 @@ do_select(JOIN *join,List<Item> *fields,TABLE *table,Procedure *procedure) empty_record(table); if (table->group && join->tmp_table_param.sum_func_count && table->s->keys && !table->file->inited) - table->file->ha_index_init(0); + table->file->ha_index_init(0, 0); } /* Set up select_end */ join->join_tab[join->tables-1].next_select= setup_end_select_func(join); @@ -10016,7 +10035,7 @@ join_read_const(JOIN_TAB *tab) table->status= STATUS_NOT_FOUND; mark_as_null_row(tab->table); empty_record(table); - if (error != HA_ERR_KEY_NOT_FOUND) + if (error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE) return report_error(table, error); return -1; } @@ -10039,7 +10058,9 @@ join_read_key(JOIN_TAB *tab) TABLE *table= tab->table; if (!table->file->inited) - table->file->ha_index_init(tab->ref.key); + { + table->file->ha_index_init(tab->ref.key, tab->sorted); + } if (cmp_buffer_with_ref(tab) || (table->status & (STATUS_GARBAGE | STATUS_NO_PARENT | STATUS_NULL_ROW))) { @@ -10051,7 +10072,7 @@ join_read_key(JOIN_TAB *tab) error=table->file->index_read(table->record[0], tab->ref.key_buff, tab->ref.key_length,HA_READ_KEY_EXACT); - if (error && error != HA_ERR_KEY_NOT_FOUND) + if (error && error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE) return report_error(table, error); } table->null_row=0; @@ -10071,14 +10092,16 @@ join_read_always_key(JOIN_TAB *tab) return -1; } if (!table->file->inited) - table->file->ha_index_init(tab->ref.key); + { + table->file->ha_index_init(tab->ref.key, tab->sorted); + } if (cp_buffer_from_ref(tab->join->thd, &tab->ref)) return -1; if ((error=table->file->index_read(table->record[0], tab->ref.key_buff, tab->ref.key_length,HA_READ_KEY_EXACT))) { - if (error != HA_ERR_KEY_NOT_FOUND) + if (error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE) return report_error(table, error); return -1; /* purecov: inspected */ } @@ -10098,14 +10121,14 @@ join_read_last_key(JOIN_TAB *tab) TABLE *table= tab->table; if (!table->file->inited) - table->file->ha_index_init(tab->ref.key); + table->file->ha_index_init(tab->ref.key, tab->sorted); if (cp_buffer_from_ref(tab->join->thd, &tab->ref)) return -1; if ((error=table->file->index_read_last(table->record[0], tab->ref.key_buff, tab->ref.key_length))) { - if (error != HA_ERR_KEY_NOT_FOUND) + if (error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE) return report_error(table, error); return -1; /* purecov: inspected */ } @@ -10208,7 +10231,7 @@ join_read_first(JOIN_TAB *tab) tab->read_record.index=tab->index; tab->read_record.record=table->record[0]; if (!table->file->inited) - table->file->ha_index_init(tab->index); + table->file->ha_index_init(tab->index, tab->sorted); if ((error=tab->table->file->index_first(tab->table->record[0]))) { if (error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE) @@ -10247,7 +10270,7 @@ join_read_last(JOIN_TAB *tab) tab->read_record.index=tab->index; tab->read_record.record=table->record[0]; if (!table->file->inited) - table->file->ha_index_init(tab->index); + table->file->ha_index_init(tab->index, 1); if ((error= 
tab->table->file->index_last(tab->table->record[0]))) return report_error(table, error); return 0; @@ -10271,7 +10294,7 @@ join_ft_read_first(JOIN_TAB *tab) TABLE *table= tab->table; if (!table->file->inited) - table->file->ha_index_init(tab->ref.key); + table->file->ha_index_init(tab->ref.key, 1); #if NOT_USED_YET if (cp_buffer_from_ref(tab->join->thd, &tab->ref)) // as ft-key doesn't use store_key's return -1; // see also FT_SELECT::init() @@ -10666,7 +10689,7 @@ end_update(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)), error, 0)) DBUG_RETURN(NESTED_LOOP_ERROR); // Not a table_is_full error /* Change method to update rows */ - table->file->ha_index_init(0); + table->file->ha_index_init(0, 0); join->join_tab[join->tables-1].next_select=end_unique_update; } join->send_records++; diff --git a/sql/sql_select.h b/sql/sql_select.h index 2f53c9a3b35..30c36e67528 100644 --- a/sql/sql_select.h +++ b/sql/sql_select.h @@ -133,6 +133,7 @@ typedef struct st_join_table { uint used_fields,used_fieldlength,used_blobs; enum join_type type; bool cached_eq_ref_table,eq_ref_table,not_used_in_distinct; + bool sorted; TABLE_REF ref; JOIN_CACHE cache; JOIN *join; diff --git a/sql/sql_show.cc b/sql/sql_show.cc index 9449224c01a..de7e1c4f8be 100644 --- a/sql/sql_show.cc +++ b/sql/sql_show.cc @@ -23,11 +23,9 @@ #include "sp.h" #include "sp_head.h" #include "sql_trigger.h" +#include "authors.h" #include <my_dir.h> -#ifdef HAVE_BERKELEY_DB -#include "ha_berkeley.h" // For berkeley_show_logs -#endif static const char *grant_names[]={ "select","insert","update","delete","create","drop","reload","shutdown", @@ -59,6 +57,9 @@ bool mysqld_show_storage_engines(THD *thd) field_list.push_back(new Item_empty_string("Engine",10)); field_list.push_back(new Item_empty_string("Support",10)); field_list.push_back(new Item_empty_string("Comment",80)); + field_list.push_back(new Item_empty_string("Transactions",3)); + field_list.push_back(new Item_empty_string("XA",3)); + field_list.push_back(new Item_empty_string("Savepoints",3)); if (protocol->send_fields(&field_list, Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF)) @@ -81,6 +82,9 @@ bool mysqld_show_storage_engines(THD *thd) option_name= "DEFAULT"; protocol->store(option_name, system_charset_info); protocol->store((*types)->comment, system_charset_info); + protocol->store((*types)->commit ? "YES" : "NO", system_charset_info); + protocol->store((*types)->prepare ? "YES" : "NO", system_charset_info); + protocol->store((*types)->savepoint_set ? "YES" : "NO", system_charset_info); if (protocol->write()) DBUG_RETURN(TRUE); } @@ -89,6 +93,38 @@ bool mysqld_show_storage_engines(THD *thd) DBUG_RETURN(FALSE); } +/*************************************************************************** +** List all Authors. 
+** If you can update it, you get to be in it :) +***************************************************************************/ + +bool mysqld_show_authors(THD *thd) +{ + List<Item> field_list; + Protocol *protocol= thd->protocol; + DBUG_ENTER("mysqld_show_authors"); + + field_list.push_back(new Item_empty_string("Name",40)); + field_list.push_back(new Item_empty_string("Location",40)); + field_list.push_back(new Item_empty_string("Comment",80)); + + if (protocol->send_fields(&field_list, + Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF)) + DBUG_RETURN(TRUE); + + show_table_authors_st *authors; + for (authors= show_table_authors; authors->name; authors++) + { + protocol->prepare_for_resend(); + protocol->store(authors->name, system_charset_info); + protocol->store(authors->location, system_charset_info); + protocol->store(authors->comment, system_charset_info); + if (protocol->write()) + DBUG_RETURN(TRUE); + } + send_eof(thd); + DBUG_RETURN(FALSE); +} /*************************************************************************** List all privileges supported @@ -529,29 +565,6 @@ bool mysqld_show_create_db(THD *thd, char *dbname, DBUG_RETURN(FALSE); } -bool -mysqld_show_logs(THD *thd) -{ - List<Item> field_list; - Protocol *protocol= thd->protocol; - DBUG_ENTER("mysqld_show_logs"); - - field_list.push_back(new Item_empty_string("File",FN_REFLEN)); - field_list.push_back(new Item_empty_string("Type",10)); - field_list.push_back(new Item_empty_string("Status",10)); - - if (protocol->send_fields(&field_list, - Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF)) - DBUG_RETURN(TRUE); - -#ifdef HAVE_BERKELEY_DB - if ((have_berkeley_db == SHOW_OPTION_YES) && berkeley_show_logs(protocol)) - DBUG_RETURN(TRUE); -#endif - - send_eof(thd); - DBUG_RETURN(FALSE); -} /**************************************************************************** @@ -965,6 +978,12 @@ store_create_info(THD *thd, TABLE_LIST *table_list, String *packet) } } packet->append(')'); + if (key_info->parser) + { + packet->append(" WITH PARSER ", 13); + append_identifier(thd, packet, key_info->parser->name.str, + key_info->parser->name.length); + } } /* @@ -985,7 +1004,15 @@ store_create_info(THD *thd, TABLE_LIST *table_list, String *packet) packet->append(STRING_WITH_LEN(" TYPE=")); else packet->append(STRING_WITH_LEN(" ENGINE=")); +#ifdef WITH_PARTITION_STORAGE_ENGINE + if (table->s->part_info) + packet->append(ha_get_storage_engine( + table->s->part_info->default_engine_type)); + else + packet->append(file->table_type()); +#else packet->append(file->table_type()); +#endif if (share->table_charset && !(thd->variables.sql_mode & MODE_MYSQL323) && @@ -1057,6 +1084,23 @@ store_create_info(THD *thd, TABLE_LIST *table_list, String *packet) append_directory(thd, packet, "DATA", create_info.data_file_name); append_directory(thd, packet, "INDEX", create_info.index_file_name); } +#ifdef WITH_PARTITION_STORAGE_ENGINE + { + /* + Partition syntax for CREATE TABLE is at the end of the syntax. 
+ */ + uint part_syntax_len; + char *part_syntax; + if (table->s->part_info && + ((part_syntax= generate_partition_syntax(table->s->part_info, + &part_syntax_len, + FALSE,FALSE)))) + { + packet->append(part_syntax, part_syntax_len); + my_free(part_syntax, MYF(0)); + } + } +#endif DBUG_RETURN(0); } @@ -2887,7 +2931,7 @@ int fill_schema_proc(THD *thd, TABLE_LIST *tables, COND *cond) { DBUG_RETURN(1); } - proc_table->file->ha_index_init(0); + proc_table->file->ha_index_init(0, 1); if ((res= proc_table->file->index_first(proc_table->record[0]))) { res= (res == HA_ERR_END_OF_FILE) ? 0 : 1; diff --git a/sql/sql_table.cc b/sql/sql_table.cc index e8ffdc23ec5..32470b9be00 100644 --- a/sql/sql_table.cc +++ b/sql/sql_table.cc @@ -17,9 +17,6 @@ /* drop and alter of tables */ #include "mysql_priv.h" -#ifdef HAVE_BERKELEY_DB -#include "ha_berkeley.h" -#endif #include <hash.h> #include <myisam.h> #include <my_dir.h> @@ -30,6 +27,7 @@ #include <io.h> #endif + const char *primary_key_name="PRIMARY"; static bool check_if_keyname_exists(const char *name,KEY *start, KEY *end); @@ -44,6 +42,64 @@ static bool prepare_blob_field(THD *thd, create_field *sql_field); static bool check_engine(THD *thd, const char *table_name, enum db_type *new_engine); +/* + SYNOPSIS + write_bin_log() + thd Thread object + clear_error is clear_error to be called + RETURN VALUES + NONE + DESCRIPTION + Write the binlog if open, routine used in multiple places in this + file +*/ + +static void write_bin_log(THD *thd, bool clear_error) +{ + if (mysql_bin_log.is_open()) + { + if (clear_error) + thd->clear_error(); + Query_log_event qinfo(thd, thd->query, thd->query_length, FALSE, FALSE); + mysql_bin_log.write(&qinfo); + } +} + +/* + SYNOPSIS + abort_and_upgrade_lock() + thd Thread object + table Table object + db Database name + table_name Table name + old_lock_level Old lock level + RETURN VALUES + TRUE Failure + FALSE Success + DESCRIPTION + Remember old lock level (for possible downgrade later on), abort all + waiting threads and ensure that all keeping locks currently are + completed such that we own the lock exclusively and no other interaction + is ongoing. +*/ + +static bool abort_and_upgrade_lock(THD *thd, TABLE *table, const char *db, + const char *table_name, + uint *old_lock_level) +{ + uint flags= RTFC_WAIT_OTHER_THREAD_FLAG | RTFC_CHECK_KILLED_FLAG; + DBUG_ENTER("abort_and_upgrade_locks"); + + *old_lock_level= table->reginfo.lock_type; + mysql_lock_abort(thd, table); + VOID(remove_table_from_cache(thd, db, table_name, flags)); + if (thd->killed) + { + thd->no_warnings_for_error= 0; + DBUG_RETURN(TRUE); + } + DBUG_RETURN(FALSE); +} /* Build the path to a file for a table (or the base path that can @@ -1042,6 +1098,8 @@ static int mysql_prepare_table(THD *thd, HA_CREATE_INFO *create_info, break; case Key::FULLTEXT: key_info->flags= HA_FULLTEXT; + if ((key_info->parser_name= key->parser_name)) + key_info->flags|= HA_USES_PARSER; break; case Key::SPATIAL: #ifdef HAVE_SPATIAL @@ -1371,6 +1429,34 @@ static int mysql_prepare_table(THD *thd, HA_CREATE_INFO *create_info, /* + Set table default charset, if not set + + SYNOPSIS + set_table_default_charset() + create_info Table create information + + DESCRIPTION + If the table character set was not given explicitely, + let's fetch the database default character set and + apply it to the table. 
+*/ + +static void set_table_default_charset(THD *thd, + HA_CREATE_INFO *create_info, char *db) +{ + if (!create_info->default_table_charset) + { + HA_CREATE_INFO db_info; + char path[FN_REFLEN]; + /* Abuse build_table_path() to build the path to the db.opt file */ + build_table_path(path, sizeof(path), db, MY_DB_OPT_FILE, ""); + load_db_opt(thd, path, &db_info); + create_info->default_table_charset= db_info.default_table_charset; + } +} + + +/* Extend long VARCHAR fields to blob & prepare field if it's a blob SYNOPSIS @@ -1527,7 +1613,72 @@ bool mysql_create_table(THD *thd,const char *db, const char *table_name, if (create_info->row_type == ROW_TYPE_DYNAMIC) db_options|=HA_OPTION_PACK_RECORD; alias= table_case_name(create_info, table_name); - file= get_new_handler((TABLE*) 0, thd->mem_root, create_info->db_type); + if (!(file=get_new_handler((TABLE*) 0, thd->mem_root, create_info->db_type))) + { + my_error(ER_OUTOFMEMORY, MYF(0), 128);//128 bytes invented + DBUG_RETURN(TRUE); + } +#ifdef WITH_PARTITION_STORAGE_ENGINE + partition_info *part_info= thd->lex->part_info; + if (part_info) + { + /* + The table has been specified as a partitioned table. + If this is part of an ALTER TABLE the handler will be the partition + handler but we need to specify the default handler to use for + partitions also in the call to check_partition_info. We transport + this information in the default_db_type variable, it is either + DB_TYPE_DEFAULT or the engine set in the ALTER TABLE command. + */ + enum db_type part_engine_type= create_info->db_type; + char *part_syntax_buf; + uint syntax_len; + if (part_engine_type == DB_TYPE_PARTITION_DB) + { + /* + This only happens at ALTER TABLE. + default_engine_type was assigned from the engine set in the ALTER + TABLE command. + */ + part_engine_type= ha_checktype(thd, + part_info->default_engine_type, 0, 0); + } + else + { + part_info->default_engine_type= create_info->db_type; + } + if (check_partition_info(part_info, part_engine_type, + file, create_info->max_rows)) + goto err; + + /* + We reverse the partitioning parser and generate a standard format + for syntax stored in frm file. + */ + if (!(part_syntax_buf= generate_partition_syntax(part_info, + &syntax_len, + TRUE,TRUE))) + goto err; + part_info->part_info_string= part_syntax_buf; + part_info->part_info_len= syntax_len; + if ((!(file->partition_flags() & HA_CAN_PARTITION)) || + create_info->db_type == DB_TYPE_PARTITION_DB) + { + /* + The handler assigned to the table cannot handle partitioning. + Assign the partition handler as the handler of the table. + */ + DBUG_PRINT("info", ("db_type: %d part_flag: %d", + create_info->db_type,file->partition_flags())); + delete file; + create_info->db_type= DB_TYPE_PARTITION_DB; + if (!(file= get_ha_partition(part_info))) + { + DBUG_RETURN(TRUE); + } + } + } +#endif #ifdef NOT_USED /* @@ -1541,30 +1692,17 @@ bool mysql_create_table(THD *thd,const char *db, const char *table_name, (file->table_flags() & HA_NO_TEMP_TABLES)) { my_error(ER_ILLEGAL_HA, MYF(0), table_name); - DBUG_RETURN(TRUE); + goto err; } #endif - /* - If the table character set was not given explicitely, - let's fetch the database default character set and - apply it to the table. 
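/*
  Illustrative aside, not part of the patch: set_table_default_charset()
  above encodes a simple precedence -- an explicit table charset always
  wins, and the database default from db.opt is consulted only when CREATE
  TABLE named none. The precedence rule as a standalone sketch with
  invented types:
*/
#include <string>

struct ex_create_info
{
  std::string default_charset;    /* empty: nothing given explicitly */
};

static void ex_apply_db_default(ex_create_info *info,
                                const std::string &db_opt_charset)
{
  if (info->default_charset.empty())
    info->default_charset= db_opt_charset;  /* inherit from db.opt */
}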
- */ - if (!create_info->default_table_charset) - { - HA_CREATE_INFO db_info; - char path[FN_REFLEN]; - /* Abuse build_table_path() to build the path to the db.opt file */ - build_table_path(path, sizeof(path), db, MY_DB_OPT_FILE, ""); - load_db_opt(thd, path, &db_info); - create_info->default_table_charset= db_info.default_table_charset; - } + set_table_default_charset(thd, create_info, (char*) db); if (mysql_prepare_table(thd, create_info, &fields, &keys, internal_tmp_table, &db_options, file, &key_info_buffer, &key_count, select_field_count)) - DBUG_RETURN(TRUE); + goto err; /* Check if table exists */ if (create_info->options & HA_LEX_CREATE_TMP_TABLE) @@ -1589,13 +1727,14 @@ bool mysql_create_table(THD *thd,const char *db, const char *table_name, push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_NOTE, ER_TABLE_EXISTS_ERROR, ER(ER_TABLE_EXISTS_ERROR), alias); - DBUG_RETURN(FALSE); + error= 0; + goto err; } my_error(ER_TABLE_EXISTS_ERROR, MYF(0), alias); - DBUG_RETURN(TRUE); + goto err; } if (wait_if_global_read_lock(thd, 0, 1)) - DBUG_RETURN(TRUE); + goto err; VOID(pthread_mutex_lock(&LOCK_open)); if (!internal_tmp_table && !(create_info->options & HA_LEX_CREATE_TMP_TABLE)) { @@ -1604,7 +1743,7 @@ bool mysql_create_table(THD *thd,const char *db, const char *table_name, if (create_info->options & HA_LEX_CREATE_IF_NOT_EXISTS) goto warn; my_error(ER_TABLE_EXISTS_ERROR,MYF(0),table_name); - goto end; + goto unlock_and_end; } } @@ -1628,7 +1767,7 @@ bool mysql_create_table(THD *thd,const char *db, const char *table_name, if (create_if_not_exists) goto warn; my_error(ER_TABLE_EXISTS_ERROR,MYF(0),table_name); - goto end; + goto unlock_and_end; } } @@ -1641,15 +1780,15 @@ bool mysql_create_table(THD *thd,const char *db, const char *table_name, if (rea_create_table(thd, path, db, table_name, create_info, fields, key_count, - key_info_buffer)) - goto end; + key_info_buffer, file)) + goto unlock_and_end; if (create_info->options & HA_LEX_CREATE_TMP_TABLE) { /* Open table and put in temporary table list */ if (!(open_temporary_table(thd, path, db, table_name, 1))) { (void) rm_temporary_table(create_info->db_type, path); - goto end; + goto unlock_and_end; } thd->tmp_table_used= 1; } @@ -1659,12 +1798,15 @@ bool mysql_create_table(THD *thd,const char *db, const char *table_name, Query_log_event qinfo(thd, thd->query, thd->query_length, FALSE, FALSE); mysql_bin_log.write(&qinfo); } - error= FALSE; -end: + error= FALSE; +unlock_and_end: VOID(pthread_mutex_unlock(&LOCK_open)); start_waiting_global_read_lock(thd); + +err: thd->proc_info="After create"; + delete file; DBUG_RETURN(error); warn: @@ -1673,7 +1815,7 @@ warn: ER_TABLE_EXISTS_ERROR, ER(ER_TABLE_EXISTS_ERROR), alias); create_info->table_existed= 1; // Mark that table existed - goto end; + goto unlock_and_end; } /* @@ -2772,12 +2914,7 @@ bool mysql_create_like_table(THD* thd, TABLE_LIST* table, } // Must be written before unlock - if (mysql_bin_log.is_open()) - { - thd->clear_error(); - Query_log_event qinfo(thd, thd->query, thd->query_length, FALSE, FALSE); - mysql_bin_log.write(&qinfo); - } + write_bin_log(thd, TRUE); res= FALSE; goto err; @@ -2883,11 +3020,7 @@ mysql_discard_or_import_tablespace(THD *thd, error=1; if (error) goto err; - if (mysql_bin_log.is_open()) - { - Query_log_event qinfo(thd, thd->query, thd->query_length, FALSE, FALSE); - mysql_bin_log.write(&qinfo); - } + write_bin_log(thd, FALSE); err: close_thread_tables(thd); thd->tablespace_op=FALSE; @@ -3106,6 +3239,166 @@ int mysql_drop_indexes(THD *thd, TABLE_LIST *table_list, 
#endif /* NOT_USED */ + +#define ALTER_TABLE_DATA_CHANGED 1 +#define ALTER_TABLE_INDEX_CHANGED 2 + +/* + SYNOPSIS + compare tables() + table original table + create_list fields in new table + key_list keys in new table + create_info create options in new table + + DESCRIPTION + 'table' (first argument) contains information of the original + table, which includes all corresponding parts that the new + table has in arguments create_list, key_list and create_info. + + By comparing the changes between the original and new table + we can determine how much it has changed after ALTER TABLE + and whether we need to make a copy of the table, or just change + the .frm file. + + RETURN VALUES + 0 No copy needed + 1 Data changes, copy needed + 2 Index changes, copy needed +*/ + +uint compare_tables(TABLE *table, List<create_field> *create_list, + List<Key> *key_list, HA_CREATE_INFO *create_info, + ALTER_INFO *alter_info, uint order_num) +{ + Field **f_ptr, *field; + uint changes= 0, tmp; + List_iterator_fast<create_field> new_field_it(*create_list); + create_field *new_field; + + /* + Some very basic checks. If number of fields changes, or the + handler, we need to run full ALTER TABLE. In the future + new fields can be added and old dropped without copy, but + not yet. + + Test also that engine was not given during ALTER TABLE, or + we are force to run regular alter table (copy). + E.g. ALTER TABLE tbl_name ENGINE=MyISAM. + + For the following ones we also want to run regular alter table: + ALTER TABLE tbl_name ORDER BY .. + ALTER TABLE tbl_name CONVERT TO CHARACTER SET .. + + At the moment we can't handle altering temporary tables without a copy. + We also test if OPTIMIZE TABLE was given and was mapped to alter table. + In that case we always do full copy. + */ + if (table->s->fields != create_list->elements || + table->s->db_type != create_info->db_type || + table->s->tmp_table || + create_info->used_fields & HA_CREATE_USED_ENGINE || + create_info->used_fields & HA_CREATE_USED_CHARSET || + create_info->used_fields & HA_CREATE_USED_DEFAULT_CHARSET || + (alter_info->flags & ALTER_RECREATE) || + order_num) + return ALTER_TABLE_DATA_CHANGED; + + /* + Go through fields and check if the original ones are compatible + with new table. + */ + for (f_ptr= table->field, new_field= new_field_it++; + (field= *f_ptr); f_ptr++, new_field= new_field_it++) + { + /* Make sure we have at least the default charset in use. */ + if (!new_field->charset) + new_field->charset= create_info->default_table_charset; + + /* Check that NULL behavior is same for old and new fields */ + if ((new_field->flags & NOT_NULL_FLAG) != + (uint) (field->flags & NOT_NULL_FLAG)) + return ALTER_TABLE_DATA_CHANGED; + + /* Don't pack rows in old tables if the user has requested this. */ + if (create_info->row_type == ROW_TYPE_DYNAMIC || + (new_field->flags & BLOB_FLAG) || + new_field->sql_type == MYSQL_TYPE_VARCHAR && + create_info->row_type != ROW_TYPE_FIXED) + create_info->table_options|= HA_OPTION_PACK_RECORD; + + /* Evaluate changes bitmap and send to check_if_incompatible_data() */ + if (!(tmp= field->is_equal(new_field))) + return ALTER_TABLE_DATA_CHANGED; + + changes|= tmp; + } + /* Check if changes are compatible with current handler without a copy */ + if (table->file->check_if_incompatible_data(create_info, changes)) + return ALTER_TABLE_DATA_CHANGED; + + /* + Go through keys and check if the original ones are compatible + with new table. 
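/*
  Illustrative aside, not part of the patch: compare_tables() above is
  ordered so the cheap structural tests run first; only when field count,
  engine, charset options and friends all match does it walk the individual
  fields and keys. The early-out shape in miniature, with invented types:
*/
enum { EX_COMPATIBLE= 0, EX_DATA_CHANGED= 1, EX_INDEX_CHANGED= 2 };

struct ex_table_def { unsigned fields, keys; int engine; bool tmp_table; };

static unsigned ex_compare(const ex_table_def *old_t,
                           const ex_table_def *new_t)
{
  /* Any of these forces a copying ALTER before details are examined. */
  if (old_t->fields != new_t->fields ||
      old_t->engine != new_t->engine ||
      old_t->tmp_table)
    return EX_DATA_CHANGED;
  /* ... per-field compatibility checks would go here ... */
  if (old_t->keys != new_t->keys)
    return EX_INDEX_CHANGED;
  /* ... per-key-part checks would go here ... */
  return EX_COMPATIBLE;           /* a .frm-only ALTER is sufficient */
}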
+ */ + KEY *table_key_info= table->key_info; + List_iterator_fast<Key> key_it(*key_list); + Key *key= key_it++; + + /* Check if the number of key elements has changed */ + if (table->s->keys != key_list->elements) + return ALTER_TABLE_INDEX_CHANGED; + + for (uint i= 0; i < table->s->keys; i++, table_key_info++, key= key_it++) + { + /* + Check that the key types are compatible between old and new tables. + */ + if (table_key_info->algorithm != key->algorithm || + ((key->type == Key::PRIMARY || key->type == Key::UNIQUE) && + !(table_key_info->flags & HA_NOSAME)) || + (!(key->type == Key::PRIMARY || key->type == Key::UNIQUE) && + (table_key_info->flags & HA_NOSAME)) || + ((key->type == Key::SPATIAL) && + !(table_key_info->flags & HA_SPATIAL)) || + (!(key->type == Key::SPATIAL) && + (table_key_info->flags & HA_SPATIAL)) || + ((key->type == Key::FULLTEXT) && + !(table_key_info->flags & HA_FULLTEXT)) || + (!(key->type == Key::FULLTEXT) && + (table_key_info->flags & HA_FULLTEXT))) + return ALTER_TABLE_INDEX_CHANGED; + + if (table_key_info->key_parts != key->columns.elements) + return ALTER_TABLE_INDEX_CHANGED; + + /* + Check that the key parts remain compatible between the old and + new tables. + */ + KEY_PART_INFO *table_key_part= table_key_info->key_part; + List_iterator_fast<key_part_spec> key_part_it(key->columns); + key_part_spec *key_part= key_part_it++; + for (uint j= 0; j < table_key_info->key_parts; j++, + table_key_part++, key_part= key_part_it++) + { + /* + Key definition has changed if we are using a different field or + if the used key length is different + (If key_part->length == 0 it means we are using the whole field) + */ + if (strcmp(key_part->field_name, table_key_part->field->field_name) || + (key_part->length && key_part->length != table_key_part->length) || + (key_part->length == 0 && table_key_part->length != + table_key_part->field->pack_length())) + return ALTER_TABLE_INDEX_CHANGED; + } + } + + return 0; // Tables are compatible +} + + /* Alter table */ @@ -3127,7 +3420,13 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name, ulonglong next_insert_id; uint db_create_options, used_fields; enum db_type old_db_type,new_db_type; - bool need_copy_table; + uint need_copy_table= 0; +#ifdef WITH_PARTITION_STORAGE_ENGINE + bool online_add_empty_partition= FALSE; + bool online_drop_partition= FALSE; + bool partition_changed= FALSE; + enum db_type default_engine_type; +#endif DBUG_ENTER("mysql_alter_table"); thd->proc_info="init"; @@ -3204,6 +3503,415 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name, old_db_type= table->s->db_type; if (create_info->db_type == DB_TYPE_DEFAULT) create_info->db_type= old_db_type; + +#ifdef WITH_PARTITION_STORAGE_ENGINE + /* + We need to handle both partition management command such as Add Partition + and others here as well as an ALTER TABLE that completely changes the + partitioning and yet others that don't change anything at all. We start + by checking the partition management variants and then check the general + change patterns. 
+ */ + if (alter_info->flags & (ALTER_ADD_PARTITION + + ALTER_DROP_PARTITION + ALTER_COALESCE_PARTITION + + ALTER_REORGANISE_PARTITION)) + { + partition_info *tab_part_info= table->s->part_info; + if (!tab_part_info) + { + my_error(ER_PARTITION_MGMT_ON_NONPARTITIONED, MYF(0)); + DBUG_RETURN(TRUE); + } + default_engine_type= tab_part_info->default_engine_type; + /* + We are going to manipulate the partition info on the table object + so we need to ensure that the data structure of the table object + is freed by setting version to 0. + */ + table->s->version= 0L; + if (alter_info->flags == ALTER_ADD_PARTITION) + { + /* + We start by moving the new partitions to the list of temporary + partitions. We will then check that the new partitions fit in the + partitioning scheme as currently set-up. + Partitions are always added at the end in ADD PARTITION. + */ + partition_info *alt_part_info= thd->lex->part_info; + uint no_new_partitions= alt_part_info->no_parts; + uint no_orig_partitions= tab_part_info->no_parts; + uint check_total_partitions= no_new_partitions + no_orig_partitions; + uint new_total_partitions= check_total_partitions; + /* + We allow quite a lot of values to be supplied by defaults, however we + must know the number of new partitions in this case. + */ + if (no_new_partitions == 0) + { + my_error(ER_ADD_PARTITION_NO_NEW_PARTITION, MYF(0)); + DBUG_RETURN(TRUE); + } + if (is_sub_partitioned(tab_part_info)) + { + if (alt_part_info->no_subparts == 0) + alt_part_info->no_subparts= tab_part_info->no_subparts; + else if (alt_part_info->no_subparts != tab_part_info->no_subparts) + { + my_error(ER_ADD_PARTITION_SUBPART_ERROR, MYF(0)); + DBUG_RETURN(TRUE); + } + check_total_partitions= new_total_partitions* + alt_part_info->no_subparts; + } + if (check_total_partitions > MAX_PARTITIONS) + { + my_error(ER_TOO_MANY_PARTITIONS_ERROR, MYF(0)); + DBUG_RETURN(TRUE); + } + alt_part_info->part_type= tab_part_info->part_type; + if (set_up_defaults_for_partitioning(alt_part_info, + table->file, + (ulonglong)0ULL, + tab_part_info->no_parts)) + { + DBUG_RETURN(TRUE); + } + /* + Need to concatenate the lists here to make it possible to check the + partition info for correctness using check_partition_info + */ + { + List_iterator<partition_element> alt_it(alt_part_info->partitions); + uint part_count= 0; + do + { + partition_element *part_elem= alt_it++; + tab_part_info->partitions.push_back(part_elem); + tab_part_info->temp_partitions.push_back(part_elem); + } while (++part_count < no_new_partitions); + tab_part_info->no_parts+= no_new_partitions; + } + { + List_iterator<partition_element> tab_it(tab_part_info->partitions); + partition_element *part_elem= tab_it++; + if (is_sub_partitioned(tab_part_info)) + { + List_iterator<partition_element> sub_it(part_elem->subpartitions); + part_elem= sub_it++; + } + if (check_partition_info(tab_part_info, part_elem->engine_type, + table->file, (ulonglong)0ULL)) + { + DBUG_RETURN(TRUE); + } + } + create_info->db_type= DB_TYPE_PARTITION_DB; + thd->lex->part_info= tab_part_info; + if (table->file->alter_table_flags() & HA_ONLINE_ADD_EMPTY_PARTITION && + (tab_part_info->part_type == RANGE_PARTITION || + tab_part_info->part_type == LIST_PARTITION)) + { + /* + For range and list partitions add partition is simply adding a new + empty partition to the table. If the handler support this we will + use the simple method of doing this. In this case we need to break + out the new partitions from the list again and only keep them in the + temporary list. 
Added partitions are always added at the end. + */ + { + List_iterator<partition_element> tab_it(tab_part_info->partitions); + uint part_count= 0; + do + { + tab_it++; + } while (++part_count < no_orig_partitions); + do + { + tab_it++; + tab_it.remove(); + } while (++part_count < new_total_partitions); + } + tab_part_info->no_parts-= no_new_partitions; + online_add_empty_partition= TRUE; + } + else + { + tab_part_info->temp_partitions.empty(); + } + } + else if (alter_info->flags == ALTER_DROP_PARTITION) + { + /* + Drop a partition from a range partition and list partitioning is + always safe and can be made more or less immediate. It is necessary + however to ensure that the partition to be removed is safely removed + and that REPAIR TABLE can remove the partition if for some reason the + command to drop the partition failed in the middle. + */ + uint part_count= 0; + uint no_parts_dropped= alter_info->partition_names.elements; + uint no_parts_found= 0; + List_iterator<partition_element> part_it(tab_part_info->partitions); + if (!(tab_part_info->part_type == RANGE_PARTITION || + tab_part_info->part_type == LIST_PARTITION)) + { + my_error(ER_ONLY_ON_RANGE_LIST_PARTITION, MYF(0), "DROP"); + DBUG_RETURN(TRUE); + } + if (no_parts_dropped >= tab_part_info->no_parts) + { + my_error(ER_DROP_LAST_PARTITION, MYF(0)); + DBUG_RETURN(TRUE); + } + do + { + partition_element *part_elem= part_it++; + if (is_partition_in_list(part_elem->partition_name, + alter_info->partition_names)) + { + /* + Remove the partition from the list and put it instead in the + list of temporary partitions with a new state. + */ + no_parts_found++; + part_elem->part_state= PART_IS_DROPPED; + } + } while (++part_count < tab_part_info->no_parts); + if (no_parts_found != no_parts_dropped) + { + my_error(ER_DROP_PARTITION_NON_EXISTENT, MYF(0)); + DBUG_RETURN(TRUE); + } + if (!(table->file->alter_table_flags() & HA_ONLINE_DROP_PARTITION)) + { + my_error(ER_DROP_PARTITION_FAILURE, MYF(0)); + DBUG_RETURN(TRUE); + } + if (table->file->is_fk_defined_on_table_or_index(MAX_KEY)) + { + my_error(ER_DROP_PARTITION_WHEN_FK_DEFINED, MYF(0)); + DBUG_RETURN(TRUE); + } + /* + This code needs set-up of structures needed by mysql_create_table + before it is called and thus we only set a boolean variable to be + checked later down in the code when all needed data structures are + prepared. + */ + online_drop_partition= TRUE; + } + else if (alter_info->flags == ALTER_COALESCE_PARTITION) + { + /* + In this version COALESCE PARTITION is implemented by simply removing + a partition from the table and using the normal ALTER TABLE code + and ensuring that copy to a new table occurs. Later on we can optimise + this function for Linear Hash partitions. In that case we can avoid + reorganising the entire table. For normal hash partitions it will + be a complete reorganise anyways so that can only be made on-line + if it still uses a copy table. 
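/*
  Illustrative aside, not part of the patch: the DROP PARTITION path above is
  deliberately two-phase -- every named partition is first marked
  PART_IS_DROPPED while matches are counted, and the operation is refused
  unless the count equals the number of names given. Mark-and-verify as a
  standalone sketch:
*/
#include <string>
#include <vector>

struct ex_partition
{
  std::string name;
  bool dropped;
};

static bool ex_mark_dropped(std::vector<ex_partition> &parts,
                            const std::vector<std::string> &names)
{
  unsigned found= 0;
  for (ex_partition &p : parts)
    for (const std::string &n : names)
      if (p.name == n)
      {
        p.dropped= true;          /* phase one: mark only */
        found++;
        break;
      }
  /* Phase two (actual removal) proceeds only on a complete match. */
  return found == names.size();
}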
+ */ + uint part_count= 0; + uint no_parts_coalesced= alter_info->no_parts; + uint no_parts_remain= tab_part_info->no_parts - no_parts_coalesced; + List_iterator<partition_element> part_it(tab_part_info->partitions); + if (tab_part_info->part_type != HASH_PARTITION) + { + my_error(ER_COALESCE_ONLY_ON_HASH_PARTITION, MYF(0)); + DBUG_RETURN(TRUE); + } + if (no_parts_coalesced == 0) + { + my_error(ER_COALESCE_PARTITION_NO_PARTITION, MYF(0)); + DBUG_RETURN(TRUE); + } + if (no_parts_coalesced >= tab_part_info->no_parts) + { + my_error(ER_DROP_LAST_PARTITION, MYF(0)); + DBUG_RETURN(TRUE); + } + do + { + part_it++; + if (++part_count > no_parts_remain) + part_it.remove(); + } while (part_count < tab_part_info->no_parts); + tab_part_info->no_parts= no_parts_remain; + } + else if (alter_info->flags == ALTER_REORGANISE_PARTITION) + { + /* + Reorganise partitions takes a number of partitions that are next + to each other (at least for RANGE PARTITIONS) and then uses those + to create a set of new partitions. So data is copied from those + partitions into the new set of partitions. Those new partitions + can have more values in the LIST value specifications or less both + are allowed. The ranges can be different but since they are + changing a set of consecutive partitions they must cover the same + range as those changed from. + This command can be used on RANGE and LIST partitions. + */ + uint no_parts_reorged= alter_info->partition_names.elements; + uint no_parts_new= thd->lex->part_info->partitions.elements; + partition_info *alt_part_info= thd->lex->part_info; + uint check_total_partitions; + if (no_parts_reorged > tab_part_info->no_parts) + { + my_error(ER_REORG_PARTITION_NOT_EXIST, MYF(0)); + DBUG_RETURN(TRUE); + } + if (!(tab_part_info->part_type == RANGE_PARTITION || + tab_part_info->part_type == LIST_PARTITION)) + { + my_error(ER_ONLY_ON_RANGE_LIST_PARTITION, MYF(0), "REORGANISE"); + DBUG_RETURN(TRUE); + } + if (is_partitions_in_table(alt_part_info, tab_part_info)) + { + my_error(ER_SAME_NAME_PARTITION, MYF(0)); + DBUG_RETURN(TRUE); + } + check_total_partitions= tab_part_info->no_parts + no_parts_new; + check_total_partitions-= no_parts_reorged; + if (check_total_partitions > MAX_PARTITIONS) + { + my_error(ER_TOO_MANY_PARTITIONS_ERROR, MYF(0)); + DBUG_RETURN(TRUE); + } + { + List_iterator<partition_element> tab_it(tab_part_info->partitions); + uint part_count= 0; + bool found_first= FALSE, found_last= FALSE; + uint drop_count= 0; + longlong tab_max_range, alt_max_range; + do + { + partition_element *part_elem= tab_it++; + if (is_partition_in_list(part_elem->partition_name, + alter_info->partition_names)) + { + drop_count++; + tab_max_range= part_elem->range_value; + if (!found_first) + { + uint alt_part_count= 0; + found_first= TRUE; + List_iterator<partition_element> alt_it(alt_part_info->partitions); + do + { + partition_element *alt_part_elem= alt_it++; + alt_max_range= alt_part_elem->range_value; + if (alt_part_count == 0) + tab_it.replace(alt_part_elem); + else + tab_it.after(alt_part_elem); + } while (++alt_part_count < no_parts_new); + } + else if (found_last) + { + my_error(ER_CONSECUTIVE_REORG_PARTITIONS, MYF(0)); + DBUG_RETURN(TRUE); + } + else + tab_it.remove(); + } + else + { + if (found_first) + found_last= TRUE; + } + } while (++part_count < tab_part_info->no_parts); + if (drop_count != no_parts_reorged) + { + my_error(ER_DROP_PARTITION_NON_EXISTENT, MYF(0)); + DBUG_RETURN(TRUE); + } + if (tab_part_info->part_type == RANGE_PARTITION && + alt_max_range > tab_max_range) + { + 
my_error(ER_REORG_OUTSIDE_RANGE, MYF(0));
+ DBUG_RETURN(TRUE);
+ }
+ }
+ }
+ partition_changed= TRUE;
+ create_info->db_type= DB_TYPE_PARTITION_DB;
+ thd->lex->part_info= tab_part_info;
+ if (alter_info->flags == ALTER_ADD_PARTITION ||
+ alter_info->flags == ALTER_REORGANISE_PARTITION)
+ {
+ if (check_partition_info(tab_part_info, default_engine_type,
+ table->file, (ulonglong)0ULL))
+ {
+ DBUG_RETURN(TRUE);
+ }
+ }
+ }
+ else
+ {
+ /*
+ When thd->lex->part_info has a reference to a partition_info the
+ ALTER TABLE contained a definition of a partitioning.
+
+ Case I:
+ There was a partitioning before and a new one is defined.
+ We use the new partitioning. The new partitioning is already
+ defined in the correct variable so no work is needed to
+ accomplish this.
+ We do however need to set partition_changed to ensure that more
+ than just the frm file is changed by the ALTER TABLE command.
+
+ Case IIa:
+ There was a partitioning before and there is no new one defined.
+ Also the user has not specified an explicit engine to use.
+
+ We use the old partitioning also for the new table. We do this
+ by assigning the partition_info from the table loaded in
+ open_ltable to the partition_info struct used by mysql_create_table
+ later in this method.
+
+ Case IIb:
+ There was a partitioning before and there is no new one defined.
+ The user has specified an explicit engine to use.
+
+ Since the user has specified an explicit engine to use we override
+ the old partitioning info and create a new table using the specified
+ engine. This is the reason for the extra check whether the old and
+ new engines are equal.
+ In this case the partitioning is also changed.
+
+ Case III:
+ There was no partitioning before altering the table, and there is
+ partitioning defined in the altered table. Use the new partitioning.
+ No work needed since the partitioning info is already in the
+ correct variable.
+ Here too the partitioning has changed, and thus a new table must
+ be created.
+
+ Case IV:
+ There was no partitioning before and no partitioning defined.
+ Obviously no work needed.
+ */
+ if (table->s->part_info)
+ {
+ if (!thd->lex->part_info &&
+ create_info->db_type == old_db_type)
+ thd->lex->part_info= table->s->part_info;
+ }
+ if (thd->lex->part_info)
+ {
+ /*
+ Need to cater for engine types that can handle partitioning
+ without using the partition handler.
+ */
+ if (thd->lex->part_info != table->s->part_info)
+ partition_changed= TRUE;
+ if (create_info->db_type != DB_TYPE_PARTITION_DB)
+ thd->lex->part_info->default_engine_type= create_info->db_type;
+ create_info->db_type= DB_TYPE_PARTITION_DB;
+ }
+ }
+#endif
 if (check_engine(thd, new_name, &create_info->db_type))
 DBUG_RETURN(TRUE);
 new_db_type= create_info->db_type;
@@ -3276,12 +3984,7 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name,
 }
 if (!error)
 {
- if (mysql_bin_log.is_open())
- {
- thd->clear_error();
- Query_log_event qinfo(thd, thd->query, thd->query_length, FALSE, FALSE);
- mysql_bin_log.write(&qinfo);
- }
+ write_bin_log(thd, TRUE);
 if (do_send_ok)
 send_ok(thd);
 }
@@ -3362,8 +4065,8 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name,
 def_it.remove();
 }
 }
- else
- { // Use old field value
+ else // This field was not dropped and not changed, add it to the list
+ { // for the new table.
create_list.push_back(def=new create_field(field,field)); alter_it.rewind(); // Change default if ALTER Alter_column *alter; @@ -3576,17 +4279,122 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name, if (table->s->tmp_table) create_info->options|=HA_LEX_CREATE_TMP_TABLE; + set_table_default_charset(thd, create_info, db); + +#ifdef WITH_PARTITION_STORAGE_ENGINE + if (thd->variables.old_alter_table || partition_changed) +#else + if (thd->variables.old_alter_table) +#endif + need_copy_table= 1; + else + need_copy_table= compare_tables(table, &create_list, &key_list, + create_info, alter_info, order_num); + /* better have a negative test here, instead of positive, like alter_info->flags & ALTER_ADD_COLUMN|ALTER_ADD_INDEX|... so that ALTER TABLE won't break when somebody will add new flag */ - need_copy_table= (alter_info->flags & - ~(ALTER_CHANGE_COLUMN_DEFAULT|ALTER_OPTIONS) || - (create_info->used_fields & - ~(HA_CREATE_USED_COMMENT|HA_CREATE_USED_PASSWORD)) || - table->s->tmp_table); - create_info->frm_only= !need_copy_table; + + if (!need_copy_table) + create_info->frm_only= 1; + +#ifdef WITH_PARTITION_STORAGE_ENGINE + if (partition_changed) + { + if (online_drop_partition) + { + /* + Now after all checks and setting state on dropped partitions we can + start the actual dropping of the partitions. + 1) Lock table in TL_WRITE_ONLY to ensure all other accesses on table + are completed and no new ones are started until we have changed + the frm file. + 2) Write the new frm file where state of dropped partitions is + changed to PART_IS_DROPPED + 3) Perform the actual drop of the partition using the handler of the + table. + 4) Write a new frm file of the table where the partitions are dropped + from the table. + + */ + uint old_lock_type; + partition_info *part_info= table->s->part_info; + char path[FN_REFLEN+1]; + uint db_options= 0, key_count, syntax_len; + KEY *key_info_buffer; + char *part_syntax_buf; + + VOID(pthread_mutex_lock(&LOCK_open)); + if (abort_and_upgrade_lock(thd, table, db, table_name, &old_lock_type)) + { + DBUG_RETURN(TRUE); + } + VOID(pthread_mutex_unlock(&LOCK_open)); + mysql_prepare_table(thd, create_info, &create_list, + &key_list, /*tmp_table*/ 0, &db_options, + table->file, &key_info_buffer, &key_count, + /*select_field_count*/ 0); + if (!(part_syntax_buf= generate_partition_syntax(part_info, + &syntax_len, + TRUE,TRUE))) + { + DBUG_RETURN(TRUE); + } + part_info->part_info_string= part_syntax_buf; + part_info->part_info_len= syntax_len; + build_table_path(path, sizeof(path), db, table_name, reg_ext); + if (mysql_create_frm(thd, path, db, table_name, create_info, + create_list, key_count, key_info_buffer, + table->file)) + { + DBUG_RETURN(TRUE); + } + thd->lex->part_info= part_info; + build_table_path(path, sizeof(path), db, table_name, ""); + if (table->file->drop_partitions(path)) + { + DBUG_RETURN(TRUE); + } + { + List_iterator<partition_element> part_it(part_info->partitions); + uint i= 0, remove_count= 0; + do + { + partition_element *part_elem= part_it++; + if (is_partition_in_list(part_elem->partition_name, + alter_info->partition_names)) + { + part_it.remove(); + remove_count++; + } + } while (++i < part_info->no_parts); + part_info->no_parts-= remove_count; + } + if (!(part_syntax_buf= generate_partition_syntax(part_info, + &syntax_len, + TRUE,TRUE))) + { + DBUG_RETURN(TRUE); + } + part_info->part_info_string= part_syntax_buf; + part_info->part_info_len= syntax_len; + build_table_path(path, sizeof(path), db, table_name, reg_ext); + if 
(mysql_create_frm(thd, path, db, table_name, create_info, + create_list, key_count, key_info_buffer, + table->file) || + table->file->create_handler_files(path)) + { + DBUG_RETURN(TRUE); + } + thd->proc_info="end"; + write_bin_log(thd, FALSE); + send_ok(thd); + DBUG_RETURN(FALSE); + } + } +#endif /* Handling of symlinked tables: @@ -3714,12 +4522,7 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name, my_free((gptr) new_table,MYF(0)); goto err; } - if (mysql_bin_log.is_open()) - { - thd->clear_error(); - Query_log_event qinfo(thd, thd->query, thd->query_length, FALSE, FALSE); - mysql_bin_log.write(&qinfo); - } + write_bin_log(thd, TRUE); goto end_temporary; } @@ -3850,16 +4653,14 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name, goto err; } thd->proc_info="end"; - if (mysql_bin_log.is_open()) - { - thd->clear_error(); - Query_log_event qinfo(thd, thd->query, thd->query_length, FALSE, FALSE); - mysql_bin_log.write(&qinfo); - } + write_bin_log(thd, TRUE); VOID(pthread_cond_broadcast(&COND_refresh)); VOID(pthread_mutex_unlock(&LOCK_open)); -#ifdef HAVE_BERKELEY_DB - if (old_db_type == DB_TYPE_BERKELEY_DB) + /* + TODO RONM: This problem needs to handled for Berkeley DB partitions + as well + */ + if (ha_check_storage_engine_flag(old_db_type,HTON_FLUSH_AFTER_RENAME)) { /* For the alter table to be properly flushed to the logs, we @@ -3875,11 +4676,10 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name, my_free((char*) table, MYF(0)); } else - sql_print_warning("Could not open BDB table %s.%s after rename\n", + sql_print_warning("Could not open table %s.%s after rename\n", new_db,table_name); - (void) berkeley_flush_logs(); + ha_flush_logs(old_db_type); } -#endif table_list->table=0; // For query cache query_cache_invalidate3(thd, table_list, 0); @@ -3895,7 +4695,7 @@ end_temporary: err: DBUG_RETURN(TRUE); } - +/* mysql_alter_table */ static int copy_data_between_tables(TABLE *from,TABLE *to, @@ -4001,7 +4801,8 @@ copy_data_between_tables(TABLE *from,TABLE *to, this function does not set field->query_id in the columns to the current query id */ - from->file->extra(HA_EXTRA_RETRIEVE_ALL_COLS); + to->file->ha_set_all_bits_in_write_set(); + from->file->ha_retrieve_all_cols(); init_read_record(&info, thd, from, (SQL_SELECT *) 0, 1,1); if (ignore || handle_duplicates == DUP_REPLACE) @@ -4106,7 +4907,7 @@ bool mysql_recreate_table(THD *thd, TABLE_LIST *table_list, create_info.row_type=ROW_TYPE_NOT_USED; create_info.default_table_charset=default_charset_info; /* Force alter table to recreate table */ - lex->alter_info.flags= ALTER_CHANGE_COLUMN; + lex->alter_info.flags= (ALTER_CHANGE_COLUMN | ALTER_RECREATE); DBUG_RETURN(mysql_alter_table(thd, NullS, NullS, &create_info, table_list, lex->create_list, lex->key_list, 0, (ORDER *) 0, @@ -4165,10 +4966,11 @@ bool mysql_checksum_table(THD *thd, TABLE_LIST *tables, HA_CHECK_OPT *check_opt) ha_checksum crc= 0; uchar null_mask=256 - (1 << t->s->last_null_bit_pos); - /* InnoDB must be told explicitly to retrieve all columns, because - this function does not set field->query_id in the columns to the - current query id */ - t->file->extra(HA_EXTRA_RETRIEVE_ALL_COLS); + /* + Set all bits in read set and inform InnoDB that we are reading all + fields + */ + t->file->ha_retrieve_all_cols(); if (t->file->ha_rnd_init(1)) protocol->store_null(); diff --git a/sql/sql_udf.cc b/sql/sql_udf.cc index ba3c598a784..2067f042133 100644 --- a/sql/sql_udf.cc +++ b/sql/sql_udf.cc @@ -38,36 +38,10 @@ #ifdef HAVE_DLOPEN extern "C" { -#if 
defined(__WIN__) - void* dlsym(void* lib,const char* name) - { - return GetProcAddress((HMODULE)lib,name); - } - void* dlopen(const char* libname,int unused) - { - return LoadLibraryEx(libname,NULL,0); - } - void dlclose(void* lib) - { - FreeLibrary((HMODULE)lib); - } - -#elif !defined(OS2) -#include <dlfcn.h> -#endif - #include <stdarg.h> #include <hash.h> } -#ifndef RTLD_NOW -#define RTLD_NOW 1 // For FreeBSD 2.2.2 -#endif - -#ifndef HAVE_DLERROR -#define dlerror() "" -#endif - static bool initialized = 0; static MEM_ROOT mem; static HASH udf_hash; @@ -194,9 +168,8 @@ void udf_init() This is done to ensure that only approved dll from the system directories are used (to make this even remotely secure). */ - if (strchr(dl_name, '/') || - IF_WIN(strchr(dl_name, '\\'),0) || - strlen(name.str) > NAME_LEN) + if (my_strchr(files_charset_info, dl_name, dl_name + strlen(dl_name), FN_LIBCHAR) || + strlen(name.str) > NAME_LEN) { sql_print_error("Invalid row in mysql.func table for function '%.64s'", name.str); @@ -214,10 +187,13 @@ void udf_init() void *dl = find_udf_dl(tmp->dl); if (dl == NULL) { - if (!(dl = dlopen(tmp->dl, RTLD_NOW))) + char dlpath[FN_REFLEN]; + strxnmov(dlpath, sizeof(dlpath) - 1, opt_plugin_dir, "/", tmp->dl, + NullS); + if (!(dl= dlopen(dlpath, RTLD_NOW))) { /* Print warning to log */ - sql_print_error(ER(ER_CANT_OPEN_LIBRARY), tmp->dl,errno,dlerror()); + sql_print_error(ER(ER_CANT_OPEN_LIBRARY), dlpath, errno, dlerror()); /* Keep the udf in the hash so that we can remove it later */ continue; } @@ -412,7 +388,7 @@ int mysql_create_function(THD *thd,udf_func *udf) This is done to ensure that only approved dll from the system directories are used (to make this even remotely secure). */ - if (strchr(udf->dl, '/') || IF_WIN(strchr(udf->dl, '\\'),0)) + if (my_strchr(files_charset_info, udf->dl, udf->dl + strlen(udf->dl), FN_LIBCHAR)) { my_message(ER_UDF_NO_PATHS, ER(ER_UDF_NO_PATHS), MYF(0)); DBUG_RETURN(1); @@ -431,12 +407,14 @@ int mysql_create_function(THD *thd,udf_func *udf) } if (!(dl = find_udf_dl(udf->dl))) { - if (!(dl = dlopen(udf->dl, RTLD_NOW))) + char dlpath[FN_REFLEN]; + strxnmov(dlpath, sizeof(dlpath) - 1, opt_plugin_dir, "/", udf->dl, NullS); + if (!(dl = dlopen(dlpath, RTLD_NOW))) { DBUG_PRINT("error",("dlopen of %s failed, error: %d (%s)", - udf->dl,errno,dlerror())); + dlpath, errno, dlerror())); my_error(ER_CANT_OPEN_LIBRARY, MYF(0), - udf->dl, errno, dlerror()); + dlpath, errno, dlerror()); goto err; } new_dl=1; @@ -528,7 +506,7 @@ int mysql_drop_function(THD *thd,const LEX_STRING *udf_name) if (!(table = open_ltable(thd,&tables,TL_WRITE))) goto err; table->field[0]->store(udf_name->str, udf_name->length, system_charset_info); - table->file->extra(HA_EXTRA_RETRIEVE_ALL_COLS); + table->file->ha_retrieve_all_cols(); if (!table->file->index_read_idx(table->record[0], 0, (byte*) table->field[0]->ptr, table->key_info[0].key_length, diff --git a/sql/sql_update.cc b/sql/sql_update.cc index 039aa0f71fa..f6834117d24 100644 --- a/sql/sql_update.cc +++ b/sql/sql_update.cc @@ -119,10 +119,10 @@ int mysql_update(THD *thd, { bool using_limit= limit != HA_POS_ERROR; bool safe_update= thd->options & OPTION_SAFE_UPDATES; - bool used_key_is_modified, transactional_table; + bool used_key_is_modified, transactional_table, will_batch; int res; - int error; - uint used_index= MAX_KEY; + int error, loc_error; + uint used_index= MAX_KEY, dup_key_found; bool need_sort= TRUE; #ifndef NO_EMBEDDED_ACCESS_CHECKS uint want_privilege; @@ -197,7 +197,11 @@ int mysql_update(THD *thd, 
table_list->grant.want_privilege= table->grant.want_privilege= want_privilege;
 table_list->register_want_access(want_privilege);
 #endif
- if (setup_fields_with_no_wrap(thd, 0, fields, 1, 0, 0))
+ /*
+ Indicate that the set of fields is to be updated by passing 2 for
+ set_query_id.
+ */
+ if (setup_fields_with_no_wrap(thd, 0, fields, 2, 0, 0))
 DBUG_RETURN(1); /* purecov: inspected */
 if (table_list->view && check_fields(thd, fields))
 {
@@ -214,7 +218,10 @@ int mysql_update(THD *thd,
 if (table->timestamp_field->query_id == thd->query_id)
 table->timestamp_field_type= TIMESTAMP_NO_AUTO_SET;
 else
+ {
 table->timestamp_field->query_id=timestamp_query_id;
+ table->file->ha_set_bit_in_write_set(table->timestamp_field->fieldnr);
+ }
 }
 #ifndef NO_EMBEDDED_ACCESS_CHECKS
@@ -284,13 +291,18 @@ int mysql_update(THD *thd,
 used_key_is_modified= check_if_key_used(table, used_index, fields);
 }
+#ifdef WITH_PARTITION_STORAGE_ENGINE
+ if (used_key_is_modified || order ||
+ partition_key_modified(table, fields))
+#else
 if (used_key_is_modified || order)
+#endif
 {
 /*
 We can't update table directly; We must first search after all
 matching rows before updating the table!
 */
- table->file->extra(HA_EXTRA_RETRIEVE_ALL_COLS);
+ table->file->ha_retrieve_all_cols();
 if (used_index < MAX_KEY && old_used_keys.is_set(used_index))
 {
 table->key_read=1;
@@ -422,7 +434,7 @@ int mysql_update(THD *thd,
 (thd->variables.sql_mode &
 (MODE_STRICT_TRANS_TABLES |
 MODE_STRICT_ALL_TABLES)));
-
+ will_batch= !table->file->start_bulk_update();
 while (!(error=info.read_record(&info)) && !thd->killed)
 {
 if (!(select && select->skip_record()))
 {
@@ -449,8 +461,47 @@ int mysql_update(THD *thd,
 break;
 }
 }
- if (!(error=table->file->update_row((byte*) table->record[1],
- (byte*) table->record[0])))
+ if (will_batch)
+ {
+ /*
+ Typically a batching handler can execute the batched jobs:
+ 1) When specifically told to do so
+ 2) When it is no longer a good idea to batch
+ 3) When the batch must be sent for other reasons
+ (one such reason is when READs must be performed)
+
+ 1) is covered by exec_bulk_update calls.
+ 2) and 3) are handled by the bulk_update_row method.
+
+ bulk_update_row can execute the queued updates either including
+ or excluding the row passed in the current call. This is up to
+ the handler implementation and can vary from call to call.
+
+ dup_key_found reports the number of duplicate keys found in the
+ updates actually executed, but only if the extra call
+ HA_EXTRA_IGNORE_DUP_KEY has been issued. If it has not been
+ issued, an error code is returned and this number can be ignored.
+ Thus any handler that implements batching for UPDATE IGNORE must
+ also handle this extra call properly.
+
+ If a duplicate key is found on the record included in this call,
+ then it should be included in the count of dup_key_found and
+ error should be set to 0 (only if these errors are ignored).
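To make the contract above concrete, here is a hedged sketch of the handler side, assuming nothing about the real API beyond the three methods this comment names (start_bulk_update, bulk_update_row, exec_bulk_update); the row type, batch size and buffering policy are invented for the example:

    #include <cstdint>
    #include <utility>
    #include <vector>

    typedef std::vector<unsigned char> row_image;   // stand-in for a record buffer

    class batching_handler_sketch
    {
      std::vector<std::pair<row_image,row_image> > queued; // (old, new) row pairs
      bool ignore_dup_key;          // would be set by HA_EXTRA_IGNORE_DUP_KEY
      enum { BATCH_MAX= 64 };

      // Apply everything queued; with ignore_dup_key set, duplicate-key rows
      // are counted instead of raising an error. The engine round-trip is
      // elided here, so the sketch pretends all rows applied cleanly.
      int flush(uint32_t *dup_key_found)
      {
        *dup_key_found= 0;
        queued.clear();
        return 0;
      }

    public:
      batching_handler_sketch() : ignore_dup_key(false) {}

      // Returning false means "I can batch", so the caller sets will_batch.
      bool start_bulk_update() { return false; }

      // Queue the row; flush on the handler's own initiative (cases 2 and 3).
      int bulk_update_row(const row_image &old_row, const row_image &new_row,
                          uint32_t *dup_key_found)
      {
        queued.push_back(std::make_pair(old_row, new_row));
        *dup_key_found= 0;
        if (queued.size() >= BATCH_MAX)
          return flush(dup_key_found);
        return 0;
      }

      // Case 1: the server explicitly requests execution of the batch.
      int exec_bulk_update(uint32_t *dup_key_found) { return flush(dup_key_found); }

      void end_bulk_update() { queued.clear(); }
    };

What matters to the caller in mysql_update() is that error and *dup_key_found together distinguish "all rows applied", "applied but some duplicates ignored", and "failed".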
+ */
+ error= table->file->bulk_update_row(table->record[1],
+ table->record[0],
+ &dup_key_found);
+ limit+= dup_key_found;
+ updated-= dup_key_found;
+ }
+ else
+ {
+ /* Non-batched update */
+ error= table->file->update_row((byte*) table->record[1],
+ (byte*) table->record[0]);
+ }
+ if (!error)
 {
 updated++;
 thd->no_trans_update= !transactional_table;
@@ -474,20 +525,74 @@ int mysql_update(THD *thd,
 if (!--limit && using_limit)
 {
- error= -1; // Simulate end of file
- break;
+ /*
+ We have reached end-of-file in the most common situations: when
+ no batching has occurred, when batching was supposed to occur
+ but no updates were made, and when the batch execution completed
+ without error and without finding any duplicate keys.
+ If the batched updates were performed with errors we need to
+ check them, and if there was no error but duplicate keys were
+ found we need to continue, since those rows are not counted
+ against limit.
+ */
+ if (will_batch &&
+ ((error= table->file->exec_bulk_update(&dup_key_found)) ||
+ !dup_key_found))
+ {
+ if (error)
+ {
+ /*
+ The handler should not report duplicate key errors if they
+ are ignored. This is a requirement on batching handlers.
+ */
+ table->file->print_error(error,MYF(0));
+ error= 1;
+ break;
+ }
+ /*
+ Either an error was found and we are ignoring errors, or
+ duplicate keys were found. In both cases we need to correct
+ the counters and continue the loop.
+ */
+ limit= dup_key_found; // limit is 0 when we get here, so the ignored rows must be added back
+ updated-= dup_key_found;
+ }
+ else
+ {
+ error= -1; // Simulate end of file
+ break;
+ }
 }
 }
 else
 table->file->unlock_row();
 thd->row_count++;
 }
+ dup_key_found= 0;
 if (thd->killed && !error)
 error= 1; // Aborted
+ else if (will_batch &&
+ (loc_error= table->file->exec_bulk_update(&dup_key_found)))
+ /*
+ A batched update was performed and returned an error indication.
+ It cannot be an allowed duplicate key error, since we require the
+ batching handler to treat those as normal behaviour.
+
+ Otherwise we simply subtract the number of duplicate key records
+ found in the batched update.
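A worked example of this accounting (numbers hypothetical): for UPDATE IGNORE ... LIMIT 10, suppose the scan queues ten rows, so updated has reached 10 and limit has reached 0, and the flush then reports three ignored duplicates:

    #include <cassert>
    #include <cstdint>

    int main()
    {
      uint32_t limit= 10, updated= 0, dup_key_found;
      for (int row= 0; row < 10; row++)  // ten rows queued and provisionally
      {                                  // counted as updated
        updated++;
        --limit;
      }
      dup_key_found= 3;                  // as if reported by exec_bulk_update()
      limit= dup_key_found;              // the 3 duplicates never consumed LIMIT
      updated-= dup_key_found;           // only 7 rows were really changed
      assert(limit == 3 && updated == 7);
      return 0;
    }

The scan therefore continues until three more rows are updated (or end-of-file), which is exactly the behaviour a non-batching handler would have produced.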
+ */ + { + thd->fatal_error(); + table->file->print_error(loc_error,MYF(0)); + error= 1; + } + else + updated-= dup_key_found; + if (will_batch) + table->file->end_bulk_update(); end_read_record(&info); free_io_cache(table); // If ORDER BY delete select; - thd->proc_info="end"; + thd->proc_info= "end"; VOID(table->file->extra(HA_EXTRA_NO_IGNORE_DUP_KEY)); /* @@ -697,7 +802,7 @@ reopen_tables: &lex->select_lex.leaf_tables, FALSE)) DBUG_RETURN(TRUE); - if (setup_fields_with_no_wrap(thd, 0, *fields, 1, 0, 0)) + if (setup_fields_with_no_wrap(thd, 0, *fields, 2, 0, 0)) DBUG_RETURN(TRUE); for (tl= table_list; tl ; tl= tl->next_local) diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy index 671e2b1740d..c79ad97be4e 100644 --- a/sql/sql_yacc.yy +++ b/sql/sql_yacc.yy @@ -73,6 +73,7 @@ inline Item *is_truth_value(Item *A, bool v1, bool v2) int num; ulong ulong_num; ulonglong ulonglong_number; + longlong longlong_number; LEX_STRING lex_str; LEX_STRING *lex_str_ptr; LEX_SYMBOL symbol; @@ -136,6 +137,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token ASCII_SYM %token ASENSITIVE_SYM %token ATAN +%token AUTHORS_SYM %token AUTO_INC %token AVG_ROW_LENGTH %token AVG_SYM @@ -335,6 +337,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token INSENSITIVE_SYM %token INSERT %token INSERT_METHOD +%token INSTALL_SYM %token INTERVAL_SYM %token INTO %token INT_SYM @@ -358,13 +361,16 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token LEAVES %token LEAVE_SYM %token LEFT +%token LESS_SYM %token LEVEL_SYM %token LEX_HOSTNAME %token LIKE %token LIMIT +%token LINEAR_SYM %token LINEFROMTEXT %token LINES %token LINESTRING +%token LIST_SYM %token LOAD %token LOCAL_SYM %token LOCATE @@ -404,6 +410,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token MAX_SYM %token MAX_UPDATES_PER_HOUR %token MAX_USER_CONNECTIONS_SYM +%token MAX_VALUE_SYM %token MEDIUMBLOB %token MEDIUMINT %token MEDIUMTEXT @@ -438,6 +445,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token NE %token NEW_SYM %token NEXT_SYM +%token NODEGROUP_SYM %token NONE_SYM %token NOT2_SYM %token NOT_SYM @@ -465,10 +473,14 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token OUTFILE %token OUT_SYM %token PACK_KEYS_SYM +%token PARSER_SYM %token PARTIAL +%token PARTITION_SYM +%token PARTITIONS_SYM %token PASSWORD %token PARAM_MARKER %token PHASE_SYM +%token PLUGIN_SYM %token POINTFROMTEXT %token POINT_SYM %token POLYFROMTEXT @@ -492,6 +504,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token RAID_STRIPED_SYM %token RAID_TYPE %token RAND +%token RANGE_SYM %token READS_SYM %token READ_SYM %token REAL @@ -505,6 +518,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token RELEASE_SYM %token RELOAD %token RENAME +%token REORGANISE_SYM %token REPAIR %token REPEATABLE_SYM %token REPEAT_SYM @@ -551,6 +565,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token SLAVE %token SMALLINT %token SNAPSHOT_SYM +%token SONAME_SYM %token SOUNDS_SYM %token SPATIAL_SYM %token SPECIFIC_SYM @@ -577,6 +592,8 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token STRING_SYM %token SUBDATE_SYM %token SUBJECT_SYM +%token SUBPARTITION_SYM +%token SUBPARTITIONS_SYM %token SUBSTRING %token SUBSTRING_INDEX %token SUM_SYM @@ -598,6 +615,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token TINYBLOB %token TINYINT %token TINYTEXT +%token THAN_SYM %token TO_SYM %token TRAILING 
%token TRANSACTION_SYM @@ -609,24 +627,21 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token TYPES_SYM %token TYPE_SYM %token UDF_RETURNS_SYM -%token UDF_SONAME_SYM %token ULONGLONG_NUM %token UNCOMMITTED_SYM %token UNDEFINED_SYM %token UNDERSCORE_CHARSET %token UNDO_SYM %token UNICODE_SYM +%token UNINSTALL_SYM %token UNION_SYM %token UNIQUE_SYM %token UNIQUE_USERS %token UNIX_TIMESTAMP %token UNKNOWN_SYM %token UNLOCK_SYM -%token UNLOCK_SYM %token UNSIGNED %token UNTIL_SYM -%token UNTIL_SYM -%token UPDATE_SYM %token UPDATE_SYM %token USAGE %token USER @@ -687,7 +702,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); sp_opt_label BIN_NUM label_ident %type <lex_str_ptr> - opt_table_alias + opt_table_alias opt_fulltext_parser %type <table> table_ident table_ident_nodb references xid @@ -714,6 +729,9 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %type <ulonglong_number> ulonglong_num +%type <longlong_number> + part_bit_expr + %type <lock_type> replace_lock_option opt_low_priority insert_lock_option load_data_lock @@ -730,6 +748,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); sp_opt_default simple_ident_nospvar simple_ident_q field_or_var limit_option + part_func_expr %type <item_num> NUM_literal @@ -834,6 +853,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); definer view_replace_or_algorithm view_replace view_algorithm_opt view_algorithm view_or_trigger_tail view_suid view_tail view_list_opt view_list view_select view_check_option trigger_tail + install uninstall partition_entry END_OF_INPUT %type <NONE> call sp_proc_stmts sp_proc_stmts1 sp_proc_stmt @@ -894,11 +914,13 @@ statement: | handler | help | insert + | install | kill | load | lock | optimize | keycache + | partition_entry | preload | prepare | purge @@ -917,6 +939,7 @@ statement: | slave | start | truncate + | uninstall | unlock | update | use @@ -1169,11 +1192,15 @@ create: lex->col_list.empty(); lex->change=NullS; } - '(' key_list ')' + '(' key_list ')' opt_fulltext_parser { LEX *lex=Lex; - - lex->key_list.push_back(new Key($2,$4.str, $5, 0, lex->col_list)); + if ($2 != Key::FULLTEXT && $12) + { + yyerror(ER(ER_SYNTAX_ERROR)); + YYABORT; + } + lex->key_list.push_back(new Key($2,$4.str,$5,0,lex->col_list,$12)); lex->col_list.empty(); } | CREATE DATABASE opt_if_not_exists ident @@ -1298,7 +1325,7 @@ sp_name: ; create_function_tail: - RETURNS_SYM udf_type UDF_SONAME_SYM TEXT_STRING_sys + RETURNS_SYM udf_type SONAME_SYM TEXT_STRING_sys { LEX *lex=Lex; lex->sql_command = SQLCOM_CREATE_FUNCTION; @@ -2472,7 +2499,9 @@ trg_event: create2: '(' create2a {} - | opt_create_table_options create3 {} + | opt_create_table_options + opt_partitioning {} + create3 {} | LIKE table_ident { LEX *lex=Lex; @@ -2488,8 +2517,12 @@ create2: ; create2a: - field_list ')' opt_create_table_options create3 {} - | create_select ')' { Select->set_braces(1);} union_opt {} + field_list ')' opt_create_table_options + opt_partitioning {} + create3 {} + | opt_partitioning {} + create_select ')' + { Select->set_braces(1);} union_opt {} ; create3: @@ -2500,6 +2533,484 @@ create3: { Select->set_braces(1);} union_opt {} ; +/* + This part of the parser is about handling of the partition information. + + It's first version was written by Mikael Ronström with lots of answers to + questions provided by Antony Curtis. + + The partition grammar can be called from three places. + 1) CREATE TABLE ... PARTITION .. + 2) ALTER TABLE table_name PARTITION ... + 3) PARTITION ... 
+ + The first place is called when a new table is created from a MySQL client. + The second place is called when a table is altered with the ALTER TABLE + command from a MySQL client. + The third place is called when opening an frm file and finding partition + info in the .frm file. It is necessary to avoid allowing PARTITION to be + an allowed entry point for SQL client queries. This is arranged by setting + some state variables before arriving here. + + To be able to handle errors we will only set error code in this code + and handle the error condition in the function calling the parser. This + is necessary to ensure we can also handle errors when calling the parser + from the openfrm function. +*/ +opt_partitioning: + /* empty */ {} + | partitioning + ; + +partitioning: + PARTITION_SYM + { + LEX *lex= Lex; + lex->part_info= new partition_info(); + if (!lex->part_info) + { + my_error(ER_OUTOFMEMORY, MYF(0), sizeof(partition_info)); + YYABORT; + } + } + partition + ; + +partition_entry: + PARTITION_SYM + { + LEX *lex= Lex; + if (lex->part_info) + { + /* + We enter here when opening the frm file to translate + partition info string into part_info data structure. + */ + lex->part_info= new partition_info(); + if (!lex->part_info) + { + my_error(ER_OUTOFMEMORY, MYF(0), sizeof(partition_info)); + YYABORT; + } + } + else + { + yyerror(ER(ER_PARTITION_ENTRY_ERROR)); + YYABORT; + } + } + partition {} + ; + +partition: + BY part_type_def opt_no_parts {} opt_sub_part {} part_defs + ; + +part_type_def: + opt_linear KEY_SYM '(' part_field_list ')' + { + LEX *lex= Lex; + lex->part_info->list_of_part_fields= TRUE; + lex->part_info->part_type= HASH_PARTITION; + } + | opt_linear HASH_SYM + { Lex->part_info->part_type= HASH_PARTITION; } + part_func {} + | RANGE_SYM + { Lex->part_info->part_type= RANGE_PARTITION; } + part_func {} + | LIST_SYM + { Lex->part_info->part_type= LIST_PARTITION; } + part_func {} + ; + +opt_linear: + /* empty */ {} + | LINEAR_SYM + { Lex->part_info->linear_hash_ind= TRUE;} + ; + +part_field_list: + part_field_item {} + | part_field_list ',' part_field_item {} + ; + +part_field_item: + ident + { + Lex->part_info->part_field_list.push_back($1.str); + } + ; + +part_func: + '(' remember_name part_func_expr remember_end ')' + { + LEX *lex= Lex; + uint expr_len= (uint)($4 - $2) - 1; + lex->part_info->list_of_part_fields= FALSE; + lex->part_info->part_expr= $3; + lex->part_info->part_func_string= $2+1; + lex->part_info->part_func_len= expr_len; + } + ; + +sub_part_func: + '(' remember_name part_func_expr remember_end ')' + { + LEX *lex= Lex; + uint expr_len= (uint)($4 - $2) - 1; + lex->part_info->list_of_subpart_fields= FALSE; + lex->part_info->subpart_expr= $3; + lex->part_info->subpart_func_string= $2+1; + lex->part_info->subpart_func_len= expr_len; + } + ; + + +opt_no_parts: + /* empty */ {} + | PARTITIONS_SYM ulong_num + { + uint no_parts= $2; + if (no_parts == 0) + { + my_error(ER_NO_PARTS_ERROR, MYF(0), "partitions"); + YYABORT; + } + Lex->part_info->no_parts= no_parts; + } + ; + +opt_sub_part: + /* empty */ {} + | SUBPARTITION_SYM BY opt_linear HASH_SYM sub_part_func + { Lex->part_info->subpart_type= HASH_PARTITION; } + opt_no_subparts {} + | SUBPARTITION_SYM BY opt_linear KEY_SYM + '(' sub_part_field_list ')' + { + LEX *lex= Lex; + lex->part_info->subpart_type= HASH_PARTITION; + lex->part_info->list_of_subpart_fields= TRUE; + } + opt_no_subparts {} + ; + +sub_part_field_list: + sub_part_field_item {} + | sub_part_field_list ',' sub_part_field_item {} + ; + +sub_part_field_item: 
+ ident + { Lex->part_info->subpart_field_list.push_back($1.str); } + ; + +part_func_expr: + bit_expr + { + LEX *lex= Lex; + bool not_corr_func; + not_corr_func= !lex->safe_to_cache_query; + lex->safe_to_cache_query= 1; + if (not_corr_func) + { + yyerror(ER(ER_CONST_EXPR_IN_PARTITION_FUNC_ERROR)); + YYABORT; + } + $$=$1; + } + ; + +opt_no_subparts: + /* empty */ {} + | SUBPARTITIONS_SYM ulong_num + { + uint no_parts= $2; + if (no_parts == 0) + { + my_error(ER_NO_PARTS_ERROR, MYF(0), "subpartitions"); + YYABORT; + } + Lex->part_info->no_subparts= no_parts; + } + ; + +part_defs: + /* empty */ + {} + | '(' part_def_list ')' + { + LEX *lex= Lex; + partition_info *part_info= lex->part_info; + if (part_info->no_parts != 0) + { + if (part_info->no_parts != + part_info->count_curr_parts) + { + yyerror(ER(ER_PARTITION_WRONG_NO_PART_ERROR)); + YYABORT; + } + } + else if (part_info->count_curr_parts > 0) + { + part_info->no_parts= part_info->count_curr_parts; + } + part_info->count_curr_subparts= 0; + part_info->count_curr_parts= 0; + } + ; + +part_def_list: + part_definition {} + | part_def_list ',' part_definition {} + ; + +part_definition: + PARTITION_SYM + { + LEX *lex= Lex; + partition_info *part_info= lex->part_info; + partition_element *p_elem= new partition_element(); + if (!p_elem) + { + my_error(ER_OUTOFMEMORY, MYF(0), sizeof(partition_element)); + YYABORT; + } + part_info->curr_part_elem= p_elem; + part_info->current_partition= p_elem; + part_info->use_default_partitions= FALSE; + part_info->partitions.push_back(p_elem); + p_elem->engine_type= DB_TYPE_UNKNOWN; + part_info->count_curr_parts++; + } + part_name {} + opt_part_values {} + opt_part_options {} + opt_sub_partition {} + ; + +part_name: + ident_or_text + { Lex->part_info->curr_part_elem->partition_name= $1.str; } + ; + +opt_part_values: + /* empty */ + { + LEX *lex= Lex; + if (!is_partition_management(lex)) + { + if (lex->part_info->part_type == RANGE_PARTITION) + { + my_error(ER_PARTITION_REQUIRES_VALUES_ERROR, MYF(0), + "RANGE", "LESS THAN"); + YYABORT; + } + if (lex->part_info->part_type == LIST_PARTITION) + { + my_error(ER_PARTITION_REQUIRES_VALUES_ERROR, MYF(0), + "LIST", "IN"); + YYABORT; + } + } + } + | VALUES LESS_SYM THAN_SYM part_func_max + { + LEX *lex= Lex; + if (!is_partition_management(lex)) + { + if (Lex->part_info->part_type != RANGE_PARTITION) + { + my_error(ER_PARTITION_WRONG_VALUES_ERROR, MYF(0), + "RANGE", "LESS THAN"); + YYABORT; + } + } + } + | VALUES IN_SYM '(' part_list_func ')' + { + LEX *lex= Lex; + if (!is_partition_management(lex)) + { + if (Lex->part_info->part_type != LIST_PARTITION) + { + my_error(ER_PARTITION_WRONG_VALUES_ERROR, MYF(0), + "LIST", "IN"); + YYABORT; + } + } + } + ; + +part_func_max: + MAX_VALUE_SYM + { + LEX *lex= Lex; + if (lex->part_info->defined_max_value) + { + yyerror(ER(ER_PARTITION_MAXVALUE_ERROR)); + YYABORT; + } + lex->part_info->defined_max_value= TRUE; + lex->part_info->curr_part_elem->range_value= LONGLONG_MAX; + } + | part_range_func + { + if (Lex->part_info->defined_max_value) + { + yyerror(ER(ER_PARTITION_MAXVALUE_ERROR)); + YYABORT; + } + } + ; + +part_range_func: + '(' part_bit_expr ')' + { + Lex->part_info->curr_part_elem->range_value= $2; + } + ; + +part_list_func: + part_list_item {} + | part_list_func ',' part_list_item {} + ; + +part_list_item: + part_bit_expr + { + longlong *value_ptr; + if (!(value_ptr= (longlong*)sql_alloc(sizeof(longlong)))) + { + my_error(ER_OUTOFMEMORY, MYF(0), sizeof(longlong)); + YYABORT; + } + *value_ptr= $1; + 
Lex->part_info->curr_part_elem->list_val_list.push_back(value_ptr); + } + ; + +part_bit_expr: + bit_expr + { + Item *part_expr= $1; + bool not_corr_func; + LEX *lex= Lex; + longlong item_value; + Name_resolution_context *context= &lex->current_select->context; + TABLE_LIST *save_list= context->table_list; + + context->table_list= 0; + part_expr->fix_fields(YYTHD, (Item**)0); + context->table_list= save_list; + not_corr_func= !part_expr->const_item() || + !lex->safe_to_cache_query; + if (not_corr_func) + { + yyerror(ER(ER_NO_CONST_EXPR_IN_RANGE_OR_LIST_ERROR)); + YYABORT; + } + if (part_expr->result_type() != INT_RESULT) + { + yyerror(ER(ER_INCONSISTENT_TYPE_OF_FUNCTIONS_ERROR)); + YYABORT; + } + item_value= part_expr->val_int(); + $$= item_value; + } + ; + +opt_sub_partition: + /* empty */ {} + | '(' sub_part_list ')' + { + LEX *lex= Lex; + partition_info *part_info= lex->part_info; + if (part_info->no_subparts != 0) + { + if (part_info->no_subparts != + part_info->count_curr_subparts) + { + yyerror(ER(ER_PARTITION_WRONG_NO_SUBPART_ERROR)); + YYABORT; + } + } + else if (part_info->count_curr_subparts > 0) + { + part_info->no_subparts= part_info->count_curr_subparts; + } + part_info->count_curr_subparts= 0; + } + ; + +sub_part_list: + sub_part_definition {} + | sub_part_list ',' sub_part_definition {} + ; + +sub_part_definition: + SUBPARTITION_SYM + { + LEX *lex= Lex; + partition_info *part_info= lex->part_info; + partition_element *p_elem= new partition_element(); + if (!p_elem) + { + my_error(ER_OUTOFMEMORY, MYF(0), sizeof(partition_element)); + YYABORT; + } + part_info->curr_part_elem= p_elem; + part_info->current_partition->subpartitions.push_back(p_elem); + part_info->use_default_subpartitions= FALSE; + part_info->count_curr_subparts++; + p_elem->engine_type= DB_TYPE_UNKNOWN; + } + sub_name opt_part_options {} + ; + +sub_name: + ident_or_text + { Lex->part_info->curr_part_elem->partition_name= $1.str; } + ; + +opt_part_options: + /* empty */ {} + | opt_part_option_list {} + ; + +opt_part_option_list: + opt_part_option_list opt_part_option {} + | opt_part_option {} + ; + +opt_part_option: + TABLESPACE opt_equal ident_or_text + { Lex->part_info->curr_part_elem->tablespace_name= $3.str; } + | opt_storage ENGINE_SYM opt_equal storage_engines + { + LEX *lex= Lex; + lex->part_info->curr_part_elem->engine_type= $4; + lex->part_info->default_engine_type= $4; + } + | NODEGROUP_SYM opt_equal ulong_num + { Lex->part_info->curr_part_elem->nodegroup_id= $3; } + | MAX_ROWS opt_equal ulonglong_num + { Lex->part_info->curr_part_elem->part_max_rows= $3; } + | MIN_ROWS opt_equal ulonglong_num + { Lex->part_info->curr_part_elem->part_min_rows= $3; } + | DATA_SYM DIRECTORY_SYM opt_equal TEXT_STRING_sys + { Lex->part_info->curr_part_elem->data_file_name= $4.str; } + | INDEX_SYM DIRECTORY_SYM opt_equal TEXT_STRING_sys + { Lex->part_info->curr_part_elem->index_file_name= $4.str; } + | COMMENT_SYM opt_equal TEXT_STRING_sys + { Lex->part_info->curr_part_elem->part_comment= $3.str; } + ; + +/* + End of partition parser part +*/ + create_select: SELECT_SYM { @@ -2683,7 +3194,9 @@ storage_engines: ident_or_text { $$ = ha_resolve_by_name($1.str,$1.length); - if ($$ == DB_TYPE_UNKNOWN) { + if ($$ == DB_TYPE_UNKNOWN && + test(YYTHD->variables.sql_mode & MODE_NO_ENGINE_SUBSTITUTION)) + { my_error(ER_UNKNOWN_STORAGE_ENGINE, MYF(0), $1.str); YYABORT; } @@ -2740,10 +3253,15 @@ column_def: ; key_def: - key_type opt_ident key_alg '(' key_list ')' + key_type opt_ident key_alg '(' key_list ')' opt_fulltext_parser { LEX 
*lex=Lex; - lex->key_list.push_back(new Key($1,$2, $3, 0, lex->col_list)); + if ($1 != Key::FULLTEXT && $7) + { + yyerror(ER(ER_SYNTAX_ERROR)); + YYABORT; + } + lex->key_list.push_back(new Key($1,$2, $3, 0, lex->col_list, $7)); lex->col_list.empty(); /* Alloced by sql_alloc */ } | opt_constraint constraint_key_type opt_ident key_alg '(' key_list ')' @@ -2778,6 +3296,20 @@ key_def: } ; +opt_fulltext_parser: + /* empty */ { $$= (LEX_STRING *)0; } + | WITH PARSER_SYM IDENT_sys + { + if (plugin_is_ready(&$3, MYSQL_FTPARSER_PLUGIN)) + $$= (LEX_STRING *)sql_memdup(&$3, sizeof(LEX_STRING)); + else + { + my_error(ER_FUNCTION_NOT_DEFINED, MYF(0), $3.str); + YYABORT; + } + } + ; + opt_check_constraint: /* empty */ | check_constraint @@ -3319,7 +3851,7 @@ alter: lex->alter_info.reset(); lex->alter_info.flags= 0; } - alter_list + alter_commands {} | ALTER DATABASE ident_or_empty { @@ -3387,11 +3919,102 @@ ident_or_empty: /* empty */ { $$= 0; } | ident { $$= $1.str; }; -alter_list: +alter_commands: | DISCARD TABLESPACE { Lex->alter_info.tablespace_op= DISCARD_TABLESPACE; } | IMPORT TABLESPACE { Lex->alter_info.tablespace_op= IMPORT_TABLESPACE; } - | alter_list_item - | alter_list ',' alter_list_item; + | alter_list + opt_partitioning + | partitioning +/* + This part was added for release 5.1 by Mikael Ronström. + From here we insert a number of commands to manage the partitions of a + partitioned table such as adding partitions, dropping partitions, + reorganising partitions in various manners. In future releases the list + will be longer and also include moving partitions to a + new table and so forth. +*/ + | add_partition_rule + | DROP PARTITION_SYM alt_part_name_list + { + Lex->alter_info.flags|= ALTER_DROP_PARTITION; + } + | COALESCE PARTITION_SYM ulong_num + { + LEX *lex= Lex; + lex->alter_info.flags|= ALTER_COALESCE_PARTITION; + lex->alter_info.no_parts= $3; + } + | reorg_partition_rule + ; + +add_partition_rule: + ADD PARTITION_SYM + { + LEX *lex= Lex; + lex->part_info= new partition_info(); + if (!lex->part_info) + { + my_error(ER_OUTOFMEMORY, MYF(0), sizeof(partition_info)); + YYABORT; + } + lex->alter_info.flags|= ALTER_ADD_PARTITION; + } + add_part_extra + {} + ; + +add_part_extra: + | '(' part_def_list ')' + { + LEX *lex= Lex; + lex->part_info->no_parts= lex->part_info->count_curr_parts; + } + | PARTITIONS_SYM ulong_num + { + LEX *lex= Lex; + lex->part_info->no_parts= $2; + } + ; + +reorg_partition_rule: + REORGANISE_SYM PARTITION_SYM + { + LEX *lex= Lex; + lex->part_info= new partition_info(); + if (!lex->part_info) + { + my_error(ER_OUTOFMEMORY, MYF(0), sizeof(partition_info)); + YYABORT; + } + lex->alter_info.flags|= ALTER_REORGANISE_PARTITION; + } + alt_part_name_list INTO '(' part_def_list ')' + { + LEX *lex= Lex; + lex->part_info->no_parts= lex->part_info->count_curr_parts; + } + ; + +alt_part_name_list: + alt_part_name_item {} + | alt_part_name_list ',' alt_part_name_item {} + ; + +alt_part_name_item: + ident + { + Lex->alter_info.partition_names.push_back($1.str); + } + ; + +/* + End of management of partition commands +*/ + +alter_list: + alter_list_item + | alter_list ',' alter_list_item + ; add_column: ADD opt_column @@ -4014,7 +4637,7 @@ select_options: /* empty*/ | select_option_list { - if (test_all_bits(Select->options, SELECT_ALL | SELECT_DISTINCT)) + if (Select->options & SELECT_DISTINCT && Select->options & SELECT_ALL) { my_error(ER_WRONG_USAGE, MYF(0), "ALL", "DISTINCT"); YYABORT; @@ -6390,6 +7013,9 @@ show_param: | ENGINE_SYM storage_engines { Lex->create_info.db_type= 
$2; } show_engine_param + | ENGINE_SYM ALL + { Lex->create_info.db_type= DB_TYPE_DEFAULT; } + show_engine_param | opt_full COLUMNS from_or_in table_ident opt_db wild_and_where { LEX *lex= Lex; @@ -6449,6 +7075,11 @@ show_param: LEX *lex=Lex; lex->sql_command= SQLCOM_SHOW_STORAGE_ENGINES; } + | AUTHORS_SYM + { + LEX *lex=Lex; + lex->sql_command= SQLCOM_SHOW_AUTHORS; + } | PRIVILEGES { LEX *lex=Lex; @@ -6472,9 +7103,19 @@ show_param: YYABORT; } | INNOBASE_SYM STATUS_SYM - { Lex->sql_command = SQLCOM_SHOW_INNODB_STATUS; WARN_DEPRECATED("SHOW INNODB STATUS", "SHOW ENGINE INNODB STATUS"); } + { + LEX *lex= Lex; + lex->sql_command = SQLCOM_SHOW_ENGINE_STATUS; + lex->create_info.db_type= DB_TYPE_INNODB; + WARN_DEPRECATED("SHOW INNODB STATUS", "SHOW ENGINE INNODB STATUS"); + } | MUTEX_SYM STATUS_SYM - { Lex->sql_command = SQLCOM_SHOW_MUTEX_STATUS; } + { + LEX *lex= Lex; + lex->sql_command = SQLCOM_SHOW_ENGINE_MUTEX; + lex->create_info.db_type= DB_TYPE_INNODB; + WARN_DEPRECATED("SHOW MUTEX STATUS", "SHOW ENGINE INNODB MUTEX"); + } | opt_full PROCESSLIST_SYM { Lex->sql_command= SQLCOM_SHOW_PROCESSLIST;} | opt_var_type VARIABLES wild_and_where @@ -6503,9 +7144,19 @@ show_param: YYABORT; } | BERKELEY_DB_SYM LOGS_SYM - { Lex->sql_command= SQLCOM_SHOW_LOGS; WARN_DEPRECATED("SHOW BDB LOGS", "SHOW ENGINE BDB LOGS"); } + { + LEX *lex= Lex; + lex->sql_command= SQLCOM_SHOW_ENGINE_LOGS; + lex->create_info.db_type= DB_TYPE_BERKELEY_DB; + WARN_DEPRECATED("SHOW BDB LOGS", "SHOW ENGINE BDB LOGS"); + } | LOGS_SYM - { Lex->sql_command= SQLCOM_SHOW_LOGS; WARN_DEPRECATED("SHOW LOGS", "SHOW ENGINE BDB LOGS"); } + { + LEX *lex= Lex; + lex->sql_command= SQLCOM_SHOW_ENGINE_LOGS; + lex->create_info.db_type= DB_TYPE_BERKELEY_DB; + WARN_DEPRECATED("SHOW LOGS", "SHOW ENGINE BDB LOGS"); + } | GRANTS { LEX *lex=Lex; @@ -6625,30 +7276,11 @@ show_param: show_engine_param: STATUS_SYM - { - switch (Lex->create_info.db_type) { - case DB_TYPE_NDBCLUSTER: - Lex->sql_command = SQLCOM_SHOW_NDBCLUSTER_STATUS; - break; - case DB_TYPE_INNODB: - Lex->sql_command = SQLCOM_SHOW_INNODB_STATUS; - break; - default: - my_error(ER_NOT_SUPPORTED_YET, MYF(0), "STATUS"); - YYABORT; - } - } + { Lex->sql_command= SQLCOM_SHOW_ENGINE_STATUS; } + | MUTEX_SYM + { Lex->sql_command= SQLCOM_SHOW_ENGINE_MUTEX; } | LOGS_SYM - { - switch (Lex->create_info.db_type) { - case DB_TYPE_BERKELEY_DB: - Lex->sql_command = SQLCOM_SHOW_LOGS; - break; - default: - my_error(ER_NOT_SUPPORTED_YET, MYF(0), "LOGS"); - YYABORT; - } - }; + { Lex->sql_command= SQLCOM_SHOW_ENGINE_LOGS; }; master_or_binary: MASTER_SYM @@ -7470,6 +8102,7 @@ user: keyword: keyword_sp {} | ASCII_SYM {} + | AUTHORS_SYM {} | BACKUP_SYM {} | BEGIN_SYM {} | BYTE_SYM {} @@ -7487,9 +8120,13 @@ keyword: | FLUSH_SYM {} | HANDLER_SYM {} | HELP_SYM {} + | INSTALL_SYM {} | LANGUAGE_SYM {} | NO_SYM {} | OPEN_SYM {} + | PARSER_SYM {} + | PARTITION_SYM {} + | PLUGIN_SYM {} | PREPARE_SYM {} | REPAIR {} | RESET_SYM {} @@ -7499,10 +8136,12 @@ keyword: | SECURITY_SYM {} | SIGNED_SYM {} | SLAVE {} + | SONAME_SYM {} | START_SYM {} | STOP_SYM {} | TRUNCATE_SYM {} | UNICODE_SYM {} + | UNINSTALL_SYM {} | XA_SYM {} ; @@ -7534,7 +8173,8 @@ keyword_sp: | CHANGED {} | CIPHER_SYM {} | CLIENT_SYM {} - | CODE_SYM {} + | COALESCE {} + | CODE_SYM {} | COLLATION_SYM {} | COLUMNS {} | COMMITTED_SYM {} @@ -7591,8 +8231,10 @@ keyword_sp: | RELAY_THREAD {} | LAST_SYM {} | LEAVES {} + | LESS_SYM {} | LEVEL_SYM {} | LINESTRING {} + | LIST_SYM {} | LOCAL_SYM {} | LOCKS_SYM {} | LOGS_SYM {} @@ -7616,6 +8258,7 @@ keyword_sp: | 
MAX_QUERIES_PER_HOUR {} | MAX_UPDATES_PER_HOUR {} | MAX_USER_CONNECTIONS_SYM {} + | MAX_VALUE_SYM {} | MEDIUM_SYM {} | MERGE_SYM {} | MICROSECOND_SYM {} @@ -7636,6 +8279,7 @@ keyword_sp: | NDBCLUSTER_SYM {} | NEXT_SYM {} | NEW_SYM {} + | NODEGROUP_SYM {} | NONE_SYM {} | NVARCHAR_SYM {} | OFFSET_SYM {} @@ -7644,6 +8288,7 @@ keyword_sp: | ONE_SYM {} | PACK_KEYS_SYM {} | PARTIAL {} + | PARTITIONS_SYM {} | PASSWORD {} | PHASE_SYM {} | POINT_SYM {} @@ -7665,6 +8310,7 @@ keyword_sp: | RELAY_LOG_FILE_SYM {} | RELAY_LOG_POS_SYM {} | RELOAD {} + | REORGANISE_SYM {} | REPEATABLE_SYM {} | REPLICATION {} | RESOURCES {} @@ -7694,6 +8340,8 @@ keyword_sp: | STRING_SYM {} | SUBDATE_SYM {} | SUBJECT_SYM {} + | SUBPARTITION_SYM {} + | SUBPARTITIONS_SYM {} | SUPER_SYM {} | SUSPEND_SYM {} | TABLES {} @@ -7701,6 +8349,7 @@ keyword_sp: | TEMPORARY {} | TEMPTABLE_SYM {} | TEXT_SYM {} + | THAN_SYM {} | TRANSACTION_SYM {} | TRIGGERS_SYM {} | TIMESTAMP {} @@ -7853,7 +8502,7 @@ sys_option_value: { LEX *lex=Lex; - if ($2.var == &trg_new_row_fake_var) + if ($2.var == trg_new_row_fake_var) { /* We are in trigger and assigning value to field of new row */ Item *it; @@ -8077,7 +8726,7 @@ internal_variable_name: YYABORT; } /* This special combination will denote field of NEW row */ - $$.var= &trg_new_row_fake_var; + $$.var= trg_new_row_fake_var; $$.base_name= $3; } else @@ -9178,4 +9827,19 @@ opt_migrate: | FOR_SYM MIGRATE_SYM { Lex->xa_opt=XA_FOR_MIGRATE; } ; +install: + INSTALL_SYM PLUGIN_SYM IDENT_sys SONAME_SYM TEXT_STRING_sys + { + LEX *lex= Lex; + lex->sql_command= SQLCOM_INSTALL_PLUGIN; + lex->comment= $3; + lex->ident= $5; + }; +uninstall: + UNINSTALL_SYM PLUGIN_SYM IDENT_sys + { + LEX *lex= Lex; + lex->sql_command= SQLCOM_UNINSTALL_PLUGIN; + lex->comment= $3; + }; diff --git a/sql/structs.h b/sql/structs.h index 9421ebdc2af..12ecf68a212 100644 --- a/sql/structs.h +++ b/sql/structs.h @@ -87,6 +87,15 @@ typedef struct st_key { uint extra_length; uint usable_key_parts; /* Should normally be = key_parts */ enum ha_key_alg algorithm; + /* + Note that parser is used when the table is opened for use, and + parser_name is used when the table is being created. 
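A sketch of the two phases that make the union declared just below safe, reconstructed from the unireg.cc and table.cc hunks elsewhere in this change; the two free-standing functions are invented for illustration and assume the server's internal declarations (st_key, LEX_STRING, plugin_lock) are in scope:

    #include <cstring>

    // CREATE TABLE path: only the parser *name* exists yet;
    // mysql_create_frm() appends it, null-terminated, to the .frm.
    void on_create(st_key *key, LEX_STRING *name_from_with_parser_clause)
    {
      key->parser_name= name_from_with_parser_clause;
    }

    // Open path: openfrm() reads the name back from the .frm and replaces
    // it, in place, with a locked plugin handle; parser_name is dead after
    // this point, which is what makes the union safe.
    void on_open(st_key *key, char *name_in_frm)
    {
      LEX_STRING parser_name;
      parser_name.str= name_in_frm;
      parser_name.length= strlen(name_in_frm);
      key->parser= plugin_lock(&parser_name, MYSQL_FTPARSER_PLUGIN);
    }

Since the two members are never live at the same time, the union avoids widening every KEY for a feature most keys never use.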
+ */ + union + { + struct st_plugin_int *parser; /* Fulltext [pre]parser */ + LEX_STRING *parser_name; /* Fulltext [pre]parser name */ + }; KEY_PART_INFO *key_part; char *name; /* Name of key */ /* diff --git a/sql/table.cc b/sql/table.cc index 8068a839052..b4548f4d90b 100644 --- a/sql/table.cc +++ b/sql/table.cc @@ -70,7 +70,7 @@ int openfrm(THD *thd, const char *name, const char *alias, uint db_stat, int j,error, errarg= 0; uint rec_buff_length,n_length,int_length,records,key_parts,keys, interval_count,interval_parts,read_length,db_create_options; - uint key_info_length, com_length; + uint key_info_length, com_length, part_info_len=0, extra_rec_buf_length; ulong pos, record_offset; char index_file[FN_REFLEN], *names, *keynames, *comment_pos; uchar head[288],*disk_buff,new_field_pack_flag; @@ -87,6 +87,7 @@ int openfrm(THD *thd, const char *name, const char *alias, uint db_stat, SQL_CRYPT *crypted=0; MEM_ROOT **root_ptr, *old_root; TABLE_SHARE *share; + enum db_type default_part_db_type; DBUG_ENTER("openfrm"); DBUG_PRINT("enter",("name: '%s' form: 0x%lx",name,outparam)); @@ -163,6 +164,7 @@ int openfrm(THD *thd, const char *name, const char *alias, uint db_stat, if (share->frm_version == FRM_VER_TRUE_VARCHAR -1 && head[33] == 5) share->frm_version= FRM_VER_TRUE_VARCHAR; + default_part_db_type= ha_checktype(thd,(enum db_type) (uint) *(head+61),0,0); share->db_type= ha_checktype(thd,(enum db_type) (uint) *(head+3),0,0); share->db_create_options= db_create_options=uint2korr(head+30); share->db_options_in_use= share->db_create_options; @@ -308,9 +310,9 @@ int openfrm(THD *thd, const char *name, const char *alias, uint db_stat, { /* Read extra data segment */ char *buff, *next_chunk, *buff_end; + DBUG_PRINT("info", ("extra segment size is %u bytes", n_length)); if (!(next_chunk= buff= my_malloc(n_length, MYF(MY_WME)))) goto err; - buff_end= buff + n_length; if (my_pread(file, (byte*)buff, n_length, record_offset + share->reclength, MYF(MY_NABP))) { @@ -321,26 +323,89 @@ int openfrm(THD *thd, const char *name, const char *alias, uint db_stat, if (! 
(share->connect_string.str= strmake_root(&outparam->mem_root, next_chunk + 2, share->connect_string.length))) { + DBUG_PRINT("EDS", ("strmake_root failed for connect_string")); my_free(buff, MYF(0)); goto err; } next_chunk+= share->connect_string.length + 2; + buff_end= buff + n_length; if (next_chunk + 2 < buff_end) { uint str_db_type_length= uint2korr(next_chunk); - share->db_type= ha_resolve_by_name(next_chunk + 2, str_db_type_length); - DBUG_PRINT("enter", ("Setting dbtype to: %d - %d - '%.*s'\n", - share->db_type, - str_db_type_length, str_db_type_length, - next_chunk + 2)); + enum db_type tmp_db_type= ha_resolve_by_name(next_chunk + 2, + str_db_type_length); + if (tmp_db_type != DB_TYPE_UNKNOWN) + { + share->db_type= tmp_db_type; + DBUG_PRINT("info", ("setting dbtype to '%.*s' (%d)", + str_db_type_length, next_chunk + 2, + share->db_type)); + } +#ifdef WITH_PARTITION_STORAGE_ENGINE + else + { + if (!strncmp(next_chunk + 2, "partition", str_db_type_length)) + { + /* Use partition handler */ + share->db_type= DB_TYPE_PARTITION_DB; + DBUG_PRINT("info", ("setting dbtype to '%.*s' (%d)", + str_db_type_length, next_chunk + 2, + share->db_type)); + } + } +#endif next_chunk+= str_db_type_length + 2; } + if (next_chunk + 4 < buff_end) + { + part_info_len= uint4korr(next_chunk); + if (part_info_len > 0) + { +#ifdef WITH_PARTITION_STORAGE_ENGINE + if (mysql_unpack_partition(thd, (uchar *)(next_chunk + 4), + part_info_len, outparam, + default_part_db_type)) + { + DBUG_PRINT("info", ("mysql_unpack_partition failed")); + my_free(buff, MYF(0)); + goto err; + } +#else + DBUG_PRINT("info", ("WITH_PARTITION_STORAGE_ENGINE is not defined")); + my_free(buff, MYF(0)); + goto err; +#endif + } + next_chunk+= part_info_len + 5; + } + keyinfo= outparam->key_info; + for (i= 0; i < keys; i++, keyinfo++) + { + if (keyinfo->flags & HA_USES_PARSER) + { + LEX_STRING parser_name; + if (next_chunk >= buff_end) + { + DBUG_PRINT("EDS", + ("fulltext key uses parser that is not defined in .frm")); + my_free(buff, MYF(0)); + goto err; + } + parser_name.str= next_chunk; + parser_name.length= strlen(next_chunk); + keyinfo->parser= plugin_lock(&parser_name, MYSQL_FTPARSER_PLUGIN); + if (! 
keyinfo->parser) + { + DBUG_PRINT("EDS", ("parser plugin is not loaded")); + my_free(buff, MYF(0)); + my_error(ER_PLUGIN_IS_NOT_LOADED, MYF(0), parser_name.str); + error_reported= 1; + goto err; + } + } + } my_free(buff, MYF(0)); } - /* Allocate handler */ - if (!(outparam->file= get_new_handler(outparam, &outparam->mem_root, - share->db_type))) - goto err; error=4; outparam->reginfo.lock_type= TL_UNLOCK; @@ -352,8 +417,9 @@ int openfrm(THD *thd, const char *name, const char *alias, uint db_stat, if (prgflag & (READ_ALL+EXTRA_RECORD)) records++; /* QQ: TODO, remove the +1 from below */ + extra_rec_buf_length= uint2korr(head+59); rec_buff_length= ALIGN_SIZE(share->reclength + 1 + - outparam->file->extra_rec_buf_length()); + extra_rec_buf_length); share->rec_buff_length= rec_buff_length; if (!(record= (char *) alloc_root(&outparam->mem_root, rec_buff_length * records))) @@ -471,9 +537,15 @@ int openfrm(THD *thd, const char *name, const char *alias, uint db_stat, if (keynames) fix_type_pointers(&int_array, &share->keynames, 1, &keynames); + VOID(my_close(file,MYF(MY_WME))); file= -1; + /* Allocate handler */ + if (!(outparam->file= get_new_handler(outparam, &outparam->mem_root, + share->db_type))) + goto err; + record= (char*) outparam->record[0]-1; /* Fieldstart = 1 */ if (null_field_first) { @@ -635,6 +707,7 @@ int openfrm(THD *thd, const char *name, const char *alias, uint db_stat, goto err; /* purecov: inspected */ } + reg_field->fieldnr= i+1; //Set field number reg_field->field_index= i; reg_field->comment=comment; if (field_type == FIELD_TYPE_BIT && !f_bit_as_char(pack_flag)) @@ -896,6 +969,16 @@ int openfrm(THD *thd, const char *name, const char *alias, uint db_stat, } } + if (outparam->file->ha_allocate_read_write_set(share->fields)) + goto err; + + /* Fix the partition functions and ensure they are not constant functions*/ + if (part_info_len > 0) +#ifdef WITH_PARTITION_STORAGE_ENGINE + if (fix_partition_func(thd,name,outparam)) +#endif + goto err; + /* the correct null_bytes can now be set, since bitfields have been taken into account @@ -961,6 +1044,13 @@ int openfrm(THD *thd, const char *name, const char *alias, uint db_stat, if (! 
error_reported) frm_error(error,outparam,name,ME_ERROR+ME_WAITTANG, errarg); delete outparam->file; +#ifdef WITH_PARTITION_STORAGE_ENGINE + if (outparam->s->part_info) + { + free_items(outparam->s->part_info->item_free_list); + outparam->s->part_info->item_free_list= 0; + } +#endif outparam->file=0; // For easier errorchecking outparam->db_stat=0; hash_free(&share->name_hash); @@ -975,9 +1065,20 @@ int openfrm(THD *thd, const char *name, const char *alias, uint db_stat, int closefrm(register TABLE *table) { int error=0; + uint idx; + KEY *key_info; DBUG_ENTER("closefrm"); if (table->db_stat) error=table->file->close(); + key_info= table->key_info; + for (idx= table->s->keys; idx; idx--, key_info++) + { + if (key_info->flags & HA_USES_PARSER) + { + plugin_unlock(key_info->parser); + key_info->flags= 0; + } + } my_free((char*) table->alias, MYF(MY_ALLOW_ZERO_PTR)); table->alias= 0; if (table->field) @@ -987,6 +1088,13 @@ int closefrm(register TABLE *table) table->field= 0; } delete table->file; +#ifdef WITH_PARTITION_STORAGE_ENGINE + if (table->s->part_info) + { + free_items(table->s->part_info->item_free_list); + table->s->part_info->item_free_list= 0; + } +#endif table->file= 0; /* For easier errorchecking */ hash_free(&table->s->name_hash); free_root(&table->mem_root, MYF(0)); diff --git a/sql/table.h b/sql/table.h index d919b9a4203..64b25635779 100644 --- a/sql/table.h +++ b/sql/table.h @@ -21,6 +21,7 @@ class Item; /* Needed by ORDER */ class GRANT_TABLE; class st_select_lex_unit; class st_select_lex; +class partition_info; class COND_EQUAL; class Security_context; @@ -106,6 +107,9 @@ class Table_triggers_list; typedef struct st_table_share { +#ifdef WITH_PARTITION_STORAGE_ENGINE + partition_info *part_info; /* Partition related information */ +#endif /* hash of field names (contains pointers to elements of field array) */ HASH name_hash; /* hash of field names */ MEM_ROOT mem_root; @@ -214,6 +218,8 @@ struct st_table { ORDER *group; const char *alias; /* alias or table name */ uchar *null_flags; + MY_BITMAP *read_set; + MY_BITMAP *write_set; query_id_t query_id; ha_rows quick_rows[MAX_KEY]; @@ -267,6 +273,7 @@ struct st_table { my_bool auto_increment_field_not_null; my_bool insert_or_update; /* Can be used by the handler */ my_bool alias_name_used; /* true if table_name is alias */ + my_bool get_fields_in_item_tree; /* Signal to fix_field */ REGINFO reginfo; /* field connections */ MEM_ROOT mem_root; diff --git a/sql/tztime.cc b/sql/tztime.cc index 1d06d4d0b8e..afdbebea547 100644 --- a/sql/tztime.cc +++ b/sql/tztime.cc @@ -1623,7 +1623,7 @@ my_tz_init(THD *org_thd, const char *default_tzname, my_bool bootstrap) mysql.time_zone* tables are MyISAM and these operations always succeed for MyISAM. */ - (void)table->file->ha_index_init(0); + (void)table->file->ha_index_init(0, 1); tz_leapcnt= 0; res= table->file->index_first(table->record[0]); @@ -1800,7 +1800,7 @@ tz_load_from_open_tables(const String *tz_name, TABLE_LIST *tz_tables) mysql.time_zone* tables are MyISAM and these operations always succeed for MyISAM. 
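The mechanical change in the tztime.cc hunks just below is that ha_index_init() now takes a second argument; judging from the call sites it is a "sorted" flag that requests rows in index order. A hedged sketch of the calling pattern, assuming the server's handler API (the signature is inferred from the calls, not quoted from handler.h):

    // Inferred shape: int handler::ha_index_init(uint index_no, bool sorted);
    static int walk_in_index_order(TABLE *table)
    {
      int res;
      if ((res= table->file->ha_index_init(0, 1)))  // index 0, ordered scan
        return res;
      for (res= table->file->index_first(table->record[0]);
           res == 0;
           res= table->file->index_next(table->record[0]))
      {
        // consume table->record[0] here, in index order
      }
      table->file->ha_index_end();
      return res == HA_ERR_END_OF_FILE ? 0 : res;
    }

For the time-zone loader the ordering matters because transition times must be processed in ascending order, so passing 1 for the new argument presumably keeps the ordered behaviour the loader already relied on.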
*/ - (void)table->file->ha_index_init(0); + (void)table->file->ha_index_init(0, 1); if (table->file->index_read(table->record[0], (byte*)table->field[0]->ptr, 0, HA_READ_KEY_EXACT)) @@ -1827,7 +1827,7 @@ tz_load_from_open_tables(const String *tz_name, TABLE_LIST *tz_tables) table= tz_tables->table; tz_tables= tz_tables->next_local; table->field[0]->store((longlong) tzid, TRUE); - (void)table->file->ha_index_init(0); + (void)table->file->ha_index_init(0, 1); if (table->file->index_read(table->record[0], (byte*)table->field[0]->ptr, 0, HA_READ_KEY_EXACT)) @@ -1854,7 +1854,7 @@ tz_load_from_open_tables(const String *tz_name, TABLE_LIST *tz_tables) table= tz_tables->table; tz_tables= tz_tables->next_local; table->field[0]->store((longlong) tzid, TRUE); - (void)table->file->ha_index_init(0); + (void)table->file->ha_index_init(0, 1); // FIXME Is there any better approach than explicitly specifying 4 ??? res= table->file->index_read(table->record[0], (byte*)table->field[0]->ptr, @@ -1926,7 +1926,7 @@ tz_load_from_open_tables(const String *tz_name, TABLE_LIST *tz_tables) */ table= tz_tables->table; table->field[0]->store((longlong) tzid, TRUE); - (void)table->file->ha_index_init(0); + (void)table->file->ha_index_init(0, 1); // FIXME Is there any better approach than explicitly specifying 4 ??? res= table->file->index_read(table->record[0], (byte*)table->field[0]->ptr, diff --git a/sql/unireg.cc b/sql/unireg.cc index 0ab77462f61..a91be308cce 100644 --- a/sql/unireg.cc +++ b/sql/unireg.cc @@ -46,7 +46,8 @@ static bool pack_fields(File file, List<create_field> &create_fields, static bool make_empty_rec(THD *thd, int file, enum db_type table_type, uint table_options, List<create_field> &create_fields, - uint reclength, ulong data_offset); + uint reclength, ulong data_offset, + handler *handler); /* Create a frm (table definition) file @@ -76,21 +77,26 @@ bool mysql_create_frm(THD *thd, my_string file_name, handler *db_file) { LEX_STRING str_db_type; - uint reclength,info_length,screens,key_info_length,maxlength; + uint reclength,info_length,screens,key_info_length,maxlength,i; ulong key_buff_length; File file; ulong filepos, data_offset; uchar fileinfo[64],forminfo[288],*keybuff; TYPELIB formnames; uchar *screen_buff; - char buff[2]; + char buff[5]; +#ifdef WITH_PARTITION_STORAGE_ENGINE + partition_info *part_info= thd->lex->part_info; +#endif DBUG_ENTER("mysql_create_frm"); +#ifdef WITH_PARTITION_STORAGE_ENGINE + thd->lex->part_info= NULL; +#endif formnames.type_names=0; if (!(screen_buff=pack_screens(create_fields,&info_length,&screens,0))) DBUG_RETURN(1); - if (db_file == NULL) - db_file= get_new_handler((TABLE*) 0, thd->mem_root, create_info->db_type); + DBUG_ASSERT(db_file != NULL); /* If fixed row records, we need one bit to check for deleted rows */ if (!(create_info->table_options & HA_OPTION_PACK_RECORD)) @@ -121,8 +127,21 @@ bool mysql_create_frm(THD *thd, my_string file_name, /* Calculate extra data segment length */ str_db_type.str= (char *) ha_get_storage_engine(create_info->db_type); str_db_type.length= strlen(str_db_type.str); + /* str_db_type */ create_info->extra_size= (2 + str_db_type.length + 2 + create_info->connect_string.length); + /* Partition */ + create_info->extra_size+= 5; +#ifdef WITH_PARTITION_STORAGE_ENGINE + if (part_info) + create_info->extra_size+= part_info->part_info_len; +#endif + + for (i= 0; i < keys; i++) + { + if (key_info[i].parser_name) + create_info->extra_size+= key_info[i].parser_name->length + 1; + } if ((file=create_frm(thd, file_name, db, table, 
diff --git a/sql/unireg.cc b/sql/unireg.cc
index 0ab77462f61..a91be308cce 100644
--- a/sql/unireg.cc
+++ b/sql/unireg.cc
@@ -46,7 +46,8 @@ static bool pack_fields(File file, List<create_field> &create_fields,
 static bool make_empty_rec(THD *thd, int file, enum db_type table_type,
 			   uint table_options,
 			   List<create_field> &create_fields,
-			   uint reclength, ulong data_offset);
+			   uint reclength, ulong data_offset,
+			   handler *handler);

 /*
   Create a frm (table definition) file
@@ -76,21 +77,26 @@ bool mysql_create_frm(THD *thd, my_string file_name,
 		      handler *db_file)
 {
   LEX_STRING str_db_type;
-  uint reclength,info_length,screens,key_info_length,maxlength;
+  uint reclength,info_length,screens,key_info_length,maxlength,i;
   ulong key_buff_length;
   File file;
   ulong filepos, data_offset;
   uchar fileinfo[64],forminfo[288],*keybuff;
   TYPELIB formnames;
   uchar *screen_buff;
-  char buff[2];
+  char buff[5];
+#ifdef WITH_PARTITION_STORAGE_ENGINE
+  partition_info *part_info= thd->lex->part_info;
+#endif
   DBUG_ENTER("mysql_create_frm");

+#ifdef WITH_PARTITION_STORAGE_ENGINE
+  thd->lex->part_info= NULL;
+#endif
   formnames.type_names=0;
   if (!(screen_buff=pack_screens(create_fields,&info_length,&screens,0)))
     DBUG_RETURN(1);
-  if (db_file == NULL)
-    db_file= get_new_handler((TABLE*) 0, thd->mem_root, create_info->db_type);
+  DBUG_ASSERT(db_file != NULL);

   /* If fixed row records, we need one bit to check for deleted rows */
   if (!(create_info->table_options & HA_OPTION_PACK_RECORD))
@@ -121,8 +127,21 @@ bool mysql_create_frm(THD *thd, my_string file_name,
   /* Calculate extra data segment length */
   str_db_type.str= (char *) ha_get_storage_engine(create_info->db_type);
   str_db_type.length= strlen(str_db_type.str);
+  /* str_db_type */
   create_info->extra_size= (2 + str_db_type.length +
                             2 + create_info->connect_string.length);
+  /* Partition */
+  create_info->extra_size+= 5;
+#ifdef WITH_PARTITION_STORAGE_ENGINE
+  if (part_info)
+    create_info->extra_size+= part_info->part_info_len;
+#endif
+
+  for (i= 0; i < keys; i++)
+  {
+    if (key_info[i].parser_name)
+      create_info->extra_size+= key_info[i].parser_name->length + 1;
+  }

   if ((file=create_frm(thd, file_name, db, table,
 		       reclength, fileinfo, create_info,
 		       keys)) < 0)
@@ -147,6 +166,11 @@ bool mysql_create_frm(THD *thd, my_string file_name,
 	  60);
   forminfo[46]=(uchar) strlen((char*)forminfo+47);	// Length of comment

+#ifdef WITH_PARTITION_STORAGE_ENGINE
+  if (part_info)
+    fileinfo[61]= (uchar) part_info->default_engine_type;
+#endif
+
   int2store(fileinfo+59,db_file->extra_rec_buf_length());
   if (my_pwrite(file,(byte*) fileinfo,64,0L,MYF_RW) ||
       my_pwrite(file,(byte*) keybuff,key_info_length,
 		(ulong) uint2korr(fileinfo+6),MYF_RW))
@@ -155,21 +179,47 @@ bool mysql_create_frm(THD *thd, my_string file_name,
 	   (ulong) uint2korr(fileinfo+6)+ (ulong) key_buff_length,
 	   MY_SEEK_SET,MYF(0)));
   if (make_empty_rec(thd,file,create_info->db_type,create_info->table_options,
-                     create_fields,reclength, data_offset))
+                     create_fields,reclength, data_offset, db_file))
     goto err;

   int2store(buff, create_info->connect_string.length);
-  if (my_write(file, (const byte*)buff, sizeof(buff), MYF(MY_NABP)) ||
+  if (my_write(file, (const byte*)buff, 2, MYF(MY_NABP)) ||
       my_write(file, (const byte*)create_info->connect_string.str,
               create_info->connect_string.length, MYF(MY_NABP)))
       goto err;

   int2store(buff, str_db_type.length);
-  if (my_write(file, (const byte*)buff, sizeof(buff), MYF(MY_NABP)) ||
+  if (my_write(file, (const byte*)buff, 2, MYF(MY_NABP)) ||
       my_write(file, (const byte*)str_db_type.str,
 	       str_db_type.length, MYF(MY_NABP)))
     goto err;
-
+
+#ifdef WITH_PARTITION_STORAGE_ENGINE
+  if (part_info)
+  {
+    int4store(buff, part_info->part_info_len);
+    if (my_write(file, (const byte*)buff, 4, MYF_RW) ||
+        my_write(file, (const byte*)part_info->part_info_string,
+                 part_info->part_info_len + 1, MYF_RW))
+      goto err;
+  }
+  else
+#endif
+  {
+    bzero(buff, 5);
+    if (my_write(file, (byte*) buff, 5, MYF_RW))
+      goto err;
+  }
+  for (i= 0; i < keys; i++)
+  {
+    if (key_info[i].parser_name)
+    {
+      if (my_write(file, (const byte*)key_info[i].parser_name->str,
+                   key_info[i].parser_name->length + 1, MYF(MY_NABP)))
+        goto err;
+    }
+  }
+
   VOID(my_seek(file,filepos,MY_SEEK_SET,MYF(0)));
   if (my_write(file,(byte*) forminfo,288,MYF_RW) ||
       my_write(file,(byte*) screen_buff,info_length,MYF_RW) ||
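The .frm extra data segment written above is a sequence of length-prefixed blobs: a 2-byte connect-string length plus its data, a 2-byte engine-name length plus its data, then either a 4-byte partition-clause length followed by the NUL-terminated clause text or five zero bytes when the table is unpartitioned, and finally one NUL-terminated parser name per key that declares one. The sketch below packs that same layout into a byte vector; put2/put4/pack_extra are hypothetical helpers in the spirit of int2store/int4store, not server code.

#include <cstdint>
#include <cstdio>
#include <string>
#include <vector>

// Little-endian length-prefix helpers (illustrative analogues of
// int2store/int4store).
static void put2(std::vector<uint8_t> &out, uint16_t v)
{
  out.push_back(v & 0xff);
  out.push_back(v >> 8);
}
static void put4(std::vector<uint8_t> &out, uint32_t v)
{
  for (int i= 0; i < 4; i++)
    out.push_back((v >> (8 * i)) & 0xff);
}
static void put_str(std::vector<uint8_t> &out, const std::string &s,
                    bool with_nul)
{
  out.insert(out.end(), s.begin(), s.end());
  if (with_nul)
    out.push_back(0);
}

// Field order and the literal 2/4/5-byte counts follow the hunk above;
// a null partition pointer takes the bzero(buff, 5) branch.
static std::vector<uint8_t>
pack_extra(const std::string &connect, const std::string &engine,
           const std::string *partition,
           const std::vector<std::string> &parsers)
{
  std::vector<uint8_t> out;
  put2(out, (uint16_t) connect.size());
  put_str(out, connect, false);
  put2(out, (uint16_t) engine.size());
  put_str(out, engine, false);
  if (partition)
  {
    put4(out, (uint32_t) partition->size());
    put_str(out, *partition, true);
  }
  else
    out.insert(out.end(), 5, 0);
  for (const std::string &p : parsers)
    put_str(out, p, true);
  return out;
}

int main()
{
  std::string part= "PARTITION BY HASH (a) PARTITIONS 4";
  std::vector<uint8_t> blob= pack_extra("", "MyISAM", &part, {"ngram"});
  std::printf("extra segment: %zu bytes\n", blob.size());
  return 0;
}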
@@ -243,8 +293,7 @@ err3:
     create_fields	Fields to create
     keys		number of keys to create
     key_info		Keys to create
-    db_file		Handler to use. May be zero, in which case we use
-			create_info->db_type
+    file		Handler to use.

   RETURN
     0  ok
     1  error
@@ -254,19 +303,21 @@ int rea_create_table(THD *thd, my_string file_name,
 		     const char *db, const char *table,
 		     HA_CREATE_INFO *create_info,
 		     List<create_field> &create_fields,
-		     uint keys, KEY *key_info)
+		     uint keys, KEY *key_info, handler *file)
 {
   DBUG_ENTER("rea_create_table");

   if (mysql_create_frm(thd, file_name, db, table, create_info,
-                       create_fields, keys, key_info, NULL))
+                       create_fields, keys, key_info, file))
     DBUG_RETURN(1);
+  if (file->create_handler_files(file_name))
+    goto err_handler;
   if (!create_info->frm_only && ha_create_table(file_name,create_info,0))
-  {
-    my_delete(file_name,MYF(0));
-    DBUG_RETURN(1);
-  }
+    goto err_handler;
   DBUG_RETURN(0);
+err_handler:
+  my_delete(file_name, MYF(0));
+  DBUG_RETURN(1);
 } /* rea_create_table */
@@ -684,7 +735,8 @@ static bool make_empty_rec(THD *thd, File file,enum db_type table_type,
 			   uint table_options,
 			   List<create_field> &create_fields,
 			   uint reclength,
-			   ulong data_offset)
+			   ulong data_offset,
+			   handler *handler)
 {
   int error;
   Field::utype type;
   uchar *buff,*null_pos;
   TABLE table;
   create_field *field;
-  handler *handler;
   enum_check_fields old_count_cuted_fields= thd->count_cuted_fields;
   DBUG_ENTER("make_empty_rec");

   /* We need a table to generate columns for default values */
   bzero((char*) &table,sizeof(table));
   table.s= &table.share_not_to_be_used;
-  handler= get_new_handler((TABLE*) 0, thd->mem_root, table_type);

-  if (!handler ||
-      !(buff=(uchar*) my_malloc((uint) reclength,MYF(MY_WME | MY_ZEROFILL))))
+  if (!(buff=(uchar*) my_malloc((uint) reclength,MYF(MY_WME | MY_ZEROFILL))))
   {
-    delete handler;
     DBUG_RETURN(1);
   }
@@ -761,6 +809,7 @@ static bool make_empty_rec(THD *thd, File file,enum db_type table_type,
       {
 	my_error(ER_INVALID_DEFAULT, MYF(0), regfield->field_name);
 	error= 1;
+	delete regfield;		// To avoid memory leak
 	goto err;
       }
     }
@@ -790,7 +839,6 @@ static bool make_empty_rec(THD *thd, File file,enum db_type table_type,
 err:
   my_free((gptr) buff,MYF(MY_FAE));
-  delete handler;
   thd->count_cuted_fields= old_count_cuted_fields;
   DBUG_RETURN(error);
 } /* make_empty_rec */
diff --git a/sql/unireg.h b/sql/unireg.h
index 6afefa579e8..d613392f831 100644
--- a/sql/unireg.h
+++ b/sql/unireg.h
@@ -36,6 +36,9 @@
 #ifndef SHAREDIR
 #define SHAREDIR	"share/"
 #endif
+#ifndef LIBDIR
+#define LIBDIR	"lib/"
+#endif

 #define ER(X) errmesg[(X) - ER_ERROR_FIRST]
 #define ER_SAFE(X) (((X) >= ER_ERROR_FIRST && (X) <= ER_ERROR_LAST) ? ER(X) : "Invalid error code")
@@ -84,6 +87,7 @@
 #define PSEUDO_TABLE_BITS (PARAM_TABLE_BIT | OUTER_REF_TABLE_BIT | \
                            RAND_TABLE_BIT)
 #define MAX_FIELDS	4096			/* Limit in the .frm file */
+#define MAX_PARTITIONS	1024

 #define MAX_SORT_MEMORY (2048*1024-MALLOC_OVERHEAD)
 #define MIN_SORT_MEMORY (32*1024-MALLOC_OVERHEAD)
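The reworked rea_create_table() above funnels every failure after the frm write through a single err_handler label, so a half-created table never leaves a stale definition file on disk. A compact model of that control flow, assuming hypothetical stand-in step functions (write_frm, create_handler_files, create_table_in_engine, delete_file) that return true on failure like their server counterparts:

#include <cstdio>
#include <string>

// Illustrative stand-ins for the three steps and the cleanup action.
static bool write_frm(const std::string &name)
{ std::printf("frm written: %s\n", name.c_str()); return false; }
static bool create_handler_files(const std::string &name)
{ std::printf("handler files for: %s\n", name.c_str()); return false; }
static bool create_table_in_engine(const std::string &name)
{ std::printf("engine table: %s\n", name.c_str()); return false; }
static void delete_file(const std::string &name)
{ std::printf("removed: %s\n", name.c_str()); }

// Mirrors the new control flow: once the frm exists, any later failure
// jumps to one cleanup path that removes it before reporting the error.
static int rea_create_table_sketch(const std::string &name, bool frm_only)
{
  if (write_frm(name))
    return 1;                    // nothing on disk yet, nothing to clean
  if (create_handler_files(name))
    goto err_handler;
  if (!frm_only && create_table_in_engine(name))
    goto err_handler;
  return 0;
err_handler:
  delete_file(name);
  return 1;
}

int main()
{
  return rea_create_table_sketch("./test_t1.frm", false);
}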