diff options
Diffstat (limited to 'storage')
152 files changed, 4103 insertions, 1702 deletions
diff --git a/storage/Makefile.am b/storage/Makefile.am index 8aa1e4f7dc6..e232547139d 100644 --- a/storage/Makefile.am +++ b/storage/Makefile.am @@ -21,6 +21,3 @@ AUTOMAKE_OPTIONS = foreign EXTRA_DIST = mysql_storage_engine.cmake SUBDIRS = @mysql_se_dirs@ DIST_SUBDIRS = @mysql_se_distdirs@ - -# Don't update the files from bitkeeper -%::SCCS/s.% diff --git a/storage/archive/Makefile.am b/storage/archive/Makefile.am index d092f091798..254c95bf68b 100644 --- a/storage/archive/Makefile.am +++ b/storage/archive/Makefile.am @@ -36,14 +36,14 @@ noinst_PROGRAMS = archive_test archive_reader EXTRA_LTLIBRARIES = ha_archive.la pkgplugin_LTLIBRARIES = @plugin_archive_shared_target@ ha_archive_la_LDFLAGS = -module -rpath $(pkgplugindir) -ha_archive_la_CXXFLAGS= $(AM_CFLAGS) -DMYSQL_DYNAMIC_PLUGIN -ha_archive_la_CFLAGS = $(AM_CFLAGS) -DMYSQL_DYNAMIC_PLUGIN +ha_archive_la_CXXFLAGS= -shared $(AM_CXXFLAGS) -DMYSQL_DYNAMIC_PLUGIN +ha_archive_la_CFLAGS = -shared $(AM_CFLAGS) -DMYSQL_DYNAMIC_PLUGIN ha_archive_la_SOURCES = ha_archive.cc azio.c EXTRA_LIBRARIES = libarchive.a noinst_LIBRARIES = @plugin_archive_static_target@ -libarchive_a_CXXFLAGS = $(AM_CFLAGS) +libarchive_a_CXXFLAGS = $(AM_CXXFLAGS) libarchive_a_CFLAGS = $(AM_CFLAGS) libarchive_a_SOURCES = ha_archive.cc azio.c diff --git a/storage/archive/azio.c b/storage/archive/azio.c index a24350dd454..fc54d98ab15 100644 --- a/storage/archive/azio.c +++ b/storage/archive/azio.c @@ -150,6 +150,17 @@ int az_open (azio_stream *s, const char *path, int Flags, File fd) } else { + /* Reset values in case of old version of archive file */ + s->rows= 0; + s->forced_flushes= 0; + s->shortest_row= 0; + s->longest_row= 0; + s->auto_increment= 0; + s->check_point= 0; + s->comment_start_pos= 0; + s->comment_length= 0; + s->frm_start_pos= 0; + s->frm_length= 0; check_header(s); /* skip the .az header */ } diff --git a/storage/archive/ha_archive.cc b/storage/archive/ha_archive.cc index 922db30c9d9..f3f5788360a 100644 --- a/storage/archive/ha_archive.cc +++ b/storage/archive/ha_archive.cc @@ -293,7 +293,7 @@ int ha_archive::read_data_header(azio_stream *file_to_read) DBUG_PRINT("ha_archive", ("Version %u", data_buffer[1])); if ((data_buffer[0] != (uchar)ARCHIVE_CHECK_HEADER) && - (data_buffer[1] != (uchar)ARCHIVE_VERSION)) + (data_buffer[1] == 1 || data_buffer[1] == 2)) DBUG_RETURN(HA_ERR_CRASHED_ON_USAGE); DBUG_RETURN(0); @@ -360,9 +360,19 @@ ARCHIVE_SHARE *ha_archive::get_share(const char *table_name, int *rc) my_free(share, MYF(0)); DBUG_RETURN(NULL); } - stats.auto_increment_value= archive_tmp.auto_increment + 1; - share->rows_recorded= (ha_rows)archive_tmp.rows; - share->crashed= archive_tmp.dirty; + share->version= archive_tmp.version; + if (archive_tmp.version == ARCHIVE_VERSION) + { + stats.auto_increment_value= archive_tmp.auto_increment + 1; + share->rows_recorded= (ha_rows)archive_tmp.rows; + share->crashed= archive_tmp.dirty; + } + else + { + /* Used by repair */ + share->rows_recorded= ~(ha_rows) 0; + stats.auto_increment_value= 0; + } /* If archive version is less than 3, It should be upgraded before use. @@ -512,10 +522,19 @@ int ha_archive::open(const char *name, int mode, uint open_options) case 0: break; case HA_ERR_CRASHED_ON_USAGE: + DBUG_PRINT("ha_archive", ("archive table was crashed")); if (open_options & HA_OPEN_FOR_REPAIR) + { + rc= 0; break; + } /* fall through */ case HA_ERR_TABLE_NEEDS_UPGRADE: + if (open_options & HA_OPEN_FOR_REPAIR) + { + rc= 0; + break; + } free_share(); /* fall through */ default: @@ -535,13 +554,6 @@ int ha_archive::open(const char *name, int mode, uint open_options) thr_lock_data_init(&share->lock, &lock, NULL); - DBUG_PRINT("ha_archive", ("archive table was crashed %s", - rc == HA_ERR_CRASHED_ON_USAGE ? "yes" : "no")); - if (rc == HA_ERR_CRASHED_ON_USAGE && open_options & HA_OPEN_FOR_REPAIR) - { - DBUG_RETURN(0); - } - DBUG_RETURN(rc); } @@ -1267,6 +1279,14 @@ int ha_archive::rnd_pos(uchar * buf, uchar *pos) DBUG_RETURN(get_row(&archive, buf)); } +int ha_archive::check_for_upgrade(HA_CHECK_OPT *check_opt) +{ + if (share->version < ARCHIVE_VERSION) + return HA_ADMIN_NEEDS_ALTER; + return 0; +} + + /* This method repairs the meta file. It does this by walking the datafile and rewriting the meta file. If EXTENDED repair is requested, we attempt to diff --git a/storage/archive/ha_archive.h b/storage/archive/ha_archive.h index ab630ed22fd..653a13b242d 100644 --- a/storage/archive/ha_archive.h +++ b/storage/archive/ha_archive.h @@ -35,7 +35,7 @@ typedef struct st_archive_record_buffer { typedef struct st_archive_share { char *table_name; char data_file_name[FN_REFLEN]; - uint table_name_length,use_count; + uint table_name_length,use_count, version; pthread_mutex_t mutex; THR_LOCK lock; azio_stream archive_write; /* Archive file we are working with */ @@ -133,6 +133,7 @@ public: int create(const char *name, TABLE *form, HA_CREATE_INFO *create_info); int optimize(THD* thd, HA_CHECK_OPT* check_opt); int repair(THD* thd, HA_CHECK_OPT* check_opt); + int check_for_upgrade(HA_CHECK_OPT *check_opt); void start_bulk_insert(ha_rows rows); int end_bulk_insert(); enum row_type get_row_type() const diff --git a/storage/blackhole/Makefile.am b/storage/blackhole/Makefile.am index db4f67cf847..148746a9336 100644 --- a/storage/blackhole/Makefile.am +++ b/storage/blackhole/Makefile.am @@ -35,15 +35,13 @@ noinst_HEADERS = ha_blackhole.h EXTRA_LTLIBRARIES = ha_blackhole.la pkgplugin_LTLIBRARIES = @plugin_blackhole_shared_target@ ha_blackhole_la_LDFLAGS=-module -rpath $(pkgplugindir) -ha_blackhole_la_CXXFLAGS=$(AM_CFLAGS) -DMYSQL_DYNAMIC_PLUGIN -ha_blackhole_la_CFLAGS= $(AM_CFLAGS) -DMYSQL_DYNAMIC_PLUGIN +ha_blackhole_la_CXXFLAGS=-shared $(AM_CXXFLAGS) -DMYSQL_DYNAMIC_PLUGIN ha_blackhole_la_SOURCES=ha_blackhole.cc EXTRA_LIBRARIES = libblackhole.a noinst_LIBRARIES = @plugin_blackhole_static_target@ -libblackhole_a_CXXFLAGS=$(AM_CFLAGS) -libblackhole_a_CFLAGS = $(AM_CFLAGS) +libblackhole_a_CXXFLAGS=$(AM_CXXFLAGS) libblackhole_a_SOURCES= ha_blackhole.cc diff --git a/storage/csv/Makefile.am b/storage/csv/Makefile.am index 07ffac88a96..75ad9062984 100644 --- a/storage/csv/Makefile.am +++ b/storage/csv/Makefile.am @@ -32,12 +32,12 @@ noinst_HEADERS = ha_tina.h transparent_file.h EXTRA_LTLIBRARIES = ha_csv.la pkglib_LTLIBRARIES = @plugin_csv_shared_target@ ha_csv_la_LDFLAGS = -module -rpath $(MYSQLLIBdir) -ha_csv_la_CXXFLAGS = $(AM_CFLAGS) -DMYSQL_PLUGIN +ha_csv_la_CXXFLAGS = -shared $(AM_CXXFLAGS) -DMYSQL_PLUGIN ha_csv_la_SOURCES = transparent_file.cc ha_tina.cc EXTRA_LIBRARIES = libcsv.a noinst_LIBRARIES = @plugin_csv_static_target@ -libcsv_a_CXXFLAGS = $(AM_CFLAGS) +libcsv_a_CXXFLAGS = $(AM_CXXFLAGS) libcsv_a_SOURCES = transparent_file.cc ha_tina.cc EXTRA_DIST = CMakeLists.txt plug.in diff --git a/storage/csv/ha_tina.cc b/storage/csv/ha_tina.cc index 4ca3e029cbc..8ba425c86cb 100644 --- a/storage/csv/ha_tina.cc +++ b/storage/csv/ha_tina.cc @@ -468,7 +468,7 @@ int ha_tina::encode_quote(uchar *buf) const char *ptr; const char *end_ptr; const bool was_null= (*field)->is_null(); - + /* assistance for backwards compatibility in production builds. note: this will not work for ENUM columns. @@ -480,7 +480,7 @@ int ha_tina::encode_quote(uchar *buf) } (*field)->val_str(&attribute,&attribute); - + if (was_null) (*field)->set_null(); @@ -498,34 +498,30 @@ int ha_tina::encode_quote(uchar *buf) buffer.append('"'); - while (ptr < end_ptr) + for (; ptr < end_ptr; ptr++) { if (*ptr == '"') { buffer.append('\\'); buffer.append('"'); - *ptr++; } else if (*ptr == '\r') { buffer.append('\\'); buffer.append('r'); - *ptr++; } else if (*ptr == '\\') { buffer.append('\\'); buffer.append('\\'); - *ptr++; } else if (*ptr == '\n') { buffer.append('\\'); buffer.append('n'); - *ptr++; } else - buffer.append(*ptr++); + buffer.append(*ptr); } buffer.append('"'); } diff --git a/storage/example/Makefile.am b/storage/example/Makefile.am index 10163e307b1..dc499e2f9dd 100644 --- a/storage/example/Makefile.am +++ b/storage/example/Makefile.am @@ -35,15 +35,12 @@ noinst_HEADERS = ha_example.h EXTRA_LTLIBRARIES = ha_example.la pkgplugin_LTLIBRARIES = @plugin_example_shared_target@ ha_example_la_LDFLAGS = -module -rpath $(pkgplugindir) -L$(top_builddir)/libservices -lmysqlservices -ha_example_la_CXXFLAGS= $(AM_CFLAGS) -DMYSQL_DYNAMIC_PLUGIN -ha_example_la_CFLAGS = $(AM_CFLAGS) -DMYSQL_DYNAMIC_PLUGIN +ha_example_la_CXXFLAGS= -shared $(AM_CXXFLAGS) -DMYSQL_DYNAMIC_PLUGIN ha_example_la_SOURCES = ha_example.cc - EXTRA_LIBRARIES = libexample.a noinst_LIBRARIES = @plugin_example_static_target@ -libexample_a_CXXFLAGS = $(AM_CFLAGS) -libexample_a_CFLAGS = $(AM_CFLAGS) +libexample_a_CXXFLAGS = $(AM_CXXFLAGS) libexample_a_SOURCES= ha_example.cc diff --git a/storage/example/ha_example.cc b/storage/example/ha_example.cc index e8ef0b72bc8..f2aee1de70c 100644 --- a/storage/example/ha_example.cc +++ b/storage/example/ha_example.cc @@ -225,7 +225,7 @@ static int example_done_func(void *p) hash_free(&example_open_tables); pthread_mutex_destroy(&example_mutex); - DBUG_RETURN(0); + DBUG_RETURN(error); } diff --git a/storage/federated/Makefile.am b/storage/federated/Makefile.am index 26786ee48cb..414fb5b7d5f 100644 --- a/storage/federated/Makefile.am +++ b/storage/federated/Makefile.am @@ -33,15 +33,14 @@ noinst_HEADERS = ha_federated.h EXTRA_LTLIBRARIES = ha_federated.la pkgplugin_LTLIBRARIES = @plugin_federated_shared_target@ ha_federated_la_LDFLAGS = -module -rpath $(pkgplugindir) -ha_federated_la_CXXFLAGS= $(AM_CFLAGS) -DMYSQL_DYNAMIC_PLUGIN -ha_federated_la_CFLAGS = $(AM_CFLAGS) -DMYSQL_DYNAMIC_PLUGIN +ha_federated_la_CXXFLAGS= -shared $(AM_CXXFLAGS) -DMYSQL_DYNAMIC_PLUGIN +ha_federated_la_CFLAGS = -shared $(AM_CFLAGS) -DMYSQL_DYNAMIC_PLUGIN ha_federated_la_SOURCES = ha_federated.cc $(top_srcdir)/mysys/string.c EXTRA_LIBRARIES = libfederated.a noinst_LIBRARIES = @plugin_federated_static_target@ -libfederated_a_CXXFLAGS = $(AM_CFLAGS) -libfederated_a_CFLAGS = $(AM_CFLAGS) +libfederated_a_CXXFLAGS = $(AM_CXXFLAGS) libfederated_a_SOURCES= ha_federated.cc diff --git a/storage/federated/ha_federated.cc b/storage/federated/ha_federated.cc index 4d597d6d7a9..fcc178b09a6 100644 --- a/storage/federated/ha_federated.cc +++ b/storage/federated/ha_federated.cc @@ -561,7 +561,6 @@ static int parse_url_error(FEDERATED_SHARE *share, TABLE *table, int error_num) int get_connection(MEM_ROOT *mem_root, FEDERATED_SHARE *share) { int error_num= ER_FOREIGN_SERVER_DOESNT_EXIST; - char error_buffer[FEDERATED_QUERY_BUFFER_SIZE]; FOREIGN_SERVER *server, server_buffer; DBUG_ENTER("ha_federated::get_connection"); @@ -613,10 +612,8 @@ int get_connection(MEM_ROOT *mem_root, FEDERATED_SHARE *share) DBUG_RETURN(0); error: - my_sprintf(error_buffer, - (error_buffer, "server name: '%s' doesn't exist!", - share->connection_string)); - my_error(error_num, MYF(0), error_buffer); + my_printf_error(error_num, "server name: '%s' doesn't exist!", + MYF(0), share->connection_string); DBUG_RETURN(error_num); } @@ -2405,8 +2402,8 @@ int ha_federated::index_read_idx_with_result_set(uchar *buf, uint index, if (real_query(sql_query.ptr(), sql_query.length())) { - my_sprintf(error_buffer, (error_buffer, "error: %d '%s'", - mysql_errno(mysql), mysql_error(mysql))); + sprintf(error_buffer, "error: %d '%s'", + mysql_errno(mysql), mysql_error(mysql)); retval= ER_QUERY_ON_FOREIGN_DATA_SOURCE; goto error; } @@ -2775,7 +2772,6 @@ int ha_federated::rnd_pos(uchar *buf, uchar *pos) int ha_federated::info(uint flag) { - char error_buffer[FEDERATED_QUERY_BUFFER_SIZE]; char status_buf[FEDERATED_QUERY_BUFFER_SIZE]; int error; uint error_code; @@ -2859,9 +2855,8 @@ error: mysql_free_result(result); if (mysql) { - my_sprintf(error_buffer, (error_buffer, ": %d : %s", - mysql_errno(mysql), mysql_error(mysql))); - my_error(error_code, MYF(0), error_buffer); + my_printf_error(error_code, ": %d : %s", MYF(0), + mysql_errno(mysql), mysql_error(mysql)); } else if (remote_error_number != -1 /* error already reported */) diff --git a/storage/federatedx/Makefile.am b/storage/federatedx/Makefile.am index 1781a53a0b6..1e76ac15266 100644 --- a/storage/federatedx/Makefile.am +++ b/storage/federatedx/Makefile.am @@ -20,14 +20,13 @@ noinst_HEADERS = ha_federatedx.h federatedx_probes.h EXTRA_LTLIBRARIES = ha_federatedx.la pkgplugin_LTLIBRARIES = @plugin_federatedx_shared_target@ ha_federatedx_la_LDFLAGS = -module -rpath $(pkgplugindir) -ha_federatedx_la_CXXFLAGS= $(AM_CFLAGS) -DMYSQL_DYNAMIC_PLUGIN -ha_federatedx_la_CFLAGS = $(AM_CFLAGS) -DMYSQL_DYNAMIC_PLUGIN +ha_federatedx_la_CXXFLAGS= -shared $(AM_CXXFLAGS) -DMYSQL_DYNAMIC_PLUGIN +ha_federatedx_la_CFLAGS = -shared $(AM_CFLAGS) -DMYSQL_DYNAMIC_PLUGIN EXTRA_LIBRARIES = libfederatedx.a noinst_LIBRARIES = @plugin_federatedx_static_target@ -libfederatedx_a_CXXFLAGS = $(AM_CFLAGS) -libfederatedx_a_CFLAGS = $(AM_CFLAGS) +libfederatedx_a_CXXFLAGS = $(AM_CXXFLAGS) libfederatedx_a_SOURCES= ha_federatedx.cc federatedx_txn.cc \ federatedx_io.cc federatedx_io_null.cc \ federatedx_io_mysql.cc diff --git a/storage/federatedx/ha_federatedx.cc b/storage/federatedx/ha_federatedx.cc index 8735e5625e6..2749034cba2 100644 --- a/storage/federatedx/ha_federatedx.cc +++ b/storage/federatedx/ha_federatedx.cc @@ -2967,10 +2967,13 @@ int ha_federatedx::rnd_pos(uchar *buf, uchar *pos) DBUG_ENTER("ha_federatedx::rnd_pos"); ha_statistic_increment(&SSV::ha_read_rnd_count); + /* We have to move this to 'ref' to get things aligned */ + bmove(ref, pos, ref_length); + if ((retval= txn->acquire(share, TRUE, &io))) goto error; - if ((retval= io->seek_position(&result, pos))) + if ((retval= io->seek_position(&result, ref))) goto error; retval= read_next(buf, result); diff --git a/storage/heap/hp_test2.c b/storage/heap/hp_test2.c index 5c548b6be74..bf06cf03035 100644 --- a/storage/heap/hp_test2.c +++ b/storage/heap/hp_test2.c @@ -406,7 +406,7 @@ int main(int argc, char *argv[]) bmove(record2,record,reclength); if (heap_rsame(file,record,-1) || heap_rsame(file,record2,2)) goto err; - if (bcmp(record2,record,reclength)) + if (memcmp(record2,record,reclength)) { puts("heap_rsame didn't find right record"); goto end; @@ -415,7 +415,7 @@ int main(int argc, char *argv[]) puts("- Test of read through position"); if (heap_rrnd(file,record,position)) goto err; - if (bcmp(record3,record,reclength)) + if (memcmp(record3,record,reclength)) { puts("heap_frnd didn't find right record"); goto end; diff --git a/storage/ibmdb2i/Makefile.am b/storage/ibmdb2i/Makefile.am index 768ca15f4cf..b9602e392e0 100644 --- a/storage/ibmdb2i/Makefile.am +++ b/storage/ibmdb2i/Makefile.am @@ -34,7 +34,7 @@ EXTRA_LTLIBRARIES = ha_ibmdb2i.la pkgplugin_LTLIBRARIES = @plugin_ibmdb2i_shared_target@ ha_ibmdb2i_la_LIBADD = -liconv ha_ibmdb2i_la_LDFLAGS = -module -rpath $(MYSQLLIBdir) -ha_ibmdb2i_la_CXXFLAGS= $(AM_CFLAGS) -DMYSQL_DYNAMIC_PLUGIN +ha_ibmdb2i_la_CXXFLAGS= $(AM_CXXFLAGS) -DMYSQL_DYNAMIC_PLUGIN ha_ibmdb2i_la_CFLAGS = $(AM_CFLAGS) -DMYSQL_DYNAMIC_PLUGIN ha_ibmdb2i_la_SOURCES = ha_ibmdb2i.cc db2i_ileBridge.cc db2i_conversion.cc \ db2i_blobCollection.cc db2i_file.cc db2i_charsetSupport.cc \ @@ -44,7 +44,7 @@ ha_ibmdb2i_la_SOURCES = ha_ibmdb2i.cc db2i_ileBridge.cc db2i_conversion.cc \ EXTRA_LIBRARIES = libibmdb2i.a noinst_LIBRARIES = @plugin_ibmdb2i_static_target@ -libibmdb2i_a_CXXFLAGS = $(AM_CFLAGS) +libibmdb2i_a_CXXFLAGS = $(AM_CXXFLAGS) libibmdb2i_a_CFLAGS = $(AM_CFLAGS) libibmdb2i_a_SOURCES= $(ha_ibmdb2i_la_SOURCES) diff --git a/storage/ibmdb2i/db2i_file.h b/storage/ibmdb2i/db2i_file.h index ff35a473b05..7b63b18c315 100644 --- a/storage/ibmdb2i/db2i_file.h +++ b/storage/ibmdb2i/db2i_file.h @@ -40,7 +40,6 @@ OF SUCH DAMAGE. #include "db2i_global.h" #include "db2i_ileBridge.h" #include "db2i_validatedPointer.h" -#include "my_atomic.h" #include "db2i_iconv.h" #include "db2i_charsetSupport.h" diff --git a/storage/innobase/Makefile.am b/storage/innobase/Makefile.am index a597e3c24e4..29ba83c0668 100644 --- a/storage/innobase/Makefile.am +++ b/storage/innobase/Makefile.am @@ -156,15 +156,15 @@ libinnobase_a_SOURCES= btr/btr0btr.c btr/btr0cur.c btr/btr0pcur.c \ ut/ut0ut.c ut/ut0vec.c ut/ut0wqueue.c \ handler/ha_innodb.cc -libinnobase_a_CXXFLAGS= $(AM_CFLAGS) +libinnobase_a_CXXFLAGS= $(AM_CXXFLAGS) libinnobase_a_CFLAGS= $(AM_CFLAGS) EXTRA_LTLIBRARIES= ha_innodb.la pkgplugin_LTLIBRARIES= @plugin_innobase_shared_target@ ha_innodb_la_LDFLAGS= -module -rpath $(pkgplugindir) -ha_innodb_la_CXXFLAGS= $(AM_CFLAGS) $(INNODB_DYNAMIC_CFLAGS) -ha_innodb_la_CFLAGS= $(AM_CFLAGS) $(INNODB_DYNAMIC_CFLAGS) +ha_innodb_la_CXXFLAGS= -shared $(AM_CXXFLAGS) $(INNODB_DYNAMIC_CFLAGS) +ha_innodb_la_CFLAGS= -shared $(AM_CFLAGS) $(INNODB_DYNAMIC_CFLAGS) ha_innodb_la_SOURCES= $(libinnobase_a_SOURCES) EXTRA_DIST= CMakeLists.txt plug.in \ diff --git a/storage/innobase/dict/dict0load.c b/storage/innobase/dict/dict0load.c index 65f1c9536bd..d5e7600f4d0 100644 --- a/storage/innobase/dict/dict0load.c +++ b/storage/innobase/dict/dict0load.c @@ -927,6 +927,8 @@ dict_load_table_on_id( ut_ad(mutex_own(&(dict_sys->mutex))); + table = NULL; + /* NOTE that the operation of this function is protected by the dictionary mutex, and therefore no deadlocks can occur with other dictionary operations. */ @@ -953,15 +955,17 @@ dict_load_table_on_id( BTR_SEARCH_LEAF, &pcur, &mtr); rec = btr_pcur_get_rec(&pcur); - if (!btr_pcur_is_on_user_rec(&pcur, &mtr) - || rec_get_deleted_flag(rec, 0)) { + if (!btr_pcur_is_on_user_rec(&pcur, &mtr)) { /* Not found */ + goto func_exit; + } - btr_pcur_close(&pcur); - mtr_commit(&mtr); - mem_heap_free(heap); - - return(NULL); + /* Find the first record that is not delete marked */ + while (rec_get_deleted_flag(rec, 0)) { + if (!btr_pcur_move_to_next_user_rec(&pcur, &mtr)) { + goto func_exit; + } + rec = btr_pcur_get_rec(&pcur); } /*---------------------------------------------------*/ @@ -974,19 +978,14 @@ dict_load_table_on_id( /* Check if the table id in record is the one searched for */ if (ut_dulint_cmp(table_id, mach_read_from_8(field)) != 0) { - - btr_pcur_close(&pcur); - mtr_commit(&mtr); - mem_heap_free(heap); - - return(NULL); + goto func_exit; } /* Now we get the table name from the record */ field = rec_get_nth_field_old(rec, 1, &len); /* Load the table definition to memory */ table = dict_load_table(mem_heap_strdupl(heap, (char*) field, len)); - +func_exit: btr_pcur_close(&pcur); mtr_commit(&mtr); mem_heap_free(heap); diff --git a/storage/innobase/os/os0file.c b/storage/innobase/os/os0file.c index 7373a97cfb0..3396d1adf2f 100644 --- a/storage/innobase/os/os0file.c +++ b/storage/innobase/os/os0file.c @@ -3974,6 +3974,9 @@ os_aio_simulated_handle( ulint n; ulint i; + /* Fix compiler warning */ + *consecutive_ios = NULL; + segment = os_aio_get_array_and_local_segment(&array, global_segment); restart: diff --git a/storage/innodb_plugin/ChangeLog b/storage/innodb_plugin/ChangeLog index 5ebcf1e87a2..3e802360d23 100644 --- a/storage/innodb_plugin/ChangeLog +++ b/storage/innodb_plugin/ChangeLog @@ -1,3 +1,27 @@ +2010-07-27 The InnoDB Team + + * include/mem0pool.h, mem/mem0mem.c, mem/mem0pool.c, srv/srv0start.c: + Fix Bug#55581 shutdown with innodb-use-sys-malloc=0: assert + mutex->magic_n == MUTEX_MAGIC_N. + +2010-06-30 The InnoDB Team + + * btr/btr0sea.c, ha/ha0ha.c, handler/ha_innodb.cc, include/btr0sea.h: + Fix Bug#54311 Crash on CHECK PARTITION after concurrent LOAD DATA + and adaptive_hash_index=OFF + +2010-06-29 The InnoDB Team + * row/row0row.c, row/row0undo.c, row/row0upd.c: + Fix Bug#54408 txn rollback after recovery: row0umod.c:673 + dict_table_get_format(index->table) + +2010-06-29 The InnoDB Team + + * btr/btr0cur.c, include/btr0cur.h, + include/row0mysql.h, row/row0merge.c, row/row0sel.c: + Fix Bug#54358 READ UNCOMMITTED access failure of off-page DYNAMIC + or COMPRESSED columns + 2010-06-24 The InnoDB Team * handler/ha_innodb.cc: diff --git a/storage/innodb_plugin/Makefile.am b/storage/innodb_plugin/Makefile.am index 807be18b2a2..1bed796f0d4 100644 --- a/storage/innodb_plugin/Makefile.am +++ b/storage/innodb_plugin/Makefile.am @@ -325,15 +325,15 @@ libinnobase_a_SOURCES= \ ut/ut0vec.c \ ut/ut0wqueue.c -libinnobase_a_CXXFLAGS= $(AM_CFLAGS) +libinnobase_a_CXXFLAGS= $(AM_CXXFLAGS) libinnobase_a_CFLAGS= $(AM_CFLAGS) EXTRA_LTLIBRARIES= ha_innodb_plugin.la pkgplugin_LTLIBRARIES= @plugin_innodb_plugin_shared_target@ ha_innodb_plugin_la_LDFLAGS= -module -rpath $(pkgplugindir) -L$(top_builddir)/libservices -lmysqlservices -ha_innodb_plugin_la_CXXFLAGS= $(AM_CFLAGS) $(INNODB_DYNAMIC_CFLAGS) -ha_innodb_plugin_la_CFLAGS= $(AM_CFLAGS) $(INNODB_DYNAMIC_CFLAGS) +ha_innodb_plugin_la_CXXFLAGS= -shared $(AM_CXXFLAGS) $(INNODB_DYNAMIC_CFLAGS) +ha_innodb_plugin_la_CFLAGS= -shared $(AM_CFLAGS) $(INNODB_DYNAMIC_CFLAGS) ha_innodb_plugin_la_SOURCES= $(libinnobase_a_SOURCES) EXTRA_DIST= CMakeLists.txt plug.in \ diff --git a/storage/innodb_plugin/btr/btr0cur.c b/storage/innodb_plugin/btr/btr0cur.c index 50531ad3bd7..7fa7d42320a 100644 --- a/storage/innodb_plugin/btr/btr0cur.c +++ b/storage/innodb_plugin/btr/btr0cur.c @@ -4814,7 +4814,7 @@ btr_copy_externally_stored_field( /*******************************************************************//** Copies an externally stored field of a record to mem heap. -@return the field copied to heap */ +@return the field copied to heap, or NULL if the field is incomplete */ UNIV_INTERN byte* btr_rec_copy_externally_stored_field( @@ -4844,6 +4844,18 @@ btr_rec_copy_externally_stored_field( data = rec_get_nth_field(rec, offsets, no, &local_len); + ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE); + + if (UNIV_UNLIKELY + (!memcmp(data + local_len - BTR_EXTERN_FIELD_REF_SIZE, + field_ref_zero, BTR_EXTERN_FIELD_REF_SIZE))) { + /* The externally stored field was not written yet. + This record should only be seen by + recv_recovery_rollback_active() or any + TRX_ISO_READ_UNCOMMITTED transactions. */ + return(NULL); + } + return(btr_copy_externally_stored_field(len, data, zip_size, local_len, heap)); } diff --git a/storage/innodb_plugin/btr/btr0sea.c b/storage/innodb_plugin/btr/btr0sea.c index c91afa31bf0..ac7248fef20 100644 --- a/storage/innodb_plugin/btr/btr0sea.c +++ b/storage/innodb_plugin/btr/btr0sea.c @@ -46,6 +46,7 @@ Created 2/17/1996 Heikki Tuuri /** Flag: has the search system been enabled? Protected by btr_search_latch and btr_search_enabled_mutex. */ UNIV_INTERN char btr_search_enabled = TRUE; +UNIV_INTERN ibool btr_search_fully_disabled = FALSE; /** Mutex protecting btr_search_enabled */ static mutex_t btr_search_enabled_mutex; @@ -201,12 +202,19 @@ btr_search_disable(void) mutex_enter(&btr_search_enabled_mutex); rw_lock_x_lock(&btr_search_latch); + /* Disable access to hash index, also tell ha_insert_for_fold() + stop adding new nodes to hash index, but still allow updating + existing nodes */ btr_search_enabled = FALSE; /* Clear all block->is_hashed flags and remove all entries from btr_search_sys->hash_index. */ buf_pool_drop_hash_index(); + /* hash index has been cleaned up, disallow any operation to + the hash index */ + btr_search_fully_disabled = TRUE; + /* btr_search_enabled_mutex should guarantee this. */ ut_ad(!btr_search_enabled); @@ -225,6 +233,7 @@ btr_search_enable(void) rw_lock_x_lock(&btr_search_latch); btr_search_enabled = TRUE; + btr_search_fully_disabled = FALSE; rw_lock_x_unlock(&btr_search_latch); mutex_exit(&btr_search_enabled_mutex); @@ -1363,7 +1372,7 @@ btr_search_build_page_hash_index( rw_lock_x_lock(&btr_search_latch); - if (UNIV_UNLIKELY(!btr_search_enabled)) { + if (UNIV_UNLIKELY(btr_search_fully_disabled)) { goto exit_func; } diff --git a/storage/innodb_plugin/ha/ha0ha.c b/storage/innodb_plugin/ha/ha0ha.c index 9d9d341ad39..f9e798012f8 100644 --- a/storage/innodb_plugin/ha/ha0ha.c +++ b/storage/innodb_plugin/ha/ha0ha.c @@ -31,9 +31,7 @@ Created 8/22/1994 Heikki Tuuri #ifdef UNIV_DEBUG # include "buf0buf.h" #endif /* UNIV_DEBUG */ -#ifdef UNIV_SYNC_DEBUG -# include "btr0sea.h" -#endif /* UNIV_SYNC_DEBUG */ +#include "btr0sea.h" #include "page0page.h" /*************************************************************//** @@ -127,7 +125,8 @@ ha_clear( /*************************************************************//** Inserts an entry into a hash table. If an entry with the same fold number is found, its node is updated to point to the new data, and no new node -is inserted. +is inserted. If btr_search_enabled is set to FALSE, we will only allow +updating existing nodes, but no new node is allowed to be added. @return TRUE if succeed, FALSE if no more memory could be allocated */ UNIV_INTERN ibool @@ -174,6 +173,7 @@ ha_insert_for_fold_func( prev_block->n_pointers--; block->n_pointers++; } + ut_ad(!btr_search_fully_disabled); # endif /* !UNIV_HOTBACKUP */ prev_node->block = block; @@ -186,6 +186,13 @@ ha_insert_for_fold_func( prev_node = prev_node->next; } + /* We are in the process of disabling hash index, do not add + new chain node */ + if (!btr_search_enabled) { + ut_ad(!btr_search_fully_disabled); + return(TRUE); + } + /* We have to allocate a new chain node */ node = mem_heap_alloc(hash_get_heap(table, fold), sizeof(ha_node_t)); diff --git a/storage/innodb_plugin/handler/ha_innodb.cc b/storage/innodb_plugin/handler/ha_innodb.cc index 4d4fd185567..37ce69b9933 100644 --- a/storage/innodb_plugin/handler/ha_innodb.cc +++ b/storage/innodb_plugin/handler/ha_innodb.cc @@ -2270,6 +2270,7 @@ innobase_change_buffering_inited_ok: /* Get the current high water mark format. */ innobase_file_format_check = (char*) trx_sys_file_format_max_get(); + btr_search_fully_disabled = (!btr_search_enabled); DBUG_RETURN(FALSE); error: DBUG_RETURN(TRUE); diff --git a/storage/innodb_plugin/include/btr0cur.h b/storage/innodb_plugin/include/btr0cur.h index 716f15c4267..7dc2eb63cf5 100644 --- a/storage/innodb_plugin/include/btr0cur.h +++ b/storage/innodb_plugin/include/btr0cur.h @@ -570,7 +570,7 @@ btr_copy_externally_stored_field_prefix( ulint local_len);/*!< in: length of data, in bytes */ /*******************************************************************//** Copies an externally stored field of a record to mem heap. -@return the field copied to heap */ +@return the field copied to heap, or NULL if the field is incomplete */ UNIV_INTERN byte* btr_rec_copy_externally_stored_field( diff --git a/storage/innodb_plugin/include/btr0sea.h b/storage/innodb_plugin/include/btr0sea.h index f98ba386f9c..20a2be7f877 100644 --- a/storage/innodb_plugin/include/btr0sea.h +++ b/storage/innodb_plugin/include/btr0sea.h @@ -190,7 +190,13 @@ btr_search_validate(void); /** Flag: has the search system been enabled? Protected by btr_search_latch and btr_search_enabled_mutex. */ -extern char btr_search_enabled; +extern char btr_search_enabled; + +/** Flag: whether the search system has completed its disabling process, +It is set to TRUE right after buf_pool_drop_hash_index() in +btr_search_disable(), indicating hash index entries are cleaned up. +Protected by btr_search_latch and btr_search_enabled_mutex. */ +extern ibool btr_search_fully_disabled; /** The search info struct in an index */ struct btr_search_struct{ diff --git a/storage/innodb_plugin/include/mem0pool.h b/storage/innodb_plugin/include/mem0pool.h index 5e93bf88a47..fa8be296ec9 100644 --- a/storage/innodb_plugin/include/mem0pool.h +++ b/storage/innodb_plugin/include/mem0pool.h @@ -100,18 +100,6 @@ mem_pool_get_reserved( /*==================*/ mem_pool_t* pool); /*!< in: memory pool */ /********************************************************************//** -Reserves the mem pool mutex. */ -UNIV_INTERN -void -mem_pool_mutex_enter(void); -/*======================*/ -/********************************************************************//** -Releases the mem pool mutex. */ -UNIV_INTERN -void -mem_pool_mutex_exit(void); -/*=====================*/ -/********************************************************************//** Validates a memory pool. @return TRUE if ok */ UNIV_INTERN diff --git a/storage/innodb_plugin/include/row0mysql.h b/storage/innodb_plugin/include/row0mysql.h index 39ea240772c..b69e657361b 100644 --- a/storage/innodb_plugin/include/row0mysql.h +++ b/storage/innodb_plugin/include/row0mysql.h @@ -622,7 +622,11 @@ struct row_prebuilt_struct { the secondary index, then this is set to TRUE */ unsigned templ_contains_blob:1;/*!< TRUE if the template contains - BLOB column(s) */ + a column with DATA_BLOB == + get_innobase_type_from_mysql_type(); + not to be confused with InnoDB + externally stored columns + (VARCHAR can be off-page too) */ mysql_row_templ_t* mysql_template;/*!< template used to transform rows fast between MySQL and Innobase formats; memory for this template diff --git a/storage/innodb_plugin/include/univ.i b/storage/innodb_plugin/include/univ.i index ff62c3e53c1..991625d6a8a 100644 --- a/storage/innodb_plugin/include/univ.i +++ b/storage/innodb_plugin/include/univ.i @@ -46,7 +46,7 @@ Created 1/20/1994 Heikki Tuuri #define INNODB_VERSION_MAJOR 1 #define INNODB_VERSION_MINOR 0 -#define INNODB_VERSION_BUGFIX 10 +#define INNODB_VERSION_BUGFIX 11 /* The following is the InnoDB version as shown in SELECT plugin_version FROM information_schema.plugins; diff --git a/storage/innodb_plugin/mem/mem0mem.c b/storage/innodb_plugin/mem/mem0mem.c index c0ce8a3e1ac..1dd4db30841 100644 --- a/storage/innodb_plugin/mem/mem0mem.c +++ b/storage/innodb_plugin/mem/mem0mem.c @@ -367,7 +367,7 @@ mem_heap_create_block( block->line = line; #ifdef MEM_PERIODIC_CHECK - mem_pool_mutex_enter(); + mutex_enter(&(mem_comm_pool->mutex)); if (!mem_block_list_inited) { mem_block_list_inited = TRUE; @@ -376,7 +376,7 @@ mem_heap_create_block( UT_LIST_ADD_LAST(mem_block_list, mem_block_list, block); - mem_pool_mutex_exit(); + mutex_exit(&(mem_comm_pool->mutex)); #endif mem_block_set_len(block, len); mem_block_set_type(block, type); @@ -479,11 +479,11 @@ mem_heap_block_free( UT_LIST_REMOVE(list, heap->base, block); #ifdef MEM_PERIODIC_CHECK - mem_pool_mutex_enter(); + mutex_enter(&(mem_comm_pool->mutex)); UT_LIST_REMOVE(mem_block_list, mem_block_list, block); - mem_pool_mutex_exit(); + mutex_exit(&(mem_comm_pool->mutex)); #endif ut_ad(heap->total_size >= block->len); @@ -556,7 +556,7 @@ mem_validate_all_blocks(void) { mem_block_t* block; - mem_pool_mutex_enter(); + mutex_enter(&(mem_comm_pool->mutex)); block = UT_LIST_GET_FIRST(mem_block_list); @@ -568,6 +568,6 @@ mem_validate_all_blocks(void) block = UT_LIST_GET_NEXT(mem_block_list, block); } - mem_pool_mutex_exit(); + mutex_exit(&(mem_comm_pool->mutex)); } #endif diff --git a/storage/innodb_plugin/mem/mem0pool.c b/storage/innodb_plugin/mem/mem0pool.c index c4f8af607e0..3291453eeb5 100644 --- a/storage/innodb_plugin/mem/mem0pool.c +++ b/storage/innodb_plugin/mem/mem0pool.c @@ -34,6 +34,7 @@ Created 5/12/1997 Heikki Tuuri #include "ut0lst.h" #include "ut0byte.h" #include "mem0mem.h" +#include "srv0start.h" /* We would like to use also the buffer frames to allocate memory. This would be desirable, because then the memory consumption of the database @@ -121,23 +122,33 @@ mysql@lists.mysql.com */ UNIV_INTERN ulint mem_n_threads_inside = 0; /********************************************************************//** -Reserves the mem pool mutex. */ -UNIV_INTERN +Reserves the mem pool mutex if we are not in server shutdown. Use +this function only in memory free functions, since only memory +free functions are used during server shutdown. */ +UNIV_INLINE void -mem_pool_mutex_enter(void) -/*======================*/ +mem_pool_mutex_enter( +/*=================*/ + mem_pool_t* pool) /*!< in: memory pool */ { - mutex_enter(&(mem_comm_pool->mutex)); + if (srv_shutdown_state < SRV_SHUTDOWN_EXIT_THREADS) { + mutex_enter(&(pool->mutex)); + } } /********************************************************************//** -Releases the mem pool mutex. */ -UNIV_INTERN +Releases the mem pool mutex if we are not in server shutdown. As +its corresponding mem_pool_mutex_enter() function, use it only +in memory free functions */ +UNIV_INLINE void -mem_pool_mutex_exit(void) -/*=====================*/ +mem_pool_mutex_exit( +/*================*/ + mem_pool_t* pool) /*!< in: memory pool */ { - mutex_exit(&(mem_comm_pool->mutex)); + if (srv_shutdown_state < SRV_SHUTDOWN_EXIT_THREADS) { + mutex_exit(&(pool->mutex)); + } } /********************************************************************//** @@ -567,7 +578,7 @@ mem_area_free( n = ut_2_log(size); - mutex_enter(&(pool->mutex)); + mem_pool_mutex_enter(pool); mem_n_threads_inside++; ut_a(mem_n_threads_inside == 1); @@ -595,7 +606,7 @@ mem_area_free( pool->reserved += ut_2_exp(n); mem_n_threads_inside--; - mutex_exit(&(pool->mutex)); + mem_pool_mutex_exit(pool); mem_area_free(new_ptr, pool); @@ -611,7 +622,7 @@ mem_area_free( } mem_n_threads_inside--; - mutex_exit(&(pool->mutex)); + mem_pool_mutex_exit(pool); ut_ad(mem_pool_validate(pool)); } @@ -630,7 +641,7 @@ mem_pool_validate( ulint free; ulint i; - mutex_enter(&(pool->mutex)); + mem_pool_mutex_enter(pool); free = 0; @@ -658,7 +669,7 @@ mem_pool_validate( ut_a(free + pool->reserved == pool->size); - mutex_exit(&(pool->mutex)); + mem_pool_mutex_exit(pool); return(TRUE); } diff --git a/storage/innodb_plugin/row/row0merge.c b/storage/innodb_plugin/row/row0merge.c index 70cc7912fad..56a68b58225 100644 --- a/storage/innodb_plugin/row/row0merge.c +++ b/storage/innodb_plugin/row/row0merge.c @@ -1780,6 +1780,11 @@ row_merge_copy_blobs( (below). */ data = btr_rec_copy_externally_stored_field( mrec, offsets, zip_size, i, &len, heap); + /* Because we have locked the table, any records + written by incomplete transactions must have been + rolled back already. There must not be any incomplete + BLOB columns. */ + ut_a(data); dfield_set_data(field, data, len); } diff --git a/storage/innodb_plugin/row/row0row.c b/storage/innodb_plugin/row/row0row.c index cb7dfa2b7c9..8e806a14a98 100644 --- a/storage/innodb_plugin/row/row0row.c +++ b/storage/innodb_plugin/row/row0row.c @@ -294,7 +294,13 @@ row_build( ut_ad(dtuple_check_typed(row)); - if (j) { + if (!ext) { + /* REDUNDANT and COMPACT formats store a local + 768-byte prefix of each externally stored + column. No cache is needed. */ + ut_ad(dict_table_get_format(index->table) + < DICT_TF_FORMAT_ZIP); + } else if (j) { *ext = row_ext_create(j, ext_cols, row, dict_table_zip_size(index->table), heap); diff --git a/storage/innodb_plugin/row/row0sel.c b/storage/innodb_plugin/row/row0sel.c index 2861235a995..76c144e5a8c 100644 --- a/storage/innodb_plugin/row/row0sel.c +++ b/storage/innodb_plugin/row/row0sel.c @@ -416,7 +416,7 @@ row_sel_fetch_columns( field_no))) { /* Copy an externally stored field to the - temporary heap */ + temporary heap, if possible. */ heap = mem_heap_create(1); @@ -425,6 +425,17 @@ row_sel_fetch_columns( dict_table_zip_size(index->table), field_no, &len, heap); + /* data == NULL means that the + externally stored field was not + written yet. This record + should only be seen by + recv_recovery_rollback_active() or any + TRX_ISO_READ_UNCOMMITTED + transactions. The InnoDB SQL parser + (the sole caller of this function) + does not implement READ UNCOMMITTED, + and it is not involved during rollback. */ + ut_a(data); ut_a(len != UNIV_SQL_NULL); needs_copy = TRUE; @@ -926,6 +937,7 @@ row_sel_get_clust_rec( when plan->clust_pcur was positioned. The latch will not be released until mtr_commit(mtr). */ + ut_ad(!rec_get_deleted_flag(clust_rec, rec_offs_comp(offsets))); row_sel_fetch_columns(index, clust_rec, offsets, UT_LIST_GET_FIRST(plan->columns)); *out_rec = clust_rec; @@ -1628,6 +1640,13 @@ skip_lock: } if (old_vers == NULL) { + /* The record does not exist + in our read view. Skip it, but + first attempt to determine + whether the index segment we + are searching through has been + exhausted. */ + offsets = rec_get_offsets( rec, index, offsets, ULINT_UNDEFINED, &heap); @@ -2647,9 +2666,8 @@ Convert a row in the Innobase format to a row in the MySQL format. Note that the template in prebuilt may advise us to copy only a few columns to mysql_rec, other columns are left blank. All columns may not be needed in the query. -@return TRUE if success, FALSE if could not allocate memory for a BLOB -(though we may also assert in that case) */ -static +@return TRUE on success, FALSE if not all columns could be retrieved */ +static __attribute__((warn_unused_result)) ibool row_sel_store_mysql_rec( /*====================*/ @@ -2672,6 +2690,7 @@ row_sel_store_mysql_rec( ut_ad(prebuilt->mysql_template); ut_ad(prebuilt->default_rec); ut_ad(rec_offs_validate(rec, NULL, offsets)); + ut_ad(!rec_get_deleted_flag(rec, rec_offs_comp(offsets))); if (UNIV_LIKELY_NULL(prebuilt->blob_heap)) { mem_heap_free(prebuilt->blob_heap); @@ -2719,6 +2738,21 @@ row_sel_store_mysql_rec( dict_table_zip_size(prebuilt->table), templ->rec_field_no, &len, heap); + if (UNIV_UNLIKELY(!data)) { + /* The externally stored field + was not written yet. This + record should only be seen by + recv_recovery_rollback_active() + or any TRX_ISO_READ_UNCOMMITTED + transactions. */ + + if (extern_field_heap) { + mem_heap_free(extern_field_heap); + } + + return(FALSE); + } + ut_a(len != UNIV_SQL_NULL); } else { /* Field is stored in the row. */ @@ -3136,9 +3170,10 @@ row_sel_pop_cached_row_for_mysql( } /********************************************************************//** -Pushes a row for MySQL to the fetch cache. */ -UNIV_INLINE -void +Pushes a row for MySQL to the fetch cache. +@return TRUE on success, FALSE if the record contains incomplete BLOBs */ +UNIV_INLINE __attribute__((warn_unused_result)) +ibool row_sel_push_cache_row_for_mysql( /*=============================*/ row_prebuilt_t* prebuilt, /*!< in: prebuilt struct */ @@ -3180,10 +3215,11 @@ row_sel_push_cache_row_for_mysql( prebuilt->fetch_cache[ prebuilt->n_fetch_cached], prebuilt, rec, offsets))) { - ut_error; + return(FALSE); } prebuilt->n_fetch_cached++; + return(TRUE); } /*********************************************************************//** @@ -3578,11 +3614,21 @@ row_search_for_mysql( if (!row_sel_store_mysql_rec(buf, prebuilt, rec, offsets)) { - err = DB_TOO_BIG_RECORD; - - /* We let the main loop to do the - error handling */ - goto shortcut_fails_too_big_rec; + /* Only fresh inserts may contain + incomplete externally stored + columns. Pretend that such + records do not exist. Such + records may only be accessed + at the READ UNCOMMITTED + isolation level or when + rolling back a recovered + transaction. Rollback happens + at a lower level, not here. */ + ut_a(trx->isolation_level + == TRX_ISO_READ_UNCOMMITTED); + + /* Proceed as in case SEL_RETRY. */ + break; } mtr_commit(&mtr); @@ -3622,7 +3668,7 @@ release_search_latch_if_needed: default: ut_ad(0); } -shortcut_fails_too_big_rec: + mtr_commit(&mtr); mtr_start(&mtr); } @@ -4357,9 +4403,18 @@ requires_clust_rec: not cache rows because there the cursor is a scrollable cursor. */ - row_sel_push_cache_row_for_mysql(prebuilt, result_rec, - offsets); - if (prebuilt->n_fetch_cached == MYSQL_FETCH_CACHE_SIZE) { + if (!row_sel_push_cache_row_for_mysql(prebuilt, result_rec, + offsets)) { + /* Only fresh inserts may contain incomplete + externally stored columns. Pretend that such + records do not exist. Such records may only be + accessed at the READ UNCOMMITTED isolation + level or when rolling back a recovered + transaction. Rollback happens at a lower + level, not here. */ + ut_a(trx->isolation_level == TRX_ISO_READ_UNCOMMITTED); + } else if (prebuilt->n_fetch_cached + == MYSQL_FETCH_CACHE_SIZE) { goto got_row; } @@ -4375,9 +4430,17 @@ requires_clust_rec: } else { if (!row_sel_store_mysql_rec(buf, prebuilt, result_rec, offsets)) { - err = DB_TOO_BIG_RECORD; - - goto lock_wait_or_error; + /* Only fresh inserts may contain + incomplete externally stored + columns. Pretend that such records do + not exist. Such records may only be + accessed at the READ UNCOMMITTED + isolation level or when rolling back a + recovered transaction. Rollback + happens at a lower level, not here. */ + ut_a(trx->isolation_level + == TRX_ISO_READ_UNCOMMITTED); + goto next_rec; } } diff --git a/storage/innodb_plugin/row/row0undo.c b/storage/innodb_plugin/row/row0undo.c index 9ef842b5114..fd28a4f6520 100644 --- a/storage/innodb_plugin/row/row0undo.c +++ b/storage/innodb_plugin/row/row0undo.c @@ -199,8 +199,24 @@ row_undo_search_clust_to_pcur( ret = FALSE; } else { + row_ext_t** ext; + + if (dict_table_get_format(node->table) >= DICT_TF_FORMAT_ZIP) { + /* In DYNAMIC or COMPRESSED format, there is + no prefix of externally stored columns in the + clustered index record. Build a cache of + column prefixes. */ + ext = &node->ext; + } else { + /* REDUNDANT and COMPACT formats store a local + 768-byte prefix of each externally stored + column. No cache is needed. */ + ext = NULL; + node->ext = NULL; + } + node->row = row_build(ROW_COPY_DATA, clust_index, rec, - offsets, NULL, &node->ext, node->heap); + offsets, NULL, ext, node->heap); if (node->update) { node->undo_row = dtuple_copy(node->row, node->heap); row_upd_replace(node->undo_row, &node->undo_ext, diff --git a/storage/innodb_plugin/row/row0upd.c b/storage/innodb_plugin/row/row0upd.c index d0aaecd3dae..397b117c067 100644 --- a/storage/innodb_plugin/row/row0upd.c +++ b/storage/innodb_plugin/row/row0upd.c @@ -1398,6 +1398,7 @@ row_upd_store_row( dict_index_t* clust_index; rec_t* rec; mem_heap_t* heap = NULL; + row_ext_t** ext; ulint offsets_[REC_OFFS_NORMAL_SIZE]; const ulint* offsets; rec_offs_init(offsets_); @@ -1414,8 +1415,22 @@ row_upd_store_row( offsets = rec_get_offsets(rec, clust_index, offsets_, ULINT_UNDEFINED, &heap); + + if (dict_table_get_format(node->table) >= DICT_TF_FORMAT_ZIP) { + /* In DYNAMIC or COMPRESSED format, there is no prefix + of externally stored columns in the clustered index + record. Build a cache of column prefixes. */ + ext = &node->ext; + } else { + /* REDUNDANT and COMPACT formats store a local + 768-byte prefix of each externally stored column. + No cache is needed. */ + ext = NULL; + node->ext = NULL; + } + node->row = row_build(ROW_COPY_DATA, clust_index, rec, offsets, - NULL, &node->ext, node->heap); + NULL, ext, node->heap); if (node->is_delete) { node->upd_row = NULL; node->upd_ext = NULL; diff --git a/storage/innodb_plugin/srv/srv0start.c b/storage/innodb_plugin/srv/srv0start.c index e517b9a86b0..ba9fc831b39 100644 --- a/storage/innodb_plugin/srv/srv0start.c +++ b/storage/innodb_plugin/srv/srv0start.c @@ -2018,9 +2018,13 @@ innobase_shutdown_for_mysql(void) pars_lexer_close(); log_mem_free(); buf_pool_free(); - ut_free_all_mem(); mem_close(); + /* ut_free_all_mem() frees all allocated memory not freed yet + in shutdown, and it will also free the ut_list_mutex, so it + should be the last one for all operation */ + ut_free_all_mem(); + if (os_thread_count != 0 || os_event_count != 0 || os_mutex_count != 0 diff --git a/storage/maria/ha_maria.cc b/storage/maria/ha_maria.cc index 1475cd52488..95ddde42e07 100644 --- a/storage/maria/ha_maria.cc +++ b/storage/maria/ha_maria.cc @@ -786,8 +786,11 @@ static int maria_create_trn_for_mysql(MARIA_HA *info) thd->query_length()); } else + { DBUG_PRINT("info", ("lock_type: %d trnman_flags: %u", - info->lock_type, trnman_get_flags(trn))); /* QQ */ + info->lock_type, trnman_get_flags(trn))); + } + #endif DBUG_RETURN(0); } @@ -2383,6 +2386,12 @@ int ha_maria::extra(enum ha_extra_function operation) int ha_maria::reset(void) { + if (file->trn) + { + /* Next statement is a new statement. Ensure it's logged */ + trnman_set_flags(file->trn, + trnman_get_flags(file->trn) & ~TRN_STATE_INFO_LOGGED); + } return maria_reset(file); } diff --git a/storage/maria/ma_bitmap.c b/storage/maria/ma_bitmap.c index a76b36e1115..4f37d7b9a1f 100644 --- a/storage/maria/ma_bitmap.c +++ b/storage/maria/ma_bitmap.c @@ -147,6 +147,12 @@ static inline my_bool write_changed_bitmap(MARIA_SHARE *share, DBUG_ASSERT(bitmap->file.write_callback != 0); DBUG_PRINT("info", ("bitmap->non_flushable: %u", bitmap->non_flushable)); + /* + Mark that a bitmap page has been written to page cache and we have + to flush it during checkpoint. + */ + bitmap->changed_not_flushed= 1; + if ((bitmap->non_flushable == 0) #ifdef WRONG_BITMAP_FLUSH || 1 @@ -347,7 +353,7 @@ my_bool _ma_bitmap_flush_all(MARIA_SHARE *share) MARIA_FILE_BITMAP *bitmap= &share->bitmap; DBUG_ENTER("_ma_bitmap_flush_all"); pthread_mutex_lock(&bitmap->bitmap_lock); - if (bitmap->changed) + if (bitmap->changed || bitmap->changed_not_flushed) { bitmap->flush_all_requested= TRUE; #ifndef WRONG_BITMAP_FLUSH @@ -384,6 +390,7 @@ my_bool _ma_bitmap_flush_all(MARIA_SHARE *share) &bitmap->pages_covered) & PCFLUSH_PINNED_AND_ERROR) res= TRUE; + bitmap->changed_not_flushed= FALSE; bitmap->flush_all_requested= FALSE; /* Some well-behaved threads may be waiting for flush_all_requested to @@ -1875,6 +1882,7 @@ static my_bool set_page_bits(MARIA_HA *info, MARIA_FILE_BITMAP *bitmap, uint offset_page, offset, tmp, org_tmp; uchar *data; DBUG_ENTER("set_page_bits"); + DBUG_ASSERT(fill_pattern <= 7); bitmap_page= page - page % bitmap->pages_covered; if (bitmap_page != bitmap->page && @@ -2296,9 +2304,16 @@ my_bool _ma_bitmap_release_unused(MARIA_HA *info, MARIA_BITMAP_BLOCKS *blocks) The page has all bits set; The following test is an optimization to not set the bits to the same value as before. */ - if (bits != current_bitmap_value && - set_page_bits(info, bitmap, block->page, bits)) - goto err; + if (bits != current_bitmap_value) + { + if (set_page_bits(info, bitmap, block->page, bits)) + goto err; + } + else + { + DBUG_ASSERT(current_bitmap_value == + _ma_bitmap_get_page_bits(info, bitmap, block->page)); + } } else if (!(block->used & BLOCKUSED_USED) && _ma_bitmap_reset_full_page_bits(info, bitmap, diff --git a/storage/maria/ma_blockrec.c b/storage/maria/ma_blockrec.c index 0779564c8af..e3cb8d14eeb 100644 --- a/storage/maria/ma_blockrec.c +++ b/storage/maria/ma_blockrec.c @@ -1716,7 +1716,7 @@ static my_bool get_head_or_tail_page(MARIA_HA *info, MARIA_PINNED_PAGE page_link; MARIA_SHARE *share= info->s; DBUG_ENTER("get_head_or_tail_page"); - DBUG_PRINT("enter", ("length: %u", length)); + DBUG_PRINT("enter", ("page_type: %u length: %u", page_type, length)); block_size= share->block_size; if (block->org_bitmap_value == 0) /* Empty block */ @@ -1990,7 +1990,8 @@ static my_bool write_tail(MARIA_HA *info, block->empty_space= (enough_free_entries(row_pos.buff, share->block_size, 1 + share->base.blobs) ? empty_space : 0); - block->used= BLOCKUSED_USED | BLOCKUSED_TAIL; + /* Keep BLOCKUSED_USE_ORG_BITMAP */ + block->used|= BLOCKUSED_USED | BLOCKUSED_TAIL; /* Increase data file size, if extended */ position= (my_off_t) block->page * block_size; @@ -6237,7 +6238,10 @@ uint _ma_apply_redo_insert_row_head_or_tail(MARIA_HA *info, LSN lsn, /* Skip errors when reading outside of file and uninitialized pages */ if (!new_page || (my_errno != HA_ERR_FILE_TOO_SHORT && my_errno != HA_ERR_WRONG_CRC)) + { + DBUG_PRINT("error", ("Error %d when reading page", (int) my_errno)); goto err; + } /* Create new page */ buff= pagecache_block_link_to_buffer(page_link.link); buff[PAGE_TYPE_OFFSET]= UNALLOCATED_PAGE; @@ -6265,7 +6269,13 @@ uint _ma_apply_redo_insert_row_head_or_tail(MARIA_HA *info, LSN lsn, changed to new type. */ if (!new_page) + { + DBUG_PRINT("error", + ("Found page of wrong type: %u, should have been %u", + (uint) (buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK), + page_type)); goto crashed_file; + } make_empty_page(info, buff, page_type, 0); empty_space= block_size - PAGE_HEADER_SIZE - PAGE_SUFFIX_SIZE; (void) extend_directory(page_type == HEAD_PAGE ? info: 0, buff, diff --git a/storage/maria/ma_check.c b/storage/maria/ma_check.c index 086cff67dab..ed3a7a46344 100644 --- a/storage/maria/ma_check.c +++ b/storage/maria/ma_check.c @@ -136,11 +136,13 @@ void maria_chk_init_for_check(HA_CHECK *param, MARIA_HA *info) Set up transaction handler so that we can see all rows. When rows is read we will check the found id against param->max_tried */ - if (!ma_control_file_inited()) - param->max_trid= 0; /* Give warning for first trid found */ - else - param->max_trid= max_trid_in_system(); - + if (param->max_trid == 0) + { + if (!ma_control_file_inited()) + param->max_trid= 0; /* Give warning for first trid found */ + else + param->max_trid= max_trid_in_system(); + } maria_ignore_trids(info); } @@ -867,7 +869,7 @@ static int chk_index(HA_CHECK *param, MARIA_HA *info, MARIA_KEYDEF *keyinfo, llstr(anc_page->pos, llbuff)); } - if (anc_page->size > (uint) keyinfo->block_length - KEYPAGE_CHECKSUM_SIZE) + if (anc_page->size > share->max_index_block_size) { _ma_check_print_error(param, "Page at %s has impossible (too big) pagelength", @@ -1758,7 +1760,7 @@ static my_bool check_head_page(HA_CHECK *param, MARIA_HA *info, uchar *record, _ma_check_print_error(param, "Page %9s: Row: %3d has an extent with " "wrong information in bitmap: " - "Page %9s Page_type: %d Bitmap: %d", + "Page: %9s Page_type: %d Bitmap: %d", llstr(page, llbuff), row, llstr(extent_page, llbuff2), page_type, bitmap_pattern); @@ -2325,11 +2327,13 @@ static int initialize_variables_for_repair(HA_CHECK *param, } /* Set up transaction handler so that we can see all rows */ - if (!ma_control_file_inited()) - param->max_trid= 0; /* Give warning for first trid found */ - else - param->max_trid= max_trid_in_system(); - + if (param->max_trid == 0) + { + if (!ma_control_file_inited()) + param->max_trid= 0; /* Give warning for first trid found */ + else + param->max_trid= max_trid_in_system(); + } maria_ignore_trids(info); /* Don't write transid's during repair */ maria_versioning(info, 0); @@ -5609,7 +5613,7 @@ static int sort_insert_key(MARIA_SORT_PARAM *sort_param, a_length+=t_length; _ma_store_page_used(share, anc_buff, a_length); key_block->end_pos+=t_length; - if (a_length <= (uint) (keyinfo->block_length - KEYPAGE_CHECKSUM_SIZE)) + if (a_length <= share->max_index_block_size) { MARIA_KEY tmp_key2; tmp_key2.data= key_block->lastkey; diff --git a/storage/maria/ma_delete.c b/storage/maria/ma_delete.c index 0e9e5caafbf..2420cac0e93 100644 --- a/storage/maria/ma_delete.c +++ b/storage/maria/ma_delete.c @@ -1,4 +1,5 @@ /* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + Copyright (C) 2009-2010 Monty Program Ab This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -180,7 +181,11 @@ my_bool _ma_ck_delete(MARIA_HA *info, MARIA_KEY *key) key->data= key_buff; } - res= _ma_ck_real_delete(info, key, &new_root); + if ((res= _ma_ck_real_delete(info, key, &new_root))) + { + /* We have to mark the table crashed before unpin_all_pages() */ + maria_mark_crashed(info); + } key->data= save_key_data; if (!res && share->now_transactional) @@ -218,7 +223,8 @@ my_bool _ma_ck_real_delete(register MARIA_HA *info, MARIA_KEY *key, my_errno=ENOMEM; DBUG_RETURN(1); } - DBUG_PRINT("info",("root_page: %ld", (long) old_root)); + DBUG_PRINT("info",("root_page: %lu", + (ulong) (old_root / keyinfo->block_length))); if (_ma_fetch_keypage(&page, info, keyinfo, old_root, PAGECACHE_LOCK_WRITE, DFLT_INIT_HITS, root_buff, 0)) { @@ -435,7 +441,8 @@ static int d_search(MARIA_HA *info, MARIA_KEY *key, uint32 comp_flag, */ if (share->now_transactional && _ma_log_delete(anc_page, s_temp.key_pos, - s_temp.changed_length, s_temp.move_length)) + s_temp.changed_length, s_temp.move_length, + 0, KEY_OP_DEBUG_LOG_DEL_CHANGE_1)) DBUG_RETURN(-1); if (!nod_flag) @@ -458,7 +465,7 @@ static int d_search(MARIA_HA *info, MARIA_KEY *key, uint32 comp_flag, } if (ret_value >0) { - save_flag=1; + save_flag= 2; if (ret_value == 1) ret_value= underflow(info, keyinfo, anc_page, &leaf_page, keypos); else @@ -474,17 +481,20 @@ static int d_search(MARIA_HA *info, MARIA_KEY *key, uint32 comp_flag, ret_value= _ma_insert(info, key, anc_page, keypos, last_key.data, (MARIA_PAGE*) 0, (uchar*) 0, (my_bool) 0); + + if (_ma_write_keypage(&leaf_page, PAGECACHE_LOCK_LEFT_WRITELOCKED, + DFLT_INIT_HITS)) + ret_value= -1; } } - if (ret_value == 0 && anc_page->size > - (uint) (keyinfo->block_length - KEYPAGE_CHECKSUM_SIZE)) + if (ret_value == 0 && anc_page->size > share->max_index_block_size) { /* parent buffer got too big ; We have to split the page */ - save_flag=1; + save_flag= 3; ret_value= _ma_split_page(info, key, anc_page, - (uint) (keyinfo->block_length - - KEYPAGE_CHECKSUM_SIZE), + share->max_index_block_size, (uchar*) 0, 0, 0, lastkey, 0) | 2; + DBUG_ASSERT(anc_page->org_size == anc_page->size); } if (save_flag && ret_value != 1) { @@ -550,7 +560,8 @@ static int del(MARIA_HA *info, MARIA_KEY *key, MARIA_KEY ret_key; MARIA_PAGE next_page; DBUG_ENTER("del"); - DBUG_PRINT("enter",("leaf_page: %ld keypos: 0x%lx", (long) leaf_page, + DBUG_PRINT("enter",("leaf_page: %lu keypos: 0x%lx", + (ulong) (leaf_page->pos / share->block_size), (ulong) keypos)); DBUG_DUMP("leaf_buff", leaf_page->buff, leaf_page->size); @@ -587,11 +598,10 @@ static int del(MARIA_HA *info, MARIA_KEY *key, ret_value= underflow(info, keyinfo, leaf_page, &next_page, endpos); if (ret_value == 0 && leaf_page->size > - (uint) (keyinfo->block_length - KEYPAGE_CHECKSUM_SIZE)) + share->max_index_block_size) { ret_value= (_ma_split_page(info, key, leaf_page, - (uint) (keyinfo->block_length - - KEYPAGE_CHECKSUM_SIZE), + share->max_index_block_size, (uchar*) 0, 0, 0, ret_key_buff, 0) | 2); } @@ -708,8 +718,7 @@ err: @fn underflow() @param anc_buff Anchestor page data - @param leaf_page Page number of leaf page - @param leaf_buff Leaf page (page that underflowed) + @param leaf_page Leaf page (page that underflowed) @param leaf_page_link Pointer to pin information about leaf page @param keypos Position after current key in anc_buff @@ -743,7 +752,8 @@ static int underflow(MARIA_HA *info, MARIA_KEYDEF *keyinfo, MARIA_KEY tmp_key, anc_key, leaf_key; MARIA_PAGE next_page; DBUG_ENTER("underflow"); - DBUG_PRINT("enter",("leaf_page: %ld keypos: 0x%lx",(long) leaf_page->pos, + DBUG_PRINT("enter",("leaf_page: %lu keypos: 0x%lx", + (ulong) (leaf_page->pos / share->block_size), (ulong) keypos)); DBUG_DUMP("anc_buff", anc_page->buff, anc_page->size); DBUG_DUMP("leaf_buff", leaf_page->buff, leaf_page->size); @@ -841,7 +851,7 @@ static int underflow(MARIA_HA *info, MARIA_KEYDEF *keyinfo, anc_page->size= new_anc_length; page_store_size(share, anc_page); - if (buff_length <= (uint) (keyinfo->block_length - KEYPAGE_CHECKSUM_SIZE)) + if (buff_length <= share->max_index_block_size) { /* All keys fitted into one page */ page_mark_changed(info, &next_page); @@ -854,10 +864,15 @@ static int underflow(MARIA_HA *info, MARIA_KEYDEF *keyinfo, if (share->now_transactional) { - /* Log changes to parent page */ + /* + Log changes to parent page. Note that this page may have been + temporarily bigger than block_size. + */ if (_ma_log_delete(anc_page, key_deleted.key_pos, key_deleted.changed_length, - key_deleted.move_length)) + key_deleted.move_length, + anc_length - anc_page->org_size, + KEY_OP_DEBUG_LOG_DEL_CHANGE_2)) goto err; /* Log changes to leaf page. Data for leaf page is in leaf_buff @@ -986,7 +1001,8 @@ static int underflow(MARIA_HA *info, MARIA_KEYDEF *keyinfo, */ DBUG_ASSERT(new_buff_length <= next_buff_length); if (_ma_log_prefix(&next_page, key_inserted.changed_length, - (int) (new_buff_length - next_buff_length))) + (int) (new_buff_length - next_buff_length), + KEY_OP_DEBUG_LOG_PREFIX_1)) goto err; } page_mark_changed(info, &next_page); @@ -1044,11 +1060,19 @@ static int underflow(MARIA_HA *info, MARIA_KEYDEF *keyinfo, /* Remember for logging how many bytes of leaf_buff that are not changed */ DBUG_ASSERT((int) key_inserted.changed_length >= key_inserted.move_length); - unchanged_leaf_length= leaf_length - (key_inserted.changed_length - - key_inserted.move_length); + unchanged_leaf_length= (leaf_length - p_length - + (key_inserted.changed_length - + key_inserted.move_length)); new_buff_length= buff_length + leaf_length - p_length + t_length; +#ifdef EXTRA_DEBUG + /* Ensure that unchanged_leaf_length is correct */ + DBUG_ASSERT(bcmp(next_page.buff + new_buff_length - unchanged_leaf_length, + leaf_buff + leaf_length - unchanged_leaf_length, + unchanged_leaf_length) == 0); +#endif + page_flag= next_page.flag | leaf_page->flag; if (anc_key.flag & (SEARCH_USER_KEY_HAS_TRANSID | SEARCH_PAGE_KEY_HAS_TRANSID)) @@ -1069,8 +1093,7 @@ static int underflow(MARIA_HA *info, MARIA_KEYDEF *keyinfo, anc_page->size= new_anc_length; page_store_size(share, anc_page); - if (new_buff_length <= (uint) (keyinfo->block_length - - KEYPAGE_CHECKSUM_SIZE)) + if (new_buff_length <= share->max_index_block_size) { /* All keys fitted into one page */ page_mark_changed(info, leaf_page); @@ -1079,10 +1102,14 @@ static int underflow(MARIA_HA *info, MARIA_KEYDEF *keyinfo, if (share->now_transactional) { - /* Log changes to parent page */ + /* + Log changes to parent page. Note that this page may have been + temporarily bigger than block_size. + */ if (_ma_log_delete(anc_page, key_deleted.key_pos, - key_deleted.changed_length, key_deleted.move_length)) - + key_deleted.changed_length, key_deleted.move_length, + anc_length - anc_page->org_size, + KEY_OP_DEBUG_LOG_DEL_CHANGE_3)) goto err; /* Log changes to next page. Data for leaf page is in buff @@ -1192,8 +1219,10 @@ static int underflow(MARIA_HA *info, MARIA_KEYDEF *keyinfo, This contains original data with new data added first */ DBUG_ASSERT(leaf_length <= new_leaf_length); + DBUG_ASSERT(new_leaf_length >= unchanged_leaf_length); if (_ma_log_prefix(leaf_page, new_leaf_length - unchanged_leaf_length, - (int) (new_leaf_length - leaf_length))) + (int) (new_leaf_length - leaf_length), + KEY_OP_DEBUG_LOG_PREFIX_2)) goto err; /* Log changes to next page @@ -1395,7 +1424,9 @@ static uint remove_key(MARIA_KEYDEF *keyinfo, uint page_flag, uint nod_flag, ****************************************************************************/ /** - @brief log entry where some parts are deleted and some things are changed + @brief + log entry where some parts are deleted and some things are changed + and some data could be added last. @fn _ma_log_delete() @param info Maria handler @@ -1404,74 +1435,148 @@ static uint remove_key(MARIA_KEYDEF *keyinfo, uint page_flag, uint nod_flag, @param key_pos Start of change area @param changed_length How many bytes where changed at key_pos @param move_length How many bytes where deleted at key_pos + @param append_length Length of data added last + This is taken from end of ma_page->buff + This is mainly used when a key is deleted. The append happens + when we delete a key from a page with data > block_size kept in + memory and we have to add back the data that was stored > block_size */ my_bool _ma_log_delete(MARIA_PAGE *ma_page, const uchar *key_pos, - uint changed_length, uint move_length) + uint changed_length, uint move_length, + uint append_length __attribute__((unused)), + enum en_key_debug debug_marker __attribute__((unused))) { LSN lsn; - uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE + 9 + 7], *log_pos; - LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 3]; - uint translog_parts; + uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE + 2 + 2 + 3 + 3 + 6 + 3 + 7]; + uchar *log_pos; + LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 7]; + uint translog_parts, current_size, extra_length; uint offset= (uint) (key_pos - ma_page->buff); MARIA_HA *info= ma_page->info; MARIA_SHARE *share= info->s; my_off_t page; DBUG_ENTER("_ma_log_delete"); DBUG_PRINT("enter", ("page: %lu changed_length: %u move_length: %d", - (ulong) ma_page->pos, changed_length, move_length)); + (ulong) (ma_page->pos / share->block_size), + changed_length, move_length)); DBUG_ASSERT(share->now_transactional && move_length); DBUG_ASSERT(offset + changed_length <= ma_page->size); + DBUG_ASSERT(ma_page->org_size - move_length + append_length == ma_page->size); + DBUG_ASSERT(move_length <= ma_page->org_size - share->keypage_header); /* Store address of new root page */ page= ma_page->pos / share->block_size; page_store(log_data + FILEID_STORE_SIZE, page); log_pos= log_data+ FILEID_STORE_SIZE + PAGE_STORE_SIZE; + current_size= ma_page->org_size; + +#ifdef EXTRA_DEBUG_KEY_CHANGES + *log_pos++= KEY_OP_DEBUG; + *log_pos++= debug_marker; +#endif + + /* Store keypage_flag */ + *log_pos++= KEY_OP_SET_PAGEFLAG; + *log_pos++= ma_page->buff[KEYPAGE_TRANSFLAG_OFFSET]; + log_pos[0]= KEY_OP_OFFSET; int2store(log_pos+1, offset); - log_pos[3]= KEY_OP_SHIFT; - int2store(log_pos+4, -(int) move_length); - log_pos+= 6; - translog_parts= 1; + log_pos+= 3; + translog_parts= TRANSLOG_INTERNAL_PARTS + 1; + extra_length= 0; + if (changed_length) { + if (offset + changed_length >= share->max_index_block_size) + { + changed_length= share->max_index_block_size - offset; + move_length= 0; /* Nothing to move */ + current_size= share->max_index_block_size; + } + log_pos[0]= KEY_OP_CHANGE; int2store(log_pos+1, changed_length); log_pos+= 3; - translog_parts= 2; - log_array[TRANSLOG_INTERNAL_PARTS + 1].str= ma_page->buff + offset; - log_array[TRANSLOG_INTERNAL_PARTS + 1].length= changed_length; + log_array[translog_parts].str= ma_page->buff + offset; + log_array[translog_parts].length= changed_length; + translog_parts++; + + /* We only have to move things after offset+changed_length */ + offset+= changed_length; } -#ifdef EXTRA_DEBUG_KEY_CHANGES + log_array[TRANSLOG_INTERNAL_PARTS + 0].str= log_data; + log_array[TRANSLOG_INTERNAL_PARTS + 0].length= (uint) (log_pos - log_data); + + if (move_length) { - int page_length= ma_page->size; - ha_checksum crc; - crc= my_checksum(0, ma_page->buff + LSN_STORE_SIZE, - page_length - LSN_STORE_SIZE); - log_pos[0]= KEY_OP_CHECK; - int2store(log_pos+1, page_length); - int4store(log_pos+3, crc); - - log_array[TRANSLOG_INTERNAL_PARTS + translog_parts].str= log_pos; - log_array[TRANSLOG_INTERNAL_PARTS + translog_parts].length= 7; - changed_length+= 7; + uint log_length; + if (offset + move_length < share->max_index_block_size) + { + /* + Move down things that is on page. + page_offset in apply_redo_inxed() will be at original offset + + changed_length. + */ + log_pos[0]= KEY_OP_SHIFT; + int2store(log_pos+1, - (int) move_length); + log_length= 3; + current_size-= move_length; + } + else + { + /* Delete to end of page */ + uint tmp= current_size - offset; + current_size= offset; + log_pos[0]= KEY_OP_DEL_SUFFIX; + int2store(log_pos+1, tmp); + log_length= 3; + } + log_array[translog_parts].str= log_pos; + log_array[translog_parts].length= log_length; translog_parts++; + log_pos+= log_length; + extra_length+= log_length; } -#endif - log_array[TRANSLOG_INTERNAL_PARTS + 0].str= log_data; - log_array[TRANSLOG_INTERNAL_PARTS + 0].length= (uint) (log_pos - log_data); + if (current_size != ma_page->size && + current_size != share->max_index_block_size) + { + /* Append data that didn't fit on the page before */ + uint length= (min(ma_page->size, share->max_index_block_size) - + current_size); + uchar *data= ma_page->buff + current_size; + + DBUG_ASSERT(length <= append_length); + + log_pos[0]= KEY_OP_ADD_SUFFIX; + int2store(log_pos+1, length); + log_array[translog_parts].str= log_pos; + log_array[translog_parts].length= 3; + log_array[translog_parts + 1].str= data; + log_array[translog_parts + 1].length= length; + log_pos+= 3; + translog_parts+= 2; + current_size+= length; + extra_length+= 3 + length; + } + + _ma_log_key_changes(ma_page, + log_array + translog_parts, + log_pos, &extra_length, &translog_parts); + /* Remember new page length for future log entires for same page */ + ma_page->org_size= current_size; if (translog_write_record(&lsn, LOGREC_REDO_INDEX, info->trn, info, (translog_size_t) - log_array[TRANSLOG_INTERNAL_PARTS + 0].length + - changed_length, - TRANSLOG_INTERNAL_PARTS + translog_parts, + log_array[TRANSLOG_INTERNAL_PARTS].length + + changed_length + extra_length, translog_parts, log_array, log_data, NULL)) DBUG_RETURN(1); + DBUG_RETURN(0); } diff --git a/storage/maria/ma_key_recover.c b/storage/maria/ma_key_recover.c index 82f8099fe6a..1a61731e817 100644 --- a/storage/maria/ma_key_recover.c +++ b/storage/maria/ma_key_recover.c @@ -312,24 +312,33 @@ my_bool write_hook_for_undo_key_delete(enum translog_record_type type, */ my_bool _ma_log_prefix(MARIA_PAGE *ma_page, uint changed_length, - int move_length) + int move_length, + enum en_key_debug debug_marker __attribute__((unused))) { uint translog_parts; LSN lsn; - uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE + 7 + 7 + 2], *log_pos; + uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE + 7 + 7 + 2 + 2]; + uchar *log_pos; uchar *buff= ma_page->buff; - LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 3]; + LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 4]; pgcache_page_no_t page; MARIA_HA *info= ma_page->info; DBUG_ENTER("_ma_log_prefix"); DBUG_PRINT("enter", ("page: %lu changed_length: %u move_length: %d", (ulong) ma_page->pos, changed_length, move_length)); + DBUG_ASSERT(ma_page->size == ma_page->org_size + move_length); + page= ma_page->pos / info->s->block_size; log_pos= log_data + FILEID_STORE_SIZE; page_store(log_pos, page); log_pos+= PAGE_STORE_SIZE; +#ifdef EXTRA_DEBUG_KEY_CHANGES + (*log_pos++)= KEY_OP_DEBUG; + (*log_pos++)= debug_marker; +#endif + /* Store keypage_flag */ *log_pos++= KEY_OP_SET_PAGEFLAG; *log_pos++= buff[KEYPAGE_TRANSFLAG_OFFSET]; @@ -373,21 +382,11 @@ my_bool _ma_log_prefix(MARIA_PAGE *ma_page, uint changed_length, translog_parts= 2; } -#ifdef EXTRA_DEBUG_KEY_CHANGES - { - int page_length= ma_page->size; - ha_checksum crc; - crc= my_checksum(0, buff + LSN_STORE_SIZE, page_length - LSN_STORE_SIZE); - log_pos[0]= KEY_OP_CHECK; - int2store(log_pos+1, page_length); - int4store(log_pos+3, crc); - - log_array[TRANSLOG_INTERNAL_PARTS + translog_parts].str= log_pos; - log_array[TRANSLOG_INTERNAL_PARTS + translog_parts].length= 7; - changed_length+= 7; - translog_parts++; - } -#endif + _ma_log_key_changes(ma_page, log_array + TRANSLOG_INTERNAL_PARTS + + translog_parts, log_pos, &changed_length, + &translog_parts); + /* Remember new page length for future log entires for same page */ + ma_page->org_size= ma_page->size; DBUG_RETURN(translog_write_record(&lsn, LOGREC_REDO_INDEX, info->trn, info, @@ -407,7 +406,7 @@ my_bool _ma_log_prefix(MARIA_PAGE *ma_page, uint changed_length, my_bool _ma_log_suffix(MARIA_PAGE *ma_page, uint org_length, uint new_length) { LSN lsn; - LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 3]; + LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 4]; uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE + 10 + 7 + 2], *log_pos; uchar *buff= ma_page->buff; int diff; @@ -417,6 +416,8 @@ my_bool _ma_log_suffix(MARIA_PAGE *ma_page, uint org_length, uint new_length) DBUG_ENTER("_ma_log_suffix"); DBUG_PRINT("enter", ("page: %lu org_length: %u new_length: %u", (ulong) ma_page->pos, org_length, new_length)); + DBUG_ASSERT(ma_page->size == new_length); + DBUG_ASSERT(ma_page->org_size == org_length); page= ma_page->pos / info->s->block_size; @@ -451,20 +452,11 @@ my_bool _ma_log_suffix(MARIA_PAGE *ma_page, uint org_length, uint new_length) log_array[TRANSLOG_INTERNAL_PARTS + 0].length= (uint) (log_pos - log_data); -#ifdef EXTRA_DEBUG_KEY_CHANGES - { - ha_checksum crc; - crc= my_checksum(0, buff + LSN_STORE_SIZE, new_length - LSN_STORE_SIZE); - log_pos[0]= KEY_OP_CHECK; - int2store(log_pos+1, new_length); - int4store(log_pos+3, crc); - - log_array[TRANSLOG_INTERNAL_PARTS + translog_parts].str= log_pos; - log_array[TRANSLOG_INTERNAL_PARTS + translog_parts].length= 7; - extra_length+= 7; - translog_parts++; - } -#endif + _ma_log_key_changes(ma_page, + log_array + TRANSLOG_INTERNAL_PARTS + translog_parts, + log_pos, &extra_length, &translog_parts); + /* Remember new page length for future log entires for same page */ + ma_page->org_size= ma_page->size; DBUG_RETURN(translog_write_record(&lsn, LOGREC_REDO_INDEX, info->trn, info, @@ -481,37 +473,45 @@ my_bool _ma_log_suffix(MARIA_PAGE *ma_page, uint org_length, uint new_length) @param ma_page Changed page @param org_page_length Length of data in page before key was added + Final length in ma_page->size @note If handle_overflow is set, then we have to protect against logging changes that is outside of the page. This may happen during underflow() handling where the buffer in memory temporary contains more data than block_size + + ma_page may be a page that was previously logged and cuted down + becasue it's too big. (org_page_length > ma_page->org_size) */ my_bool _ma_log_add(MARIA_PAGE *ma_page, - uint org_page_length, uchar *key_pos, - uint changed_length, int move_length, + uint org_page_length __attribute__ ((unused)), + uchar *key_pos, uint changed_length, int move_length, my_bool handle_overflow __attribute__ ((unused))) { LSN lsn; uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE + 2 + 3 + 3 + 3 + 3 + 7 + - 2]; + 3 + 2]; uchar *log_pos; uchar *buff= ma_page->buff; - LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 3]; + LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 6]; MARIA_HA *info= ma_page->info; uint offset= (uint) (key_pos - buff); - uint page_length= info->s->block_size - KEYPAGE_CHECKSUM_SIZE; - uint translog_parts; + uint max_page_size= info->s->max_index_block_size; + uint translog_parts, current_size; pgcache_page_no_t page_pos; DBUG_ENTER("_ma_log_add"); DBUG_PRINT("enter", ("page: %lu org_page_length: %u changed_length: %u " "move_length: %d", - (ulong) ma_page->pos, org_page_length, changed_length, + (ulong) (ma_page->pos / info->s->block_size), + org_page_length, changed_length, move_length)); DBUG_ASSERT(info->s->now_transactional); DBUG_ASSERT(move_length <= (int) changed_length); + DBUG_ASSERT(ma_page->org_size == min(org_page_length, max_page_size)); + DBUG_ASSERT(ma_page->size == org_page_length + move_length); + DBUG_ASSERT(offset < max_page_size); /* Write REDO entry that contains the logical operations we need @@ -520,6 +520,7 @@ my_bool _ma_log_add(MARIA_PAGE *ma_page, log_pos= log_data + FILEID_STORE_SIZE; page_pos= ma_page->pos / info->s->block_size; page_store(log_pos, page_pos); + current_size= ma_page->org_size; log_pos+= PAGE_STORE_SIZE; #ifdef EXTRA_DEBUG_KEY_CHANGES @@ -531,40 +532,41 @@ my_bool _ma_log_add(MARIA_PAGE *ma_page, *log_pos++= KEY_OP_SET_PAGEFLAG; *log_pos++= buff[KEYPAGE_TRANSFLAG_OFFSET]; - if (org_page_length + move_length > page_length) + /* + Don't overwrite page boundary + It's ok to cut this as we will append the data at end of page + in the next log entry + */ + if (offset + changed_length > max_page_size) + { + DBUG_ASSERT(handle_overflow); + changed_length= max_page_size - offset; /* Update to end of page */ + move_length= 0; /* Nothing to move */ + /* Extend the page to max length on recovery */ + *log_pos++= KEY_OP_MAX_PAGELENGTH; + current_size= max_page_size; + } + + /* Check if adding the key made the page overflow */ + if (current_size + move_length > max_page_size) { /* - Overflow. Cut either key or data from page end so that key fits - The code that splits the too big page will ignore logging any - data over org_page_length + Adding the key caused an overflow. Cut away the part of the + page that doesn't fit. */ + uint diff; DBUG_ASSERT(handle_overflow); - if (offset + changed_length > page_length) - { - /* Log that data changed to end of page */ - changed_length= page_length - offset; - move_length= 0; - /* Set page to max length */ - org_page_length= page_length; - *log_pos++= KEY_OP_MAX_PAGELENGTH; - } - else - { - /* They key will not be part of the page ; Don't log it */ - uint diff= org_page_length + move_length - page_length; - log_pos[0]= KEY_OP_DEL_SUFFIX; - int2store(log_pos+1, diff); - log_pos+= 3; - org_page_length-= diff; - DBUG_ASSERT(org_page_length == page_length - move_length); - } - DBUG_ASSERT(offset != org_page_length); + diff= current_size + move_length - max_page_size; + log_pos[0]= KEY_OP_DEL_SUFFIX; + int2store(log_pos+1, diff); + log_pos+= 3; + current_size= max_page_size - move_length; } - if (offset == org_page_length) + if (offset == current_size) { - DBUG_ASSERT(move_length == (int) changed_length); log_pos[0]= KEY_OP_ADD_SUFFIX; + current_size+= changed_length; } else { @@ -576,51 +578,104 @@ my_bool _ma_log_add(MARIA_PAGE *ma_page, log_pos[0]= KEY_OP_SHIFT; int2store(log_pos+1, move_length); log_pos+= 3; + current_size+= move_length; } log_pos[0]= KEY_OP_CHANGE; } int2store(log_pos+1, changed_length); log_pos+= 3; - translog_parts= 2; log_array[TRANSLOG_INTERNAL_PARTS + 0].str= log_data; log_array[TRANSLOG_INTERNAL_PARTS + 0].length= (uint) (log_pos - log_data); log_array[TRANSLOG_INTERNAL_PARTS + 1].str= key_pos; log_array[TRANSLOG_INTERNAL_PARTS + 1].length= changed_length; + translog_parts= TRANSLOG_INTERNAL_PARTS + 2; -#ifdef EXTRA_DEBUG_KEY_CHANGES + /* + If page was originally > block_size before operation and now all data + fits, append the end data that was not part of the previous logged + page to it. + */ + DBUG_ASSERT(current_size <= max_page_size && current_size <= ma_page->size); + if (current_size != ma_page->size && current_size != max_page_size) { - MARIA_SHARE *share= info->s; - ha_checksum crc; - uint save_page_length= ma_page->size; - uint new_length= org_page_length + move_length; - _ma_store_page_used(share, buff, new_length); - crc= my_checksum(0, buff + LSN_STORE_SIZE, new_length - LSN_STORE_SIZE); - log_pos[0]= KEY_OP_CHECK; - int2store(log_pos+1, new_length); - int4store(log_pos+3, crc); - - log_array[TRANSLOG_INTERNAL_PARTS + translog_parts].str= log_pos; - log_array[TRANSLOG_INTERNAL_PARTS + translog_parts].length= 7; - changed_length+= 7; - translog_parts++; - _ma_store_page_used(share, buff, save_page_length); + uint length= min(ma_page->size, max_page_size) - current_size; + uchar *data= ma_page->buff + current_size; + + log_pos[0]= KEY_OP_ADD_SUFFIX; + int2store(log_pos+1, length); + log_array[translog_parts].str= log_pos; + log_array[translog_parts].length= 3; + log_array[translog_parts+1].str= data; + log_array[translog_parts+1].length= length; + log_pos+= 3; + translog_parts+= 2; + current_size+= length; + changed_length+= length + 3; } -#endif + + _ma_log_key_changes(ma_page, log_array + translog_parts, + log_pos, &changed_length, &translog_parts); + /* + Remember new page length for future log entries for same page + Note that this can be different from ma_page->size in case of page + overflow! + */ + ma_page->org_size= current_size; + DBUG_ASSERT(ma_page->org_size == min(ma_page->size, max_page_size)); if (translog_write_record(&lsn, LOGREC_REDO_INDEX, info->trn, info, (translog_size_t) log_array[TRANSLOG_INTERNAL_PARTS + 0].length + - changed_length, - TRANSLOG_INTERNAL_PARTS + translog_parts, + changed_length, translog_parts, log_array, log_data, NULL)) DBUG_RETURN(-1); DBUG_RETURN(0); } +#ifdef EXTRA_DEBUG_KEY_CHANGES + +/* Log checksum and optionally key page to log */ + +void _ma_log_key_changes(MARIA_PAGE *ma_page, LEX_CUSTRING *log_array, + uchar *log_pos, uint *changed_length, + uint *translog_parts) +{ + MARIA_SHARE *share= ma_page->info->s; + int page_length= min(ma_page->size, share->max_index_block_size); + uint org_length; + ha_checksum crc; + + DBUG_ASSERT(ma_page->flag == (uint) ma_page->buff[KEYPAGE_TRANSFLAG_OFFSET]); + + /* We have to change length as the page may have been shortened */ + org_length= _ma_get_page_used(share, ma_page->buff); + _ma_store_page_used(share, ma_page->buff, page_length); + crc= my_checksum(0, ma_page->buff + LSN_STORE_SIZE, + page_length - LSN_STORE_SIZE); + _ma_store_page_used(share, ma_page->buff, org_length); + + log_pos[0]= KEY_OP_CHECK; + int2store(log_pos+1, page_length); + int4store(log_pos+3, crc); + + log_array[0].str= log_pos; + log_array[0].length= 7; + (*changed_length)+= 7; + (*translog_parts)++; +#ifdef EXTRA_STORE_FULL_PAGE_IN_KEY_CHANGES + log_array[1].str= ma_page->buff; + log_array[1].length= page_length; + (*changed_length)+= page_length; + (*translog_parts)++; +#endif /* EXTRA_STORE_FULL_PAGE_IN_KEY_CHANGES */ +} + +#endif /* EXTRA_DEBUG_KEY_CHANGES */ + /**************************************************************************** Redo of key pages ****************************************************************************/ @@ -716,7 +771,7 @@ uint _ma_apply_redo_index_new_page(MARIA_HA *info, LSN lsn, bzero(buff, LSN_STORE_SIZE); memcpy(buff + LSN_STORE_SIZE, header, length); bzero(buff + LSN_STORE_SIZE + length, - share->block_size - LSN_STORE_SIZE - KEYPAGE_CHECKSUM_SIZE - length); + share->max_index_block_size - LSN_STORE_SIZE - length); bfill(buff + share->block_size - KEYPAGE_CHECKSUM_SIZE, KEYPAGE_CHECKSUM_SIZE, (uchar) 255); @@ -847,7 +902,9 @@ err: KEY_OP_ADD_SUFFIX 2 length, data Add data to end of page KEY_OP_DEL_SUFFIX 2 length Reduce page length with this Sets position to start of page - KEY_OP_CHECK 6 page_length[2},CRC Used only when debugging + KEY_OP_CHECK 6 page_length[2],CRC Used only when debugging + This may be followed by page_length + of data (until end of log record) KEY_OP_COMPACT_PAGE 6 transid KEY_OP_SET_PAGEFLAG 1 flag for page KEY_OP_MAX_PAGELENGTH 0 Set page to max length @@ -870,7 +927,7 @@ uint _ma_apply_redo_index(MARIA_HA *info, const uchar *header_end= header + head_length; uint page_offset= 0, org_page_length; uint nod_flag, page_length, keypage_header, keynr; - uint max_page_length= share->block_size - KEYPAGE_CHECKSUM_SIZE; + uint max_page_size= share->max_index_block_size; int result; MARIA_PAGE page; DBUG_ENTER("_ma_apply_redo_index"); @@ -919,11 +976,14 @@ uint _ma_apply_redo_index(MARIA_HA *info, header+= 2; DBUG_PRINT("redo", ("key_op_shift: %d", length)); DBUG_ASSERT(page_offset != 0 && page_offset <= page_length && - page_length + length <= max_page_length); + page_length + length <= max_page_size); if (length < 0) + { + DBUG_ASSERT(page_offset - length <= page_length); bmove(buff + page_offset, buff + page_offset - length, page_length - page_offset + length); + } else if (page_length != page_offset) bmove_upp(buff + page_length + length, buff + page_length, page_length - page_offset); @@ -937,6 +997,7 @@ uint _ma_apply_redo_index(MARIA_HA *info, DBUG_ASSERT(page_offset != 0 && page_offset + length <= page_length); memcpy(buff + page_offset, header + 2 , length); + page_offset+= length; /* Put offset after changed length */ header+= 2 + length; break; } @@ -948,7 +1009,7 @@ uint _ma_apply_redo_index(MARIA_HA *info, insert_length, changed_length)); DBUG_ASSERT(insert_length <= changed_length && - page_length + changed_length <= max_page_length); + page_length + changed_length <= max_page_size); bmove_upp(buff + page_length + insert_length, buff + page_length, page_length - keypage_header); @@ -974,8 +1035,8 @@ uint _ma_apply_redo_index(MARIA_HA *info, case KEY_OP_ADD_SUFFIX: /* 6 */ { uint insert_length= uint2korr(header); - DBUG_PRINT("redo", ("key_op_add_prefix: %u", insert_length)); - DBUG_ASSERT(page_length + insert_length <= max_page_length); + DBUG_PRINT("redo", ("key_op_add_suffix: %u", insert_length)); + DBUG_ASSERT(page_length + insert_length <= max_page_size); memcpy(buff + page_length, header+2, insert_length); page_length+= insert_length; @@ -1003,13 +1064,22 @@ uint _ma_apply_redo_index(MARIA_HA *info, if (crc != (uint32) my_checksum(0, buff + LSN_STORE_SIZE, page_length - LSN_STORE_SIZE)) { - DBUG_PRINT("error", ("page_length %u",page_length)); - DBUG_DUMP("KEY_OP_CHECK bad page", buff, max_page_length); - DBUG_ASSERT("crc" == "failure in REDO_INDEX"); + DBUG_DUMP("KEY_OP_CHECK bad page", buff, page_length); + if (header + 6 + page_length <= header_end) + { + DBUG_DUMP("KEY_OP_CHECK org page", header + 6, page_length); + } + DBUG_ASSERT("crc failure in REDO_INDEX" == 0); } #endif DBUG_PRINT("redo", ("key_op_check")); - header+= 6; + /* + This is the last entry in the block and it can contain page_length + data or not + */ + DBUG_ASSERT(header + 6 == header_end || + header + 6 + page_length == header_end); + header= header_end; break; } case KEY_OP_DEBUG: @@ -1018,7 +1088,7 @@ uint _ma_apply_redo_index(MARIA_HA *info, break; case KEY_OP_MAX_PAGELENGTH: DBUG_PRINT("redo", ("key_op_max_page_length")); - page_length= max_page_length; + page_length= max_page_size; break; case KEY_OP_MULTI_COPY: /* 9 */ { @@ -1040,7 +1110,7 @@ uint _ma_apply_redo_index(MARIA_HA *info, log_memcpy_length= uint2korr(header); header+= 2; log_memcpy_end= header + log_memcpy_length; - DBUG_ASSERT(full_length <= max_page_length); + DBUG_ASSERT(full_length <= max_page_size); while (header < log_memcpy_end) { uint to, from; @@ -1049,7 +1119,7 @@ uint _ma_apply_redo_index(MARIA_HA *info, from= uint2korr(header); header+= 2; /* "from" is a place in the existing page */ - DBUG_ASSERT(max(from, to) < max_page_length); + DBUG_ASSERT(max(from, to) < max_page_size); memcpy(buff + to, buff + from, full_length); } break; diff --git a/storage/maria/ma_key_recover.h b/storage/maria/ma_key_recover.h index 30830f7d82c..3fdb045ee40 100644 --- a/storage/maria/ma_key_recover.h +++ b/storage/maria/ma_key_recover.h @@ -64,17 +64,26 @@ extern my_bool write_hook_for_undo_key_delete(enum translog_record_type type, TRN *trn, MARIA_HA *tbl_info, LSN *lsn, void *hook_arg); -my_bool _ma_log_prefix(MARIA_PAGE *page, uint changed_length, int move_length); +my_bool _ma_log_prefix(MARIA_PAGE *page, uint changed_length, int move_length, + enum en_key_debug debug_marker); my_bool _ma_log_suffix(MARIA_PAGE *page, uint org_length, uint new_length); my_bool _ma_log_add(MARIA_PAGE *page, uint buff_length, uchar *key_pos, uint changed_length, int move_length, my_bool handle_overflow); my_bool _ma_log_delete(MARIA_PAGE *page, const uchar *key_pos, - uint changed_length, uint move_length); + uint changed_length, uint move_length, + uint append_length, enum en_key_debug debug_marker); my_bool _ma_log_change(MARIA_PAGE *page, const uchar *key_pos, uint length, enum en_key_debug debug_marker); my_bool _ma_log_new(MARIA_PAGE *page, my_bool root_page); +#ifdef EXTRA_DEBUG_KEY_CHANGES +void _ma_log_key_changes(MARIA_PAGE *ma_page, LEX_CUSTRING *log_array, + uchar *log_pos, uint *changed_length, + uint *translog_parts); +#else +#define _ma_log_key_changes(A,B,C,D,E) +#endif uint _ma_apply_redo_index_new_page(MARIA_HA *info, LSN lsn, const uchar *header, uint length); diff --git a/storage/maria/ma_loghandler.h b/storage/maria/ma_loghandler.h index 5ef98244b76..7bb38e628da 100644 --- a/storage/maria/ma_loghandler.h +++ b/storage/maria/ma_loghandler.h @@ -172,13 +172,24 @@ enum en_key_op enum en_key_debug { - KEY_OP_DEBUG_RTREE_COMBINE, - KEY_OP_DEBUG_RTREE_SPLIT, - KEY_OP_DEBUG_RTREE_SET_KEY, - KEY_OP_DEBUG_FATHER_CHANGED_1, - KEY_OP_DEBUG_FATHER_CHANGED_2, - KEY_OP_DEBUG_LOG_SPLIT, - KEY_OP_DEBUG_LOG_ADD + KEY_OP_DEBUG_RTREE_COMBINE, /* 0 */ + KEY_OP_DEBUG_RTREE_SPLIT, /* 1 */ + KEY_OP_DEBUG_RTREE_SET_KEY, /* 2 */ + KEY_OP_DEBUG_FATHER_CHANGED_1, /* 3 */ + KEY_OP_DEBUG_FATHER_CHANGED_2, /* 4 */ + KEY_OP_DEBUG_LOG_SPLIT, /* 5 */ + KEY_OP_DEBUG_LOG_ADD, /* 6 */ + KEY_OP_DEBUG_LOG_PREFIX_1, /* 7 */ + KEY_OP_DEBUG_LOG_PREFIX_2, /* 8 */ + KEY_OP_DEBUG_LOG_PREFIX_3, /* 9 */ + KEY_OP_DEBUG_LOG_PREFIX_4, /* 10 */ + KEY_OP_DEBUG_LOG_PREFIX_5, /* 11 */ + KEY_OP_DEBUG_LOG_DEL_CHANGE_1, /* 12 */ + KEY_OP_DEBUG_LOG_DEL_CHANGE_2, /* 13 */ + KEY_OP_DEBUG_LOG_DEL_CHANGE_3, /* 14 */ + KEY_OP_DEBUG_LOG_DEL_CHANGE_RT, /* 15 */ + KEY_OP_DEBUG_LOG_DEL_PREFIX, /* 16 */ + KEY_OP_DEBUG_LOG_MIDDLE /* 17 */ }; diff --git a/storage/maria/ma_open.c b/storage/maria/ma_open.c index a43513cfaa0..39c13e27e15 100644 --- a/storage/maria/ma_open.c +++ b/storage/maria/ma_open.c @@ -550,6 +550,7 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags) strmov(share->open_file_name.str, name); share->block_size= share->base.block_size; /* Convenience */ + share->max_index_block_size= share->block_size - KEYPAGE_CHECKSUM_SIZE; { HA_KEYSEG *pos=share->keyparts; uint32 ftkey_nr= 1; diff --git a/storage/maria/ma_page.c b/storage/maria/ma_page.c index acbee2a6f07..a4423133270 100644 --- a/storage/maria/ma_page.c +++ b/storage/maria/ma_page.c @@ -59,6 +59,7 @@ void _ma_page_setup(MARIA_PAGE *page, MARIA_HA *info, page->buff= buff; page->pos= pos; page->size= _ma_get_page_used(share, buff); + page->org_size= page->size; page->flag= _ma_get_keypage_flag(share, buff); page->node= ((page->flag & KEYPAGE_FLAG_ISNOD) ? share->base.key_reflength : 0); @@ -68,7 +69,7 @@ void _ma_page_setup(MARIA_PAGE *page, MARIA_HA *info, void page_cleanup(MARIA_SHARE *share, MARIA_PAGE *page) { uint length= page->size; - DBUG_ASSERT(length <= block_size - KEYPAGE_CHECKSUM_SIZE); + DBUG_ASSERT(length <= share->max_index_block_size); bzero(page->buff + length, share->block_size - length); } #endif @@ -103,7 +104,7 @@ my_bool _ma_fetch_keypage(MARIA_PAGE *page, MARIA_HA *info, MARIA_SHARE *share= info->s; uint block_size= share->block_size; DBUG_ENTER("_ma_fetch_keypage"); - DBUG_PRINT("enter",("pos: %ld", (long) pos)); + DBUG_PRINT("enter",("page: %lu", (ulong) (pos / block_size))); tmp= pagecache_read(share->pagecache, &share->kfile, (pgcache_page_no_t) (pos / block_size), level, buff, @@ -142,6 +143,7 @@ my_bool _ma_fetch_keypage(MARIA_PAGE *page, MARIA_HA *info, page->buff= tmp; page->pos= pos; page->size= _ma_get_page_used(share, tmp); + page->org_size= page->size; /* For debugging */ page->flag= _ma_get_keypage_flag(share, tmp); page->node= ((page->flag & KEYPAGE_FLAG_ISNOD) ? share->base.key_reflength : 0); @@ -149,7 +151,7 @@ my_bool _ma_fetch_keypage(MARIA_PAGE *page, MARIA_HA *info, #ifdef EXTRA_DEBUG { uint page_size= page->size; - if (page_size < 4 || page_size > block_size || + if (page_size < 4 || page_size > share->max_index_block_size || _ma_get_keynr(share, tmp) != keyinfo->key_nr) { DBUG_PRINT("error",("page %lu had wrong page length: %u keynr: %u", @@ -159,7 +161,7 @@ my_bool _ma_fetch_keypage(MARIA_PAGE *page, MARIA_HA *info, info->last_keypage = HA_OFFSET_ERROR; maria_print_error(share, HA_ERR_CRASHED); my_errno= HA_ERR_CRASHED; - tmp= 0; + DBUG_RETURN(1); } } #endif @@ -179,6 +181,13 @@ my_bool _ma_write_keypage(MARIA_PAGE *page, enum pagecache_page_lock lock, MARIA_PINNED_PAGE page_link; DBUG_ENTER("_ma_write_keypage"); + /* + The following ensures that for transactional tables we have logged + all changes that changes the page size (as the logging code sets + page->org_size) + */ + DBUG_ASSERT(!share->now_transactional || page->size == page->org_size); + #ifdef EXTRA_DEBUG /* Safety check */ { uint page_length, nod_flag; @@ -193,7 +202,7 @@ my_bool _ma_write_keypage(MARIA_PAGE *page, enum pagecache_page_lock lock, (page->pos & (maria_block_size-1))) { DBUG_PRINT("error",("Trying to write inside key status region: " - "key_start: %lu length: %lu page: %lu", + "key_start: %lu length: %lu page_pos: %lu", (long) share->base.keystart, (long) share->state.state.key_file_length, (long) page->pos)); @@ -201,7 +210,7 @@ my_bool _ma_write_keypage(MARIA_PAGE *page, enum pagecache_page_lock lock, DBUG_ASSERT(0); DBUG_RETURN(1); } - DBUG_PRINT("page",("write page at: %lu",(long) page->pos)); + DBUG_PRINT("page",("write page at: %lu",(ulong) (page->pos / block_size))); DBUG_DUMP("buff", buff, page_length); DBUG_ASSERT(page_length >= share->keypage_header + nod_flag + page->keyinfo->minlength || maria_in_recovery); @@ -274,7 +283,7 @@ int _ma_dispose(register MARIA_HA *info, my_off_t pos, my_bool page_not_read) enum pagecache_page_lock lock_method; enum pagecache_page_pin pin_method; DBUG_ENTER("_ma_dispose"); - DBUG_PRINT("enter",("pos: %ld", (long) pos)); + DBUG_PRINT("enter",("page: %lu", (ulong) (pos / block_size))); DBUG_ASSERT(pos % block_size == 0); (void) _ma_lock_key_del(info, 0); @@ -423,8 +432,7 @@ my_off_t _ma_new(register MARIA_HA *info, int level, share->key_del_current= mi_sizekorr(buff+share->keypage_header); #ifndef DBUG_OFF key_del_current= share->key_del_current; - DBUG_ASSERT(key_del_current != share->state.key_del && - (key_del_current != 0) && + DBUG_ASSERT((key_del_current != 0) && ((key_del_current == HA_OFFSET_ERROR) || (key_del_current <= (share->state.state.key_file_length - block_size)))); @@ -453,32 +461,48 @@ my_off_t _ma_new(register MARIA_HA *info, int level, Log compactation of a index page */ -static my_bool _ma_log_compact_keypage(MARIA_HA *info, my_off_t page, +static my_bool _ma_log_compact_keypage(MARIA_PAGE *ma_page, TrID min_read_from) { LSN lsn; - uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE + 1 + TRANSID_SIZE]; + uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE + 1 + 7 + TRANSID_SIZE]; + uchar *log_pos; LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 1]; + MARIA_HA *info= ma_page->info; MARIA_SHARE *share= info->s; + uint translog_parts, extra_length; + my_off_t page= ma_page->pos; DBUG_ENTER("_ma_log_compact_keypage"); - DBUG_PRINT("enter", ("page: %lu", (ulong) page)); + DBUG_PRINT("enter", ("page: %lu", (ulong) (page / share->block_size))); /* Store address of new root page */ page/= share->block_size; page_store(log_data + FILEID_STORE_SIZE, page); - log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE]= KEY_OP_COMPACT_PAGE; - transid_store(log_data + FILEID_STORE_SIZE + PAGE_STORE_SIZE +1, - min_read_from); + log_pos= log_data + FILEID_STORE_SIZE + PAGE_STORE_SIZE; + + log_pos[0]= KEY_OP_COMPACT_PAGE; + transid_store(log_pos + 1, min_read_from); + log_pos+= 1 + TRANSID_SIZE; log_array[TRANSLOG_INTERNAL_PARTS + 0].str= log_data; - log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data); + log_array[TRANSLOG_INTERNAL_PARTS + 0].length= (uint) (log_pos - + log_data); + translog_parts= 1; + extra_length= 0; + + _ma_log_key_changes(ma_page, + log_array + TRANSLOG_INTERNAL_PARTS + translog_parts, + log_pos, &extra_length, &translog_parts); + /* Remember new page length for future log entires for same page */ + ma_page->org_size= ma_page->size; if (translog_write_record(&lsn, LOGREC_REDO_INDEX, info->trn, info, - (translog_size_t) sizeof(log_data), - TRANSLOG_INTERNAL_PARTS + 1, log_array, - log_data, NULL)) + log_array[TRANSLOG_INTERNAL_PARTS + + 0].length + extra_length, + TRANSLOG_INTERNAL_PARTS + translog_parts, + log_array, log_data, NULL)) DBUG_RETURN(1); DBUG_RETURN(0); } @@ -526,7 +550,7 @@ my_bool _ma_compact_keypage(MARIA_PAGE *ma_page, TrID min_read_from) { if (!(page= (*ma_page->keyinfo->skip_key)(&key, 0, 0, page))) { - DBUG_PRINT("error",("Couldn't find last key: page: 0x%lx", + DBUG_PRINT("error",("Couldn't find last key: page_pos: 0x%lx", (long) page)); maria_print_error(share, HA_ERR_CRASHED); my_errno=HA_ERR_CRASHED; @@ -588,7 +612,7 @@ my_bool _ma_compact_keypage(MARIA_PAGE *ma_page, TrID min_read_from) if (share->now_transactional) { - if (_ma_log_compact_keypage(info, ma_page->pos, min_read_from)) + if (_ma_log_compact_keypage(ma_page, min_read_from)) DBUG_RETURN(1); } DBUG_RETURN(0); diff --git a/storage/maria/ma_recovery.c b/storage/maria/ma_recovery.c index a10fac9a15d..b4fe96ae168 100644 --- a/storage/maria/ma_recovery.c +++ b/storage/maria/ma_recovery.c @@ -332,6 +332,8 @@ int maria_apply_log(LSN from_lsn, LSN end_lsn, if (end_lsn != LSN_IMPOSSIBLE) { abort_message_printed= 1; + if (!trace_file) + fputc('\n', stderr); my_message(HA_ERR_INITIALIZATION, "Maria recovery aborted as end_lsn/end of file was reached", MYF(0)); @@ -502,9 +504,13 @@ end: } if (error && !abort_message_printed) + { + if (!trace_file) + fputc('\n', stderr); my_message(HA_ERR_INITIALIZATION, "Maria recovery failed. Please run maria_chk -r on all maria " "tables and delete all maria_log.######## files", MYF(0)); + } procent_printed= 0; /* We don't cleanly close tables if we hit some error (may corrupt them by @@ -545,7 +551,7 @@ static int display_and_apply_record(const LOG_DESC *log_desc, if (log_desc->record_execute_in_redo_phase == NULL) { /* die on all not-yet-handled records :) */ - DBUG_ASSERT("one more hook" == "to write"); + DBUG_ASSERT("one more hook to write" == 0); return 1; } if ((error= (*log_desc->record_execute_in_redo_phase)(rec))) @@ -638,6 +644,20 @@ prototype_redo_exec_hook(INCOMPLETE_LOG) /* no such table, don't need to warn */ return 0; } + + if (maria_is_crashed(info)) + return 0; + + if (info->s->state.is_of_horizon > rec->lsn) + { + /* + This table was repaired at a time after this log entry. + We can assume that all rows was inserted sucessfully and we don't + have to warn about that the inserted data was not logged + */ + return 0; + } + /* Example of what can go wrong when replaying DDLs: CREATE TABLE t (logged); INSERT INTO t VALUES(1) (logged); @@ -658,10 +678,15 @@ prototype_redo_exec_hook(INCOMPLETE_LOG) there was a crash during a DDL (see comment in execution of REDO_RENAME_TABLE ). */ - tprint(tracef, "***WARNING: MySQL server currently logs no records" - " about insertion of data by ALTER TABLE and CREATE SELECT," - " as they are not necessary for recovery;" - " present applying of log records may well not work.***\n"); + + eprint(tracef, "***WARNING: Aria engine currently logs no records " + "about insertion of data by ALTER TABLE and CREATE SELECT, " + "as they are not necessary for recovery; " + "present applying of log records to table '%s' may well not work." + "***", info->s->index_file_name.str); + + /* Prevent using the table for anything else than undo repair */ + _ma_mark_file_crashed(info->s); recovery_warnings++; return 0; } @@ -1052,7 +1077,11 @@ prototype_redo_exec_hook(REDO_REPAIR_TABLE) } if ((info= get_MARIA_HA_from_REDO_record(rec)) == NULL) DBUG_RETURN(0); - + if (maria_is_crashed(info)) + { + tprint(tracef, "we skip repairing crashed table\n"); + DBUG_RETURN(0); + } /* Otherwise, the mapping is newer than the table, and our record is newer than the mapping, so we can repair. @@ -1063,6 +1092,7 @@ prototype_redo_exec_hook(REDO_REPAIR_TABLE) param.isam_file_name= name= info->s->open_file_name.str; param.testflag= uint8korr(rec->header + FILEID_STORE_SIZE); param.tmpdir= maria_tmpdir; + param.max_trid= max_long_trid; DBUG_ASSERT(maria_tmpdir); info->s->state.key_map= uint8korr(rec->header + FILEID_STORE_SIZE + 8); @@ -1366,7 +1396,8 @@ prototype_redo_exec_hook(REDO_INSERT_ROW_HEAD) int error= 1; uchar *buff= NULL; MARIA_HA *info= get_MARIA_HA_from_REDO_record(rec); - if (info == NULL) + if (info == NULL || maria_is_crashed(info)) + { /* Table was skipped at open time (because later dropped/renamed, not @@ -1432,7 +1463,7 @@ prototype_redo_exec_hook(REDO_INSERT_ROW_TAIL) int error= 1; uchar *buff; MARIA_HA *info= get_MARIA_HA_from_REDO_record(rec); - if (info == NULL) + if (info == NULL || maria_is_crashed(info)) return 0; enlarge_buffer(rec); if (log_record_buffer.str == NULL || @@ -1473,7 +1504,7 @@ prototype_redo_exec_hook(REDO_INSERT_ROW_BLOBS) pgcache_page_no_t first_page, last_page; char llbuf1[22], llbuf2[22]; MARIA_HA *info= get_MARIA_HA_from_REDO_record(rec); - if (info == NULL) + if (info == NULL || maria_is_crashed(info)) return 0; enlarge_buffer(rec); if (log_record_buffer.str == NULL || @@ -1507,7 +1538,7 @@ prototype_redo_exec_hook(REDO_PURGE_ROW_HEAD) { int error= 1; MARIA_HA *info= get_MARIA_HA_from_REDO_record(rec); - if (info == NULL) + if (info == NULL || maria_is_crashed(info)) return 0; if (_ma_apply_redo_purge_row_head_or_tail(info, current_group_end_lsn, HEAD_PAGE, @@ -1523,7 +1554,7 @@ prototype_redo_exec_hook(REDO_PURGE_ROW_TAIL) { int error= 1; MARIA_HA *info= get_MARIA_HA_from_REDO_record(rec); - if (info == NULL) + if (info == NULL || maria_is_crashed(info)) return 0; if (_ma_apply_redo_purge_row_head_or_tail(info, current_group_end_lsn, TAIL_PAGE, @@ -1540,7 +1571,7 @@ prototype_redo_exec_hook(REDO_FREE_BLOCKS) int error= 1; uchar *buff; MARIA_HA *info= get_MARIA_HA_from_REDO_record(rec); - if (info == NULL) + if (info == NULL || maria_is_crashed(info)) return 0; enlarge_buffer(rec); @@ -1567,7 +1598,7 @@ prototype_redo_exec_hook(REDO_FREE_HEAD_OR_TAIL) { int error= 1; MARIA_HA *info= get_MARIA_HA_from_REDO_record(rec); - if (info == NULL) + if (info == NULL || maria_is_crashed(info)) return 0; if (_ma_apply_redo_free_head_or_tail(info, current_group_end_lsn, @@ -1599,7 +1630,7 @@ prototype_redo_exec_hook(REDO_INDEX) { int error= 1; MARIA_HA *info= get_MARIA_HA_from_REDO_record(rec); - if (info == NULL) + if (info == NULL || maria_is_crashed(info)) return 0; enlarge_buffer(rec); @@ -1625,7 +1656,7 @@ prototype_redo_exec_hook(REDO_INDEX_NEW_PAGE) { int error= 1; MARIA_HA *info= get_MARIA_HA_from_REDO_record(rec); - if (info == NULL) + if (info == NULL || maria_is_crashed(info)) return 0; enlarge_buffer(rec); @@ -1652,7 +1683,7 @@ prototype_redo_exec_hook(REDO_INDEX_FREE_PAGE) { int error= 1; MARIA_HA *info= get_MARIA_HA_from_REDO_record(rec); - if (info == NULL) + if (info == NULL || maria_is_crashed(info)) return 0; if (_ma_apply_redo_index_free_page(info, current_group_end_lsn, @@ -1668,7 +1699,7 @@ prototype_redo_exec_hook(REDO_BITMAP_NEW_PAGE) { int error= 1; MARIA_HA *info= get_MARIA_HA_from_REDO_record(rec); - if (info == NULL) + if (info == NULL || maria_is_crashed(info)) return 0; enlarge_buffer(rec); @@ -2114,7 +2145,7 @@ prototype_undo_exec_hook(UNDO_ROW_INSERT) MARIA_SHARE *share; const uchar *record_ptr; - if (info == NULL) + if (info == NULL || maria_is_crashed(info)) { /* Unlike for REDOs, if the table was skipped it is abnormal; we have a @@ -2170,7 +2201,7 @@ prototype_undo_exec_hook(UNDO_ROW_DELETE) LSN previous_undo_lsn= lsn_korr(rec->header); MARIA_SHARE *share; - if (info == NULL) + if (info == NULL || maria_is_crashed(info)) { skip_undo_record(previous_undo_lsn, trn); return 0; @@ -2209,7 +2240,7 @@ prototype_undo_exec_hook(UNDO_ROW_UPDATE) LSN previous_undo_lsn= lsn_korr(rec->header); MARIA_SHARE *share; - if (info == NULL) + if (info == NULL || maria_is_crashed(info)) { skip_undo_record(previous_undo_lsn, trn); return 0; @@ -2248,7 +2279,7 @@ prototype_undo_exec_hook(UNDO_KEY_INSERT) LSN previous_undo_lsn= lsn_korr(rec->header); MARIA_SHARE *share; - if (info == NULL) + if (info == NULL || maria_is_crashed(info)) { skip_undo_record(previous_undo_lsn, trn); return 0; @@ -2289,7 +2320,7 @@ prototype_undo_exec_hook(UNDO_KEY_DELETE) LSN previous_undo_lsn= lsn_korr(rec->header); MARIA_SHARE *share; - if (info == NULL) + if (info == NULL || maria_is_crashed(info)) { skip_undo_record(previous_undo_lsn, trn); return 0; @@ -2330,7 +2361,7 @@ prototype_undo_exec_hook(UNDO_KEY_DELETE_WITH_ROOT) LSN previous_undo_lsn= lsn_korr(rec->header); MARIA_SHARE *share; - if (info == NULL) + if (info == NULL || maria_is_crashed(info)) { skip_undo_record(previous_undo_lsn, trn); return 0; @@ -2371,6 +2402,7 @@ prototype_undo_exec_hook(UNDO_BULK_INSERT) LSN previous_undo_lsn= lsn_korr(rec->header); MARIA_SHARE *share; + /* Here we don't check for crashed as we can undo the bulk insert */ if (info == NULL) { skip_undo_record(previous_undo_lsn, trn); diff --git a/storage/maria/ma_rt_key.c b/storage/maria/ma_rt_key.c index bc2c8f71f5d..2e204990a3b 100644 --- a/storage/maria/ma_rt_key.c +++ b/storage/maria/ma_rt_key.c @@ -91,7 +91,8 @@ int maria_rtree_delete_key(MARIA_PAGE *page, uchar *key, uint key_length) page->size-= key_length_with_nod_flag; page_store_size(share, page); if (share->now_transactional && - _ma_log_delete(page, key_start, 0, key_length_with_nod_flag)) + _ma_log_delete(page, key_start, 0, key_length_with_nod_flag, + 0, KEY_OP_DEBUG_LOG_DEL_CHANGE_RT)) return -1; return 0; } diff --git a/storage/maria/ma_rt_split.c b/storage/maria/ma_rt_split.c index 8f137c2e0cf..856edc60490 100644 --- a/storage/maria/ma_rt_split.c +++ b/storage/maria/ma_rt_split.c @@ -308,7 +308,7 @@ static my_bool _ma_log_rt_split(MARIA_PAGE *page, LSN lsn; uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE + 1 + 2 + 1 + 2 + 2 + 7], *log_pos; - LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 5]; + LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 6]; uint translog_parts, extra_length= 0; my_off_t page_pos; DBUG_ENTER("_ma_log_rt_split"); @@ -344,24 +344,11 @@ static my_bool _ma_log_rt_split(MARIA_PAGE *page, translog_parts+= 2; } -#ifdef EXTRA_DEBUG_KEY_CHANGES - { - int page_length= page->size; - ha_checksum crc; - uchar *check_start= log_pos; - crc= my_checksum(0, page->buff + LSN_STORE_SIZE, - page_length - LSN_STORE_SIZE); - log_pos[0]= KEY_OP_CHECK; - log_pos++; - int2store(log_pos, page_length); - log_pos+= 2; - int4store(log_pos, crc); - log_pos+= 4; - log_array[TRANSLOG_INTERNAL_PARTS + translog_parts].str= check_start; - log_array[TRANSLOG_INTERNAL_PARTS + translog_parts].length= 7; - translog_parts++; - } -#endif + _ma_log_key_changes(page, + log_array + TRANSLOG_INTERNAL_PARTS + translog_parts, + log_pos, &extra_length, &translog_parts); + /* Remember new page length for future log entires for same page */ + page->org_size= page->size; if (translog_write_record(&lsn, LOGREC_REDO_INDEX, info->trn, info, diff --git a/storage/maria/ma_unique.c b/storage/maria/ma_unique.c index bae58fd70cd..a90578c2162 100644 --- a/storage/maria/ma_unique.c +++ b/storage/maria/ma_unique.c @@ -68,8 +68,7 @@ my_bool _ma_check_unique(MARIA_HA *info, MARIA_UNIQUEDEF *def, uchar *record, DBUG_ASSERT(info->last_key.data_length == MARIA_UNIQUE_HASH_LENGTH); if (_ma_search_next(info, &info->last_key, SEARCH_BIGGER, info->s->state.key_root[def->key]) || - bcmp((char*) info->last_key.data, (char*) key_buff, - MARIA_UNIQUE_HASH_LENGTH)) + bcmp(info->last_key.data, key_buff, MARIA_UNIQUE_HASH_LENGTH)) { info->page_changed= 1; /* Can't optimize read next */ info->cur_row.lastpos= lastpos; diff --git a/storage/maria/ma_write.c b/storage/maria/ma_write.c index 6a5c2995ae5..c6718299d17 100644 --- a/storage/maria/ma_write.c +++ b/storage/maria/ma_write.c @@ -613,7 +613,7 @@ static int w_search(register MARIA_HA *info, uint32 comp_flag, MARIA_KEY *key, MARIA_KEYDEF *keyinfo= key->keyinfo; MARIA_PAGE page; DBUG_ENTER("w_search"); - DBUG_PRINT("enter",("page: %ld", (long) page_pos)); + DBUG_PRINT("enter", ("page: %lu", (ulong) (page_pos/keyinfo->block_length))); if (!(temp_buff= (uchar*) my_alloca((uint) keyinfo->block_length+ MARIA_MAX_KEY_BUFF*2))) @@ -823,9 +823,9 @@ int _ma_insert(register MARIA_HA *info, MARIA_KEY *key, Check if the new key fits totally into the the page (anc_buff is big enough to contain a full page + one key) */ - if (a_length <= (uint) keyinfo->block_length - KEYPAGE_CHECKSUM_SIZE) + if (a_length <= share->max_index_block_size) { - if (keyinfo->block_length - KEYPAGE_CHECKSUM_SIZE - a_length < 32 && + if (share->max_index_block_size - a_length < 32 && (keyinfo->flag & HA_FULLTEXT) && key_pos == endpos && share->base.key_reflength <= share->base.rec_reflength && share->options & (HA_OPTION_PACK_RECORD | HA_OPTION_COMPRESS_RECORD)) @@ -885,9 +885,9 @@ ChangeSet@1.2562, 2008-04-09 07:41:40+02:00, serg@janus.mylan +9 -0 } else { - if (share->now_transactional && + if (share->now_transactional && _ma_log_add(anc_page, org_anc_length, - key_pos, s_temp.changed_length, t_length, 0)) + key_pos, s_temp.changed_length, t_length, 1)) DBUG_RETURN(-1); } DBUG_RETURN(0); /* There is room on page */ @@ -1265,7 +1265,7 @@ static int _ma_balance_page(MARIA_HA *info, MARIA_KEYDEF *keyinfo, curr_keylength); if ((right ? right_length : left_length) + curr_keylength <= - (uint) keyinfo->block_length - KEYPAGE_CHECKSUM_SIZE) + share->max_index_block_size) { /* Enough space to hold all keys in the two buffers ; Balance bufferts */ new_left_length= share->keypage_header+nod_flag+(keys/2)*curr_keylength; @@ -1320,7 +1320,8 @@ static int _ma_balance_page(MARIA_HA *info, MARIA_KEYDEF *keyinfo, start of page */ if (_ma_log_prefix(&next_page, 0, - ((int) new_right_length - (int) right_length))) + ((int) new_right_length - (int) right_length), + KEY_OP_DEBUG_LOG_PREFIX_3)) goto err; } else @@ -1383,7 +1384,8 @@ static int _ma_balance_page(MARIA_HA *info, MARIA_KEYDEF *keyinfo, */ if (_ma_log_prefix(&next_page, (uint) (new_right_length - right_length), - (int) (new_right_length - right_length))) + (int) (new_right_length - right_length), + KEY_OP_DEBUG_LOG_PREFIX_4)) goto err; } else @@ -1545,7 +1547,8 @@ static int _ma_balance_page(MARIA_HA *info, MARIA_KEYDEF *keyinfo, This contains the last 'extra_buff' from 'buff' */ if (_ma_log_prefix(&extra_page, - 0, (int) (extra_buff_length - right_length))) + 0, (int) (extra_buff_length - right_length), + KEY_OP_DEBUG_LOG_PREFIX_5)) goto err; /* @@ -1891,6 +1894,9 @@ my_bool _ma_log_new(MARIA_PAGE *ma_page, my_bool root_page) log_array[TRANSLOG_INTERNAL_PARTS + 1].str= ma_page->buff + LSN_STORE_SIZE; log_array[TRANSLOG_INTERNAL_PARTS + 1].length= page_length; + /* Remember new page length for future log entires for same page */ + ma_page->org_size= ma_page->size; + if (translog_write_record(&lsn, LOGREC_REDO_INDEX_NEW_PAGE, info->trn, info, (translog_size_t) @@ -1912,7 +1918,7 @@ my_bool _ma_log_change(MARIA_PAGE *ma_page, const uchar *key_pos, uint length, { LSN lsn; uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE + 2 + 6 + 7], *log_pos; - LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 3]; + LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 4]; uint offset= (uint) (key_pos - ma_page->buff), translog_parts; my_off_t page; MARIA_HA *info= ma_page->info; @@ -1921,6 +1927,7 @@ my_bool _ma_log_change(MARIA_PAGE *ma_page, const uchar *key_pos, uint length, DBUG_ASSERT(info->s->now_transactional); DBUG_ASSERT(offset + length <= ma_page->size); + DBUG_ASSERT(ma_page->org_size == ma_page->size); /* Store address of new root page */ page= ma_page->pos / info->s->block_size; @@ -1944,21 +1951,9 @@ my_bool _ma_log_change(MARIA_PAGE *ma_page, const uchar *key_pos, uint length, log_array[TRANSLOG_INTERNAL_PARTS + 1].length= length; translog_parts= 2; -#ifdef EXTRA_DEBUG_KEY_CHANGES - { - int page_length= ma_page->size; - ha_checksum crc; - crc= my_checksum(0, ma_page->buff + LSN_STORE_SIZE, - page_length - LSN_STORE_SIZE); - log_pos[0]= KEY_OP_CHECK; - int2store(log_pos+1, page_length); - int4store(log_pos+3, crc); - log_array[TRANSLOG_INTERNAL_PARTS + translog_parts].str= log_pos; - log_array[TRANSLOG_INTERNAL_PARTS + translog_parts].length= 7; - log_pos+= 7; - translog_parts++; - } -#endif + _ma_log_key_changes(ma_page, + log_array + TRANSLOG_INTERNAL_PARTS + translog_parts, + log_pos, &length, &translog_parts); if (translog_write_record(&lsn, LOGREC_REDO_INDEX, info->trn, info, @@ -1994,8 +1989,6 @@ my_bool _ma_log_change(MARIA_PAGE *ma_page, const uchar *key_pos, uint length, - Page is shortened from end - Data is added to end of page - Data added at front of page - - */ static my_bool _ma_log_split(MARIA_PAGE *ma_page, @@ -2006,9 +1999,9 @@ static my_bool _ma_log_split(MARIA_PAGE *ma_page, uint changed_length) { LSN lsn; - uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE + 2 + 3+3+3+3+3+2 +7]; + uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE + 2 + 2 + 3+3+3+3+3+2 +7]; uchar *log_pos; - LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 4]; + LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 6]; uint offset= (uint) (key_pos - ma_page->buff); uint translog_parts, extra_length; MARIA_HA *info= ma_page->info; @@ -2018,6 +2011,7 @@ static my_bool _ma_log_split(MARIA_PAGE *ma_page, (ulong) ma_page->pos, org_length, new_length)); DBUG_ASSERT(changed_length >= data_length); + DBUG_ASSERT(org_length <= info->s->max_index_block_size); log_pos= log_data + FILEID_STORE_SIZE; page= ma_page->pos / info->s->block_size; @@ -2029,6 +2023,10 @@ static my_bool _ma_log_split(MARIA_PAGE *ma_page, (*log_pos++)= KEY_OP_DEBUG_LOG_SPLIT; #endif + /* Store keypage_flag */ + *log_pos++= KEY_OP_SET_PAGEFLAG; + *log_pos++= ma_page->buff[KEYPAGE_TRANSFLAG_OFFSET]; + if (new_length <= offset || !key_pos) { /* @@ -2053,6 +2051,11 @@ static my_bool _ma_log_split(MARIA_PAGE *ma_page, */ max_key_length= new_length - offset; extra_length= min(key_length, max_key_length); + if (offset + move_length > new_length) + { + /* This is true when move_length includes changes for next packed key */ + move_length= new_length - offset; + } if ((int) new_length < (int) (org_length + move_length + data_length)) { @@ -2115,21 +2118,12 @@ static my_bool _ma_log_split(MARIA_PAGE *ma_page, log_array[TRANSLOG_INTERNAL_PARTS + 0].str= log_data; log_array[TRANSLOG_INTERNAL_PARTS + 0].length= (uint) (log_pos - log_data); -#ifdef EXTRA_DEBUG_KEY_CHANGES - { - int page_length= ma_page->size; - ha_checksum crc; - crc= my_checksum(0, ma_page->buff + LSN_STORE_SIZE, - page_length - LSN_STORE_SIZE); - log_pos[0]= KEY_OP_CHECK; - int2store(log_pos+1, page_length); - int4store(log_pos+3, crc); - log_array[TRANSLOG_INTERNAL_PARTS + translog_parts].str= log_pos; - log_array[TRANSLOG_INTERNAL_PARTS + translog_parts].length= 7; - extra_length+= 7; - translog_parts++; - } -#endif + + _ma_log_key_changes(ma_page, + log_array + TRANSLOG_INTERNAL_PARTS + translog_parts, + log_pos, &extra_length, &translog_parts); + /* Remember new page length for future log entires for same page */ + ma_page->org_size= ma_page->size; DBUG_RETURN(translog_write_record(&lsn, LOGREC_REDO_INDEX, info->trn, info, @@ -2168,8 +2162,9 @@ static my_bool _ma_log_del_prefix(MARIA_PAGE *ma_page, int move_length) { LSN lsn; - uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE + 12 + 7], *log_pos; - LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 3]; + uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE + 2 + 2 + 12 + 7]; + uchar *log_pos; + LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 4]; uint offset= (uint) (key_pos - ma_page->buff); uint diff_length= org_length + move_length - new_length; uint translog_parts, extra_length; @@ -2180,6 +2175,7 @@ static my_bool _ma_log_del_prefix(MARIA_PAGE *ma_page, (ulong) ma_page->pos, org_length, new_length)); DBUG_ASSERT((int) diff_length > 0); + DBUG_ASSERT(ma_page->size == new_length); log_pos= log_data + FILEID_STORE_SIZE; page= ma_page->pos / info->s->block_size; @@ -2189,6 +2185,15 @@ static my_bool _ma_log_del_prefix(MARIA_PAGE *ma_page, translog_parts= 1; extra_length= 0; +#ifdef EXTRA_DEBUG_KEY_CHANGES + *log_pos++= KEY_OP_DEBUG; + *log_pos++= KEY_OP_DEBUG_LOG_DEL_PREFIX; +#endif + + /* Store keypage_flag */ + *log_pos++= KEY_OP_SET_PAGEFLAG; + *log_pos++= ma_page->buff[KEYPAGE_TRANSFLAG_OFFSET]; + if (offset < diff_length + info->s->keypage_header) { /* @@ -2236,21 +2241,11 @@ static my_bool _ma_log_del_prefix(MARIA_PAGE *ma_page, log_array[TRANSLOG_INTERNAL_PARTS + 0].str= log_data; log_array[TRANSLOG_INTERNAL_PARTS + 0].length= (uint) (log_pos - log_data); -#ifdef EXTRA_DEBUG_KEY_CHANGES - { - int page_length= ma_page->size; - ha_checksum crc; - crc= my_checksum(0, ma_page->buff + LSN_STORE_SIZE, - page_length - LSN_STORE_SIZE); - log_pos[0]= KEY_OP_CHECK; - int2store(log_pos+1, page_length); - int4store(log_pos+3, crc); - log_array[TRANSLOG_INTERNAL_PARTS + translog_parts].str= log_pos; - log_array[TRANSLOG_INTERNAL_PARTS + translog_parts].length= 7; - extra_length+= 7; - translog_parts++; - } -#endif + _ma_log_key_changes(ma_page, + log_array + TRANSLOG_INTERNAL_PARTS + translog_parts, + log_pos, &extra_length, &translog_parts); + /* Remember new page length for future log entires for same page */ + ma_page->org_size= ma_page->size; DBUG_RETURN(translog_write_record(&lsn, LOGREC_REDO_INDEX, info->trn, info, @@ -2277,9 +2272,9 @@ static my_bool _ma_log_key_middle(MARIA_PAGE *ma_page, uint key_length, int move_length) { LSN lsn; - uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE + 3+5+3+3+3 + 7]; + uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE + 2 + 2 + 3+5+3+3+3 + 7]; uchar *log_pos; - LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 5]; + LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 6]; uint key_offset; uint translog_parts, extra_length; my_off_t page; @@ -2287,6 +2282,8 @@ static my_bool _ma_log_key_middle(MARIA_PAGE *ma_page, DBUG_ENTER("_ma_log_key_middle"); DBUG_PRINT("enter", ("page: %lu", (ulong) ma_page->pos)); + DBUG_ASSERT(ma_page->size == new_length); + /* new place of key after changes */ key_pos+= data_added_first; key_offset= (uint) (key_pos - ma_page->buff); @@ -2314,6 +2311,15 @@ static my_bool _ma_log_key_middle(MARIA_PAGE *ma_page, page_store(log_pos, page); log_pos+= PAGE_STORE_SIZE; +#ifdef EXTRA_DEBUG_KEY_CHANGES + *log_pos++= KEY_OP_DEBUG; + *log_pos++= KEY_OP_DEBUG_LOG_MIDDLE; +#endif + + /* Store keypage_flag */ + *log_pos++= KEY_OP_SET_PAGEFLAG; + *log_pos++= ma_page->buff[KEYPAGE_TRANSFLAG_OFFSET]; + log_pos[0]= KEY_OP_DEL_SUFFIX; int2store(log_pos+1, data_deleted_last); log_pos+= 3; @@ -2362,21 +2368,11 @@ static my_bool _ma_log_key_middle(MARIA_PAGE *ma_page, key_length); } -#ifdef EXTRA_DEBUG_KEY_CHANGES - { - int page_length= ma_page->size; - ha_checksum crc; - crc= my_checksum(0, ma_page->buff + LSN_STORE_SIZE, - page_length - LSN_STORE_SIZE); - log_pos[0]= KEY_OP_CHECK; - int2store(log_pos+1, page_length); - int4store(log_pos+3, crc); - log_array[TRANSLOG_INTERNAL_PARTS + translog_parts].str= log_pos; - log_array[TRANSLOG_INTERNAL_PARTS + translog_parts].length= 7; - extra_length+= 7; - translog_parts++; - } -#endif + _ma_log_key_changes(ma_page, + log_array + TRANSLOG_INTERNAL_PARTS + translog_parts, + log_pos, &extra_length, &translog_parts); + /* Remember new page length for future log entires for same page */ + ma_page->org_size= ma_page->size; DBUG_RETURN(translog_write_record(&lsn, LOGREC_REDO_INDEX, info->trn, info, @@ -2401,14 +2397,17 @@ static my_bool _ma_log_middle(MARIA_PAGE *ma_page, uint data_deleted_last) { LSN lsn; - LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 2]; - uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE + 3 + 5], *log_pos; + LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 4]; + uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE + 3 + 5 + 7], *log_pos; MARIA_HA *info= ma_page->info; my_off_t page; uint translog_parts, extra_length; DBUG_ENTER("_ma_log_middle"); DBUG_PRINT("enter", ("page: %lu", (ulong) page)); + DBUG_ASSERT(ma_page->org_size + data_added_first - data_deleted_last == + ma_page->size); + page= ma_page->page / info->s->block_size; log_pos= log_data + FILEID_STORE_SIZE; @@ -2434,21 +2433,11 @@ static my_bool _ma_log_middle(MARIA_PAGE *ma_page, translog_parts= 2; extra_length= data_changed_first; -#ifdef EXTRA_DEBUG_KEY_CHANGES - { - int page_length= ma_page->size; - ha_checksum crc; - crc= my_checksum(0, ma_page->buff + LSN_STORE_SIZE, - page_length - LSN_STORE_SIZE); - log_pos[0]= KEY_OP_CHECK; - int2store(log_pos+1, page_length); - int4store(log_pos+3, crc); - log_array[TRANSLOG_INTERNAL_PARTS + translog_parts].str= log_pos; - log_array[TRANSLOG_INTERNAL_PARTS + translog_parts].length= 7; - extra_length+= 7; - translog_parts++; - } -#endif + _ma_log_key_changes(ma_page, + log_array + TRANSLOG_INTERNAL_PARTS + translog_parts, + log_pos, &extra_length, &translog_parts); + /* Remember new page length for future log entires for same page */ + ma_page->org_size= ma_page->size; DBUG_RETURN(translog_write_record(&lsn, LOGREC_REDO_INDEX, info->trn, info, diff --git a/storage/maria/maria_def.h b/storage/maria/maria_def.h index f1454502bb1..6be34aae98c 100644 --- a/storage/maria/maria_def.h +++ b/storage/maria/maria_def.h @@ -39,6 +39,7 @@ #define SANITY_CHECKS 1 #ifdef EXTRA_DEBUG #define EXTRA_DEBUG_KEY_CHANGES +#define EXTRA_STORE_FULL_PAGE_IN_KEY_CHANGES #endif #define MAX_NONMAPPED_INSERTS 1000 @@ -243,7 +244,8 @@ typedef struct st_maria_file_bitmap uchar *map; pgcache_page_no_t page; /* Page number for current bitmap */ uint used_size; /* Size of bitmap head that is not 0 */ - my_bool changed; /* 1 if page needs to be flushed */ + my_bool changed; /* 1 if page needs to be written */ + my_bool changed_not_flushed; /* 1 if some bitmap is not flushed */ my_bool flush_all_requested; /**< If _ma_bitmap_flush_all waiting */ uint non_flushable; /**< 0 if bitmap and log are in sync */ PAGECACHE_FILE file; /* datafile where bitmap is stored */ @@ -361,6 +363,7 @@ typedef struct st_maria_share uint in_trans; /* Number of references by trn */ uint w_locks, r_locks, tot_locks; /* Number of read/write locks */ uint block_size; /* block_size of keyfile & data file*/ + uint max_index_block_size; /* block_size - end_of_page_info */ /* Fixed length part of a packed row in BLOCK_RECORD format */ uint base_length; myf write_flag; @@ -833,6 +836,7 @@ typedef struct st_maria_page uchar *buff; /* Data for page */ my_off_t pos; /* Disk address to page */ uint size; /* Size of data on page */ + uint org_size; /* Size of page at read or after log */ uint node; /* 0 or share->base.key_reflength */ uint flag; /* Page flag */ uint link_offset; diff --git a/storage/maria/trnman.c b/storage/maria/trnman.c index 0dbc8ed7fb3..a87b2f0e55b 100644 --- a/storage/maria/trnman.c +++ b/storage/maria/trnman.c @@ -363,6 +363,7 @@ TRN *trnman_new_trn(WT_THD *wt) trn->used_tables= 0; trn->locked_tables= 0; + trn->flags= 0; /* only after the following function TRN is considered initialized, diff --git a/storage/myisam/mi_dynrec.c b/storage/myisam/mi_dynrec.c index dae8fbe50a8..f08e9f32a3f 100644 --- a/storage/myisam/mi_dynrec.c +++ b/storage/myisam/mi_dynrec.c @@ -66,9 +66,12 @@ static int _mi_cmp_buffer(File file, const uchar *buff, my_off_t filepos, my_bool mi_dynmap_file(MI_INFO *info, my_off_t size) { DBUG_ENTER("mi_dynmap_file"); - if (size > (my_off_t) (~((size_t) 0))) + if (size == 0 || size > (my_off_t) (~((size_t) 0))) { - DBUG_PRINT("warning", ("File is too large for mmap")); + if (size) + DBUG_PRINT("warning", ("File is too large for mmap")); + else + DBUG_PRINT("warning", ("Do not mmap zero-length")); DBUG_RETURN(1); } /* diff --git a/storage/myisam/mi_locking.c b/storage/myisam/mi_locking.c index 860c9c57889..741b9b01986 100644 --- a/storage/myisam/mi_locking.c +++ b/storage/myisam/mi_locking.c @@ -29,7 +29,6 @@ int mi_lock_database(MI_INFO *info, int lock_type) int error; uint count; MYISAM_SHARE *share=info->s; - uint flag; DBUG_ENTER("mi_lock_database"); DBUG_PRINT("enter",("lock_type: %d old lock %d r_locks: %u w_locks: %u " "global_changed: %d open_count: %u name: '%s'", @@ -48,7 +47,7 @@ int mi_lock_database(MI_INFO *info, int lock_type) DBUG_RETURN(0); } - flag=error=0; + error= 0; pthread_mutex_lock(&share->intern_lock); if (share->kfile >= 0) /* May only be false on windows */ { @@ -128,14 +127,12 @@ int mi_lock_database(MI_INFO *info, int lock_type) { if (share->r_locks) { /* Only read locks left */ - flag=1; if (my_lock(share->kfile,F_RDLCK,0L,F_TO_EOF, MYF(MY_WME | MY_SEEK_NOT_DONE)) && !error) error=my_errno; } else if (!share->w_locks) { /* No more locks */ - flag=1; if (my_lock(share->kfile,F_UNLCK,0L,F_TO_EOF, MYF(MY_WME | MY_SEEK_NOT_DONE)) && !error) error=my_errno; @@ -156,7 +153,6 @@ int mi_lock_database(MI_INFO *info, int lock_type) */ if (share->w_locks == 1) { - flag=1; if (my_lock(share->kfile,lock_type,0L,F_TO_EOF, MYF(MY_SEEK_NOT_DONE))) { @@ -171,7 +167,6 @@ int mi_lock_database(MI_INFO *info, int lock_type) } if (!share->r_locks && !share->w_locks) { - flag=1; if (my_lock(share->kfile,lock_type,0L,F_TO_EOF, info->lock_wait | MY_SEEK_NOT_DONE)) { @@ -196,7 +191,6 @@ int mi_lock_database(MI_INFO *info, int lock_type) { /* Change READONLY to RW */ if (share->r_locks == 1) { - flag=1; if (my_lock(share->kfile,lock_type,0L,F_TO_EOF, MYF(info->lock_wait | MY_SEEK_NOT_DONE))) { @@ -213,7 +207,6 @@ int mi_lock_database(MI_INFO *info, int lock_type) { if (!share->w_locks) { - flag=1; if (my_lock(share->kfile,lock_type,0L,F_TO_EOF, info->lock_wait | MY_SEEK_NOT_DONE)) { @@ -260,11 +253,6 @@ int mi_lock_database(MI_INFO *info, int lock_type) } #endif pthread_mutex_unlock(&share->intern_lock); -#if defined(FULL_LOG) || defined(_lint) - lock_type|=(int) (flag << 8); /* Set bit to set if real lock */ - myisam_log_command(MI_LOG_LOCK,info,(uchar*) &lock_type,sizeof(lock_type), - error); -#endif DBUG_RETURN(error); } /* mi_lock_database */ diff --git a/storage/myisam/mi_open.c b/storage/myisam/mi_open.c index 7436548c7e1..e4240dec90c 100644 --- a/storage/myisam/mi_open.c +++ b/storage/myisam/mi_open.c @@ -139,8 +139,7 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags) my_errno= HA_ERR_NOT_A_TABLE; goto err; } - if (memcmp((uchar*) share->state.header.file_version, - (uchar*) myisam_file_magic, 4)) + if (bcmp(share->state.header.file_version, myisam_file_magic, 4)) { DBUG_PRINT("error",("Wrong header in %s",name_buff)); DBUG_DUMP("error_dump", share->state.header.file_version, diff --git a/storage/myisam/mi_page.c b/storage/myisam/mi_page.c index 17a5820d768..82acb801c90 100644 --- a/storage/myisam/mi_page.c +++ b/storage/myisam/mi_page.c @@ -49,7 +49,7 @@ uchar *_mi_fetch_keypage(register MI_INFO *info, MI_KEYDEF *keyinfo, { DBUG_PRINT("error",("page %lu had wrong page length: %u", (ulong) page, page_size)); - DBUG_DUMP("page",tmp, keyinfo->block_length); + DBUG_DUMP("page", tmp, keyinfo->block_length); info->last_keypage = HA_OFFSET_ERROR; mi_print_error(info->s, HA_ERR_CRASHED); my_errno = HA_ERR_CRASHED; diff --git a/storage/myisam/mi_search.c b/storage/myisam/mi_search.c index 03e0c72d7e0..52e28f8a5e9 100644 --- a/storage/myisam/mi_search.c +++ b/storage/myisam/mi_search.c @@ -819,7 +819,7 @@ uint _mi_get_pack_key(register MI_KEYDEF *keyinfo, uint nod_flag, DBUG_PRINT("error", ("Found too long null packed key: %u of %u at 0x%lx", length, keyseg->length, (long) *page_pos)); - DBUG_DUMP("key",*page_pos,16); + DBUG_DUMP("key", *page_pos, 16); mi_print_error(keyinfo->share, HA_ERR_CRASHED); my_errno=HA_ERR_CRASHED; return 0; @@ -876,7 +876,7 @@ uint _mi_get_pack_key(register MI_KEYDEF *keyinfo, uint nod_flag, { DBUG_PRINT("error",("Found too long packed key: %u of %u at 0x%lx", length, keyseg->length, (long) *page_pos)); - DBUG_DUMP("key",*page_pos,16); + DBUG_DUMP("key", *page_pos, 16); mi_print_error(keyinfo->share, HA_ERR_CRASHED); my_errno=HA_ERR_CRASHED; return 0; /* Error */ @@ -948,7 +948,7 @@ uint _mi_get_binary_pack_key(register MI_KEYDEF *keyinfo, uint nod_flag, DBUG_PRINT("error", ("Found too long binary packed key: %u of %u at 0x%lx", length, keyinfo->maxlength, (long) *page_pos)); - DBUG_DUMP("key",*page_pos,16); + DBUG_DUMP("key", *page_pos, 16); mi_print_error(keyinfo->share, HA_ERR_CRASHED); my_errno=HA_ERR_CRASHED; DBUG_RETURN(0); /* Wrong key */ diff --git a/storage/myisam/mi_test2.c b/storage/myisam/mi_test2.c index 67da89e55ef..527c5e03a27 100644 --- a/storage/myisam/mi_test2.c +++ b/storage/myisam/mi_test2.c @@ -410,7 +410,7 @@ int main(int argc, char *argv[]) } ant=0; while (mi_rprev(file,read_record3,0) == 0 && - bcmp(read_record3+start,key,length) == 0) ant++; + memcmp(read_record3+start,key,length) == 0) ant++; if (ant != dupp_keys) { printf("prev: Found: %d records of %d\n",ant,dupp_keys); @@ -448,7 +448,7 @@ int main(int argc, char *argv[]) goto end; } if (mi_rlast(file,read_record2,0) || - bcmp(read_record2,read_record3,reclength)) + memcmp(read_record2,read_record3,reclength)) { printf("Can't find last record\n"); DBUG_DUMP("record2",(uchar*) read_record2,reclength); @@ -463,7 +463,7 @@ int main(int argc, char *argv[]) printf("prev: I found: %d records of %d\n",ant,write_count); goto end; } - if (bcmp(read_record,read_record3,reclength)) + if (memcmp(read_record,read_record3,reclength)) { printf("Can't find first record\n"); goto end; @@ -478,7 +478,7 @@ int main(int argc, char *argv[]) mi_rprev(file,read_record3,0) == 0 || mi_rnext(file,read_record3,0)) goto err; - if (bcmp(read_record,read_record3,reclength) != 0) + if (memcmp(read_record,read_record3,reclength) != 0) printf("Can't find first record\n"); if (!silent) @@ -490,7 +490,7 @@ int main(int argc, char *argv[]) mi_rnext(file,read_record3,0) == 0 || mi_rprev(file,read_record3,0)) goto err; - if (bcmp(read_record2,read_record3,reclength)) + if (memcmp(read_record2,read_record3,reclength)) printf("Can't find last record\n"); #ifdef NOT_ANYMORE if (!silent) @@ -504,7 +504,7 @@ int main(int argc, char *argv[]) bzero((char*) file->lastkey,file->s->base.max_key_length*2); if (mi_rkey(file,read_record,0,key2,(uint) i,HA_READ_PREFIX)) goto err; - if (bcmp(read_record+start,key,(uint) i)) + if (memcmp(read_record+start,key,(uint) i)) { puts("Didn't find right record"); goto end; @@ -523,7 +523,7 @@ int main(int argc, char *argv[]) opt_delete++; ant=1; while (mi_rnext(file,read_record3,0) == 0 && - bcmp(read_record3+start,key,length) == 0) ant++; + memcmp(read_record3+start,key,length) == 0) ant++; if (ant != dupp_keys-1) { printf("next: I can only find: %d keys of %d\n",ant,dupp_keys-1); @@ -541,7 +541,7 @@ int main(int argc, char *argv[]) opt_delete++; ant=1; while (mi_rprev(file,read_record3,0) == 0 && - bcmp(read_record3+start,key,length) == 0) ant++; + memcmp(read_record3+start,key,length) == 0) ant++; if (ant != dupp_keys-2) { printf("next: I can only find: %d keys of %d\n",ant,dupp_keys-2); @@ -561,7 +561,7 @@ int main(int argc, char *argv[]) if (mi_rnext(file,read_record,0)) goto err; /* Skall finnas poster */ while (mi_rnext(file,read_record3,0) == 0 && - bcmp(read_record3+start,key,length) == 0) ant++; + memcmp(read_record3+start,key,length) == 0) ant++; if (ant != dupp_keys-3) { printf("next: I can only find: %d keys of %d\n",ant,dupp_keys-3); @@ -576,7 +576,7 @@ int main(int argc, char *argv[]) opt_delete++; ant=0; while (mi_rprev(file,read_record3,0) == 0 && - bcmp(read_record3+start,key,length) == 0) ant++; + memcmp(read_record3+start,key,length) == 0) ant++; if (ant != dupp_keys-4) { printf("next: I can only find: %d keys of %d\n",ant,dupp_keys-4); @@ -599,7 +599,7 @@ int main(int argc, char *argv[]) for (i=min(2,keys) ; i-- > 0 ;) { if (mi_rsame(file,read_record2,(int) i)) goto err; - if (bcmp(read_record,read_record2,reclength) != 0) + if (memcmp(read_record,read_record2,reclength) != 0) { printf("mi_rsame didn't find same record\n"); goto end; diff --git a/storage/myisam/mi_unique.c b/storage/myisam/mi_unique.c index 02fcd9289dd..fdba84a2e67 100644 --- a/storage/myisam/mi_unique.c +++ b/storage/myisam/mi_unique.c @@ -56,7 +56,7 @@ my_bool mi_check_unique(MI_INFO *info, MI_UNIQUEDEF *def, uchar *record, if (_mi_search_next(info,info->s->keyinfo+def->key, info->lastkey, MI_UNIQUE_HASH_LENGTH, SEARCH_BIGGER, info->s->state.key_root[def->key]) || - bcmp((char*) info->lastkey, (char*) key_buff, MI_UNIQUE_HASH_LENGTH)) + memcmp(info->lastkey, key_buff, MI_UNIQUE_HASH_LENGTH)) { info->page_changed=1; /* Can't optimize read next */ info->lastpos=lastpos; diff --git a/storage/myisam/rt_split.c b/storage/myisam/rt_split.c index 88cf643faf9..03d22de68fa 100644 --- a/storage/myisam/rt_split.c +++ b/storage/myisam/rt_split.c @@ -255,7 +255,6 @@ int rtree_split_page(MI_INFO *info, MI_KEYDEF *keyinfo, uchar *page, uchar *key, SplitStruct *stop; double *coord_buf; double *next_coord; - double *old_coord; int n_dim; uchar *source_cur, *cur1, *cur2; uchar *new_page= info->buff; @@ -293,8 +292,6 @@ int rtree_split_page(MI_INFO *info, MI_KEYDEF *keyinfo, uchar *page, uchar *key, rtree_d_mbr(keyinfo->seg, key, key_length, cur->coords); cur->key = key; - old_coord = next_coord; - if (split_rtree_node(task, max_keys + 1, mi_getint(page) + full_length + 2, full_length, rt_PAGE_MIN_SIZE(keyinfo->block_length), diff --git a/storage/myisammrg/ha_myisammrg.cc b/storage/myisammrg/ha_myisammrg.cc index c6e5c24590a..be3f0ca4813 100644 --- a/storage/myisammrg/ha_myisammrg.cc +++ b/storage/myisammrg/ha_myisammrg.cc @@ -295,8 +295,8 @@ static int myisammrg_parent_open_callback(void *callback_param, } } - DBUG_PRINT("myrg", ("open: '%.*s'.'%.*s'", (int)(child_l->db_length), - child_l->db, (int)(child_l->table_name_length), + DBUG_PRINT("myrg", ("open: '%.*s'.'%.*s'", (int) child_l->db_length, + child_l->db, (int) child_l->table_name_length, child_l->table_name)); /* Convert to lowercase if required. */ diff --git a/storage/myisammrg/myrg_open.c b/storage/myisammrg/myrg_open.c index ea306c5ba9c..17c9b4ba4d1 100644 --- a/storage/myisammrg/myrg_open.c +++ b/storage/myisammrg/myrg_open.c @@ -227,9 +227,7 @@ MYRG_INFO *myrg_parent_open(const char *parent_name, int save_errno; int insert_method; uint length; - uint dir_length; uint child_count; - size_t name_buff_length; File fd; IO_CACHE file_cache; char parent_name_buff[FN_REFLEN * 2]; @@ -300,7 +298,6 @@ MYRG_INFO *myrg_parent_open(const char *parent_name, } /* Call callback for each child. */ - dir_length= dirname_part(parent_name_buff, parent_name, &name_buff_length); my_b_seek(&file_cache, 0); while ((length= my_b_gets(&file_cache, child_name_buff, FN_REFLEN - 1))) { @@ -380,7 +377,6 @@ int myrg_attach_children(MYRG_INFO *m_info, int handle_locking, { ulonglong file_offset; MI_INFO *myisam; - int rc; int errpos; int save_errno; uint idx; @@ -399,7 +395,6 @@ int myrg_attach_children(MYRG_INFO *m_info, int handle_locking, here and in ha_myisammrg::store_lock() forces consistent data. */ pthread_mutex_lock(&m_info->mutex); - rc= 1; errpos= 0; file_offset= 0; min_keys= 0; diff --git a/storage/ndb/src/common/portlib/NdbMutex.c b/storage/ndb/src/common/portlib/NdbMutex.c index c9184e5d1f2..5595baba7c4 100644 --- a/storage/ndb/src/common/portlib/NdbMutex.c +++ b/storage/ndb/src/common/portlib/NdbMutex.c @@ -24,36 +24,31 @@ NdbMutex* NdbMutex_Create(void) { NdbMutex* pNdbMutex; int result; - DBUG_ENTER("NdbMutex_Create"); - + pNdbMutex = (NdbMutex*)NdbMem_Allocate(sizeof(NdbMutex)); - DBUG_PRINT("info",("NdbMem_Allocate 0x%lx", (long) pNdbMutex)); - + if (pNdbMutex == NULL) - DBUG_RETURN(NULL); - + return NULL; + result = pthread_mutex_init(pNdbMutex, NULL); assert(result == 0); - - DBUG_RETURN(pNdbMutex); + + return pNdbMutex; } int NdbMutex_Destroy(NdbMutex* p_mutex) { int result; - DBUG_ENTER("NdbMutex_Destroy"); if (p_mutex == NULL) - DBUG_RETURN(-1); + return -1; result = pthread_mutex_destroy(p_mutex); - DBUG_PRINT("info",("NdbMem_Free 0x%lx", (long) p_mutex)); NdbMem_Free(p_mutex); - - DBUG_RETURN(result); + return result; } diff --git a/storage/ndb/src/ndbapi/DictCache.cpp b/storage/ndb/src/ndbapi/DictCache.cpp index 04be3711847..9c66b2be9d2 100644 --- a/storage/ndb/src/ndbapi/DictCache.cpp +++ b/storage/ndb/src/ndbapi/DictCache.cpp @@ -20,8 +20,10 @@ #include <NdbCondition.h> #include <NdbSleep.h> -static NdbTableImpl f_invalid_table; -static NdbTableImpl f_altered_table; +static NdbTableImpl * f_invalid_table = 0; +static NdbTableImpl * f_altered_table = 0; + +static int ndb_dict_cache_count = 0; Ndb_local_table_info * Ndb_local_table_info::create(NdbTableImpl *table_impl, Uint32 sz) @@ -93,11 +95,29 @@ GlobalDictCache::GlobalDictCache(){ DBUG_ENTER("GlobalDictCache::GlobalDictCache"); m_tableHash.createHashTable(); m_waitForTableCondition = NdbCondition_Create(); + if (f_invalid_table == NULL) + f_invalid_table = new NdbTableImpl(); + if (f_altered_table == NULL) + f_altered_table = new NdbTableImpl(); + ndb_dict_cache_count++; DBUG_VOID_RETURN; } GlobalDictCache::~GlobalDictCache(){ DBUG_ENTER("GlobalDictCache::~GlobalDictCache"); + if (--ndb_dict_cache_count == 0) + { + if (f_invalid_table) + { + delete f_invalid_table; + f_invalid_table = 0; + } + if (f_altered_table) + { + delete f_altered_table; + f_altered_table = 0; + } + } NdbElement_t<Vector<TableVersion> > * curr = m_tableHash.getNext(0); while(curr != 0){ Vector<TableVersion> * vers = curr->theData; @@ -254,7 +274,7 @@ GlobalDictCache::put(const char * name, NdbTableImpl * tab) TableVersion & ver = vers->back(); if(ver.m_status != RETREIVING || !(ver.m_impl == 0 || - ver.m_impl == &f_invalid_table || ver.m_impl == &f_altered_table) || + ver.m_impl == f_invalid_table || ver.m_impl == f_altered_table) || ver.m_version != 0 || ver.m_refCount == 0){ abort(); @@ -271,7 +291,7 @@ GlobalDictCache::put(const char * name, NdbTableImpl * tab) ver.m_version = tab->m_version; ver.m_status = OK; } - else if (ver.m_impl == &f_invalid_table) + else if (ver.m_impl == f_invalid_table) { DBUG_PRINT("info", ("Table DROPPED invalid")); ver.m_impl = tab; @@ -279,7 +299,7 @@ GlobalDictCache::put(const char * name, NdbTableImpl * tab) ver.m_status = DROPPED; ver.m_impl->m_status = NdbDictionary::Object::Invalid; } - else if(ver.m_impl == &f_altered_table) + else if(ver.m_impl == f_altered_table) { DBUG_PRINT("info", ("Table DROPPED altered")); ver.m_impl = tab; @@ -440,7 +460,7 @@ GlobalDictCache::alter_table_rep(const char * name, if(i == sz - 1 && ver.m_status == RETREIVING) { - ver.m_impl = altered ? &f_altered_table : &f_invalid_table; + ver.m_impl = altered ? f_altered_table : f_invalid_table; DBUG_VOID_RETURN; } } diff --git a/storage/pbxt/src/Makefile.am b/storage/pbxt/src/Makefile.am index fc4c4ef8f1e..20399aee484 100644 --- a/storage/pbxt/src/Makefile.am +++ b/storage/pbxt/src/Makefile.am @@ -40,8 +40,8 @@ libpbxt_la_LDFLAGS = -module # These are the warning Drizzle uses: # DRIZZLE_WARNINGS = -W -Wall -Wextra -pedantic -Wundef -Wredundant-decls -Wno-strict-aliasing -Wno-long-long -Wno-unused-parameter -libpbxt_la_CXXFLAGS = $(AM_CXXFLAGS) -DMYSQL_DYNAMIC_PLUGIN -libpbxt_la_CFLAGS = $(AM_CFLAGS) -DMYSQL_DYNAMIC_PLUGIN -std=c99 +libpbxt_la_CXXFLAGS = -shared $(AM_CXXFLAGS) -DMYSQL_DYNAMIC_PLUGIN +libpbxt_la_CFLAGS = -shared $(AM_CFLAGS) -DMYSQL_DYNAMIC_PLUGIN -std=c99 EXTRA_LIBRARIES = libpbxt.a noinst_LIBRARIES = libpbxt.a diff --git a/storage/xtradb/ChangeLog b/storage/xtradb/ChangeLog index bc69aaca96a..5ebcf1e87a2 100644 --- a/storage/xtradb/ChangeLog +++ b/storage/xtradb/ChangeLog @@ -1,3 +1,128 @@ +2010-06-24 The InnoDB Team + + * handler/ha_innodb.cc: + Fix Bug#54679 alter table causes compressed row_format to revert + to compact + +2010-06-22 The InnoDB Team + + * dict/dict0dict.c, dict/dict0mem.c, include/dict0mem.h, + include/univ.i, page/page0zip.c, row/row0merge.c: + Fix Bug#47991 InnoDB Dictionary Cache memory usage increases + indefinitely when renaming tables + +2010-06-22 The InnoDB Team + + * handler/ha_innodb.cc: + Fix Bug#54686: "field->col->mtype == type" assertion error at + row/row0sel.c + +2010-06-22 The InnoDB Team + + * handler/ha_innodb.cc, innodb_bug54044.result, innodb_bug54044.test: + Fix Bug#54044 Create temporary tables and using innodb crashes. + +2010-06-21 The InnoDB Team + + * dict/dict0load.c, fil/fil0fil.c: + Fix Bug#54658: InnoDB: Warning: allocated tablespace %lu, + old maximum was 0 (introduced in Bug #53578 fix) + +2010-06-16 The InnoDB Team + + * row/row0merge.c: + Fix Bug#54330 Broken fast index creation + +2010-06-10 The InnoDB Team + + * include/log0log.ic, row/row0ins.c, row/row0purge.c, + row/row0uins.c, row/row0umod.c, row/row0upd.c: + Fix Bug#39168 ERROR: the age of the last checkpoint ... exceeds + the log group capacity + +2010-06-08 The InnoDB Team + + * dict/dict0load.c: + Fix Bug#54009 Server crashes when data is selected from non backed + up table for InnoDB plugin + +2010-06-02 The InnoDB Team + + * include/db0err.h, include/lock0lock.h, include/row0mysql.h, + lock/lock0lock.c, row/row0ins.c, row/row0mysql.c, row/row0sel.c: + Fix Bug#53674 InnoDB: Error: unlock row could not find a + 4 mode lock on the record + +2010-06-01 The InnoDB Team + + * include/sync0rw.h, sync/sync0rw.c: + Fix Bug#48197 Concurrent rw_lock_free may cause assertion failure + +2010-06-01 The InnoDB Team + + * row/row0umod.c: + Fix Bug#53812 assert row/row0umod.c line 660 in txn rollback + after crash recovery + +2010-05-25 The InnoDB Team + + * handler/ha_innodb.cc, include/row0mysql.h, row/row0mysql.c: + Fix Bug#53592: crash replacing duplicates into table after fast + alter table added unique key + +2010-05-24 The InnoDB Team + + * dict/dict0boot.c, dict/dict0crea.c, fil/fil0fil.c, + include/dict0boot.h, include/fil0fil.h, row/row0mysql.c: + Fix Bug#53578: assert on invalid page access, in fil_io() + +2010-05-14 The InnoDB Team + * mysql-test/innodb_bug48024.test, mysql-test/innodb_bug48024.result, + dict/dict0dict.c, handler/ha_innodb.cc, handler/ha_innodb.h, + include/dict0dict.h, include/ha_prototypes.h, include/row0mysql.h, + include/trx0trx.h, row/row0mysql.c, trx/trx0i_s.c, trx/trx0trx.c: + Fix Bug#48024 Innodb doesn't work with multi-statements + Fix Bug#53644 InnoDB thinks that /*/ starts and ends a comment + +2010-05-12 The InnoDB Team + + * handler/handler0alter.cc: + Fix Bug#53591 crash with fast alter table and text/blob prefix + primary key + +2010-05-12 The InnoDB Team + + * row/row0merge.c: + Fix Bug#53471 row_merge_drop_temp_indexes() refers freed memory, SEGVs + +2010-05-11 The InnoDB Team + + * mysql-test/innodb_bug53290.test, mysql-test/innodb_bug53290.result, + include/rem0cmp.h, rem/rem0cmp.c, row/row0merge.c: + Fix Bug#53290 wrong duplicate key error when adding a unique index + via fast alter table + +2010-05-11 The InnoDB Team + * buf/buf0lru.c, include/buf0buf.ic: + Fix Bug#53307 valgrind: warnings in main.partition_innodb_plugin + +2010-05-05 The InnoDB Team + + * row/row0merge.c: + Fix Bug#53256 in a stress test, assert dict/dict0dict.c:815 + table2 == NULL + +2010-05-05 The InnoDB Team + + * handler/ha_innodb.cc: + Fix Bug#53165 Setting innodb_change_buffering=DEFAULT produces + incorrect result + +2010-05-04 The InnoDB Team + + * fsp/fsp0fsp.c: + Fix Bug#53306 valgrind: warnings in innodb.innodb + 2010-05-03 The InnoDB Team * buf0buf.c: @@ -48,12 +173,6 @@ Only check the record size at index creation time when innodb_strict_mode is set or when ROW_FORMAT is DYNAMIC or COMPRESSED. -2010-04-20 The InnoDB Team - - * btr/btr0btr.c, include/univ.i: - Implement UNIV_BTR_AVOID_COPY, for avoiding writes when a B-tree - node is split at the first or last record. - 2010-04-15 The InnoDB Team * trx/trx0rec.c: @@ -72,6 +191,10 @@ * mysql-test/innodb_bug38231.test: Remove non-determinism in the test case. +2010-03-29 The InnoDB Team + + InnoDB Plugin 1.0.7 released + 2010-03-18 The InnoDB Team * CMakeLists.txt: @@ -194,6 +317,14 @@ Fix Bug#49497 Error 1467 (ER_AUTOINC_READ_FAILED) on inserting a negative value +2010-01-28 The InnoDB Team + * handler/ha_innodb.h, handler/ha_innodb.cc, + handler/handler0alter.cc, + mysql-test/innodb_bug47622.test, + mysql-test/innodb_bug47622.result: + Fix Bug#47622 the new index is added before the existing ones + in MySQL, but after one in SE + 2010-01-27 The InnoDB Team * include/row0mysql.h, log/log0recv.c, row/row0mysql.c: diff --git a/storage/xtradb/Makefile.am b/storage/xtradb/Makefile.am index 172b9099568..3813b6602b8 100644 --- a/storage/xtradb/Makefile.am +++ b/storage/xtradb/Makefile.am @@ -333,8 +333,8 @@ EXTRA_LTLIBRARIES= ha_xtradb.la pkgplugin_LTLIBRARIES= @plugin_xtradb_shared_target@ ha_xtradb_la_LDFLAGS= -module -rpath $(pkgplugindir) -L$(top_builddir)/libservices -lmysqlservices -ha_xtradb_la_CXXFLAGS= $(AM_CFLAGS) $(INNODB_DYNAMIC_CFLAGS) -ha_xtradb_la_CFLAGS= $(AM_CFLAGS) $(INNODB_DYNAMIC_CFLAGS) +ha_xtradb_la_CXXFLAGS= -shared $(AM_CXXFLAGS) $(INNODB_DYNAMIC_CFLAGS) +ha_xtradb_la_CFLAGS= -shared $(AM_CFLAGS) $(INNODB_DYNAMIC_CFLAGS) ha_xtradb_la_SOURCES= $(libxtradb_a_SOURCES) EXTRA_DIST= CMakeLists.txt plug.in \ diff --git a/storage/xtradb/btr/btr0btr.c b/storage/xtradb/btr/btr0btr.c index 12e57dcc490..ff047095aa4 100644 --- a/storage/xtradb/btr/btr0btr.c +++ b/storage/xtradb/btr/btr0btr.c @@ -2030,6 +2030,7 @@ func_start: goto insert_empty; } } else if (UNIV_UNLIKELY(insert_left)) { + ut_a(n_iterations > 0); first_rec = page_rec_get_next(page_get_infimum_rec(page)); move_limit = page_rec_get_next(btr_cur_get_rec(cursor)); } else { @@ -2076,17 +2077,7 @@ insert_empty: } /* 5. Move then the records to the new page */ - if (direction == FSP_DOWN -#ifdef UNIV_BTR_AVOID_COPY - && page_rec_is_supremum(move_limit)) { - /* Instead of moving all records, make the new page - the empty page. */ - - left_block = block; - right_block = new_block; - } else if (direction == FSP_DOWN -#endif /* UNIV_BTR_AVOID_COPY */ - ) { + if (direction == FSP_DOWN) { /* fputs("Split left\n", stderr); */ if (0 @@ -2129,14 +2120,6 @@ insert_empty: right_block = block; lock_update_split_left(right_block, left_block); -#ifdef UNIV_BTR_AVOID_COPY - } else if (!split_rec) { - /* Instead of moving all records, make the new page - the empty page. */ - - left_block = new_block; - right_block = block; -#endif /* UNIV_BTR_AVOID_COPY */ } else { /* fputs("Split right\n", stderr); */ diff --git a/storage/xtradb/btr/btr0cur.c b/storage/xtradb/btr/btr0cur.c index 0e75b4e8442..9b87d969a64 100644 --- a/storage/xtradb/btr/btr0cur.c +++ b/storage/xtradb/btr/btr0cur.c @@ -2136,9 +2136,8 @@ any_extern: err = btr_cur_upd_lock_and_undo(flags, cursor, update, cmpl_info, thr, mtr, &roll_ptr); if (err != DB_SUCCESS) { -err_exit: - mem_heap_free(heap); - return(err); + + goto err_exit; } /* Ok, we may do the replacement. Store on the page infimum the @@ -2184,9 +2183,10 @@ err_exit: page_cur_move_to_next(page_cursor); + err = DB_SUCCESS; +err_exit: mem_heap_free(heap); - - return(DB_SUCCESS); + return(err); } /*************************************************************//** @@ -4259,6 +4259,8 @@ btr_store_big_rec_extern_fields( field_ref += local_len; } extern_len = big_rec_vec->fields[i].len; + UNIV_MEM_ASSERT_RW(big_rec_vec->fields[i].data, + extern_len); ut_a(extern_len > 0); @@ -4895,6 +4897,7 @@ btr_copy_blob_prefix( mtr_commit(&mtr); if (page_no == FIL_NULL || copy_len != part_len) { + UNIV_MEM_ASSERT_RW(buf, copied_len); return(copied_len); } @@ -5078,6 +5081,7 @@ btr_copy_externally_stored_field_prefix_low( space_id, page_no, offset); inflateEnd(&d_stream); mem_heap_free(heap); + UNIV_MEM_ASSERT_RW(buf, d_stream.total_out); return(d_stream.total_out); } else { return(btr_copy_blob_prefix(buf, len, space_id, diff --git a/storage/xtradb/btr/btr0sea.c b/storage/xtradb/btr/btr0sea.c index 61909903a67..36dadd47e69 100644 --- a/storage/xtradb/btr/btr0sea.c +++ b/storage/xtradb/btr/btr0sea.c @@ -182,6 +182,7 @@ void btr_search_sys_free(void) /*=====================*/ { + rw_lock_free(&btr_search_latch); mem_free(btr_search_latch_temp); btr_search_latch_temp = NULL; mem_heap_free(btr_search_sys->hash_index->heap); diff --git a/storage/xtradb/buf/buf0buddy.c b/storage/xtradb/buf/buf0buddy.c index e4a79026d3a..e6b80bcda55 100644 --- a/storage/xtradb/buf/buf0buddy.c +++ b/storage/xtradb/buf/buf0buddy.c @@ -452,6 +452,8 @@ buf_buddy_relocate( buf_page_t* bpage; const ulint size = BUF_BUDDY_LOW << i; ullint usec = ut_time_us(NULL); + ulint space; + ulint page_no; //ut_ad(buf_pool_mutex_own()); ut_ad(mutex_own(&zip_free_mutex)); @@ -490,11 +492,15 @@ buf_buddy_relocate( pool), so there is nothing wrong about this. The mach_read_from_4() calls here will only trigger bogus Valgrind memcheck warnings in UNIV_DEBUG_VALGRIND builds. */ - bpage = buf_page_hash_get( - mach_read_from_4((const byte*) src - + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID), - mach_read_from_4((const byte*) src - + FIL_PAGE_OFFSET)); + space = mach_read_from_4( + (const byte*) src + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID); + page_no = mach_read_from_4( + (const byte*) src + FIL_PAGE_OFFSET); + /* Suppress Valgrind warnings about conditional jump + on uninitialized value. */ + UNIV_MEM_VALID(&space, sizeof space); + UNIV_MEM_VALID(&page_no, sizeof page_no); + bpage = buf_page_hash_get(space, page_no); if (!bpage || bpage->zip.data != src) { /* The block has probably been freshly diff --git a/storage/xtradb/buf/buf0buf.c b/storage/xtradb/buf/buf0buf.c index a5a9e1d9004..94a67c1759c 100644 --- a/storage/xtradb/buf/buf0buf.c +++ b/storage/xtradb/buf/buf0buf.c @@ -53,6 +53,10 @@ Created 11/5/1995 Heikki Tuuri #include "page0zip.h" #include "trx0trx.h" #include "srv0start.h" +#include "que0que.h" +#include "read0read.h" +#include "row0row.h" +#include "ha_prototypes.h" /* prototypes for new functions added to ha_innodb.cc */ trx_t* innobase_get_trx(); @@ -78,9 +82,9 @@ inline void _increment_page_get_statistics(buf_block_t* block, trx_t* trx) block_hash_byte = block_hash >> 3; block_hash_offset = (byte) block_hash & 0x07; if (block_hash_byte >= DPAH_SIZE) - fprintf(stderr, "!!! block_hash_byte = %lu block_hash_offset = %lu !!!\n", (unsigned long) block_hash_byte, (unsigned long) block_hash_offset); + fprintf(stderr, "!!! block_hash_byte = %lu block_hash_offset = %d !!!\n", block_hash_byte, block_hash_offset); if (block_hash_offset > 7) - fprintf(stderr, "!!! block_hash_byte = %lu block_hash_offset = %lu !!!\n", (unsigned long) block_hash_byte, (unsigned long) block_hash_offset); + fprintf(stderr, "!!! block_hash_byte = %lu block_hash_offset = %d !!!\n", block_hash_byte, block_hash_offset); if ((trx->distinct_page_access_hash[block_hash_byte] & ((byte) 0x01 << block_hash_offset)) == 0) trx->distinct_page_access++; trx->distinct_page_access_hash[block_hash_byte] |= (byte) 0x01 << block_hash_offset; @@ -310,14 +314,30 @@ read-ahead or flush occurs */ UNIV_INTERN ibool buf_debug_prints = FALSE; #endif /* UNIV_DEBUG */ -/** A chunk of buffers. The buffer pool is allocated in chunks. */ -struct buf_chunk_struct{ - ulint mem_size; /*!< allocated size of the chunk */ - ulint size; /*!< size of frames[] and blocks[] */ - void* mem; /*!< pointer to the memory area which - was allocated for the frames */ - buf_block_t* blocks; /*!< array of buffer control blocks */ +/* Buffer pool shared memory segment information */ +typedef struct buf_shm_info_struct buf_shm_info_t; + +struct buf_shm_info_struct { + char head_str[8]; + ulint binary_id; + ibool is_new; /* during initializing */ + ibool clean; /* clean shutdowned and free */ + ibool reusable; /* reusable */ + ulint buf_pool_size; /* backup value */ + ulint page_size; /* backup value */ + ulint frame_offset; /* offset of the first frame based on chunk->mem */ + ulint zip_hash_offset; + ulint zip_hash_n; + + ulint checksum; + + buf_pool_t buf_pool_backup; + buf_chunk_t chunk_backup; + + ib_uint64_t dummy; }; + +#define BUF_SHM_INFO_HEAD "XTRA_SHM" #endif /* !UNIV_HOTBACKUP */ /********************************************************************//** @@ -764,6 +784,45 @@ buf_block_init( #endif /* UNIV_SYNC_DEBUG */ } +static +void +buf_block_reuse( +/*============*/ + buf_block_t* block, + ptrdiff_t frame_offset) +{ + /* block_init */ + block->frame = ((byte*)(block->frame) + frame_offset); + + UNIV_MEM_DESC(block->frame, UNIV_PAGE_SIZE, block); + + block->index = NULL; + +#ifdef UNIV_DEBUG + /* recreate later */ + block->page.in_page_hash = FALSE; + block->page.in_zip_hash = FALSE; +#endif /* UNIV_DEBUG */ + +#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG + block->n_pointers = 0; +#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ + + if (block->page.zip.data) + block->page.zip.data = ((byte*)(block->page.zip.data) + frame_offset); + + block->is_hashed = FALSE; + + mutex_create(&block->mutex, SYNC_BUF_BLOCK); + + rw_lock_create(&block->lock, SYNC_LEVEL_VARYING); + ut_ad(rw_lock_validate(&(block->lock))); + +#ifdef UNIV_SYNC_DEBUG + rw_lock_create(&block->debug_latch, SYNC_NO_ORDER_CHECK); +#endif /* UNIV_SYNC_DEBUG */ +} + /********************************************************************//** Allocates a chunk of buffer frames. @return chunk, or NULL on failure */ @@ -776,26 +835,167 @@ buf_chunk_init( { buf_block_t* block; byte* frame; + ulint zip_hash_n = 0; + ulint zip_hash_mem_size = 0; + hash_table_t* zip_hash_tmp = NULL; ulint i; + buf_shm_info_t* shm_info = NULL; /* Round down to a multiple of page size, although it already should be. */ mem_size = ut_2pow_round(mem_size, UNIV_PAGE_SIZE); + + if (srv_buffer_pool_shm_key) { + /* zip_hash size */ + zip_hash_n = (mem_size / UNIV_PAGE_SIZE) * 2; + zip_hash_mem_size = ut_2pow_round(hash_create_needed(zip_hash_n) + + (UNIV_PAGE_SIZE - 1), UNIV_PAGE_SIZE); + } + /* Reserve space for the block descriptors. */ mem_size += ut_2pow_round((mem_size / UNIV_PAGE_SIZE) * (sizeof *block) + (UNIV_PAGE_SIZE - 1), UNIV_PAGE_SIZE); + if (srv_buffer_pool_shm_key) { + mem_size += ut_2pow_round(sizeof(buf_shm_info_t) + + (UNIV_PAGE_SIZE - 1), UNIV_PAGE_SIZE); + mem_size += zip_hash_mem_size; + } chunk->mem_size = mem_size; + + if (srv_buffer_pool_shm_key) { + ulint binary_id; + ibool is_new; + + ut_a(buf_pool->n_chunks == 1); + + fprintf(stderr, + "InnoDB: Notice: innodb_buffer_pool_shm_key option is specified.\n" + "InnoDB: This option may not be safe to keep consistency of datafiles.\n" + "InnoDB: Because InnoDB cannot lock datafiles when shutdown until reusing shared memory segment.\n" + "InnoDB: You should ensure no change of InnoDB files while using innodb_buffer_pool_shm_key.\n"); + + /* FIXME: This is vague id still */ + binary_id = (ulint) ((char*)mtr_commit - (char *)btr_root_get) + + (ulint) ((char *)os_get_os_version - (char *)buf_calc_page_new_checksum) + + (ulint) ((char *)page_dir_find_owner_slot - (char *)dfield_data_is_binary_equal) + + (ulint) ((char *)que_graph_publish - (char *)dict_casedn_str) + + (ulint) ((char *)read_view_oldest_copy_or_open_new - (char *)fil_space_get_version) + + (ulint) ((char *)rec_get_n_extern_new - (char *)fsp_get_size_low) + + (ulint) ((char *)row_get_trx_id_offset - (char *)ha_create_func) + + (ulint) ((char *)srv_set_io_thread_op_info - (char *)thd_is_replication_slave_thread) + + (ulint) ((char *)mutex_create_func - (char *)ibuf_inside) + + (ulint) ((char *)trx_set_detailed_error - (char *)lock_check_trx_id_sanity) + + (ulint) ((char *)ut_time - (char *)mem_heap_strdup); + + chunk->mem = os_shm_alloc(&chunk->mem_size, srv_buffer_pool_shm_key, &is_new); + + if (UNIV_UNLIKELY(chunk->mem == NULL)) { + return(NULL); + } + +#ifdef UNIV_SET_MEM_TO_ZERO + if (is_new) { + memset(chunk->mem, '\0', chunk->mem_size); + } +#endif + + shm_info = chunk->mem; + + zip_hash_tmp = (hash_table_t*)((char *)chunk->mem + chunk->mem_size - zip_hash_mem_size); + + if (is_new) { + strncpy(shm_info->head_str, BUF_SHM_INFO_HEAD, 8); + shm_info->binary_id = binary_id; + shm_info->is_new = TRUE; /* changed to FALSE when the initialization is finished */ + shm_info->clean = FALSE; /* changed to TRUE when free the segment. */ + shm_info->reusable = FALSE; /* changed to TRUE when validation is finished. */ + shm_info->buf_pool_size = srv_buf_pool_size; + shm_info->page_size = srv_page_size; + shm_info->zip_hash_offset = chunk->mem_size - zip_hash_mem_size; + shm_info->zip_hash_n = zip_hash_n; + } else { + ulint checksum; + + if (strncmp(shm_info->head_str, BUF_SHM_INFO_HEAD, 8)) { + fprintf(stderr, + "InnoDB: Error: The shared memory segment seems not to be for buffer pool.\n"); + return(NULL); + } + if (shm_info->binary_id != binary_id) { + fprintf(stderr, + "InnoDB: Error: The shared memory segment seems not to be for this binary.\n"); + return(NULL); + } + if (shm_info->is_new) { + fprintf(stderr, + "InnoDB: Error: The shared memory was not initialized yet.\n"); + return(NULL); + } + if (!shm_info->clean) { + fprintf(stderr, + "InnoDB: Error: The shared memory was not shut down cleanly.\n"); + return(NULL); + } + if (!shm_info->reusable) { + fprintf(stderr, + "InnoDB: Error: The shared memory has unrecoverable contents.\n"); + return(NULL); + } + if (shm_info->buf_pool_size != srv_buf_pool_size) { + fprintf(stderr, + "InnoDB: Error: srv_buf_pool_size is different (shm=%lu current=%lu).\n", + shm_info->buf_pool_size, srv_buf_pool_size); + return(NULL); + } + if (shm_info->page_size != srv_page_size) { + fprintf(stderr, + "InnoDB: Error: srv_page_size is different (shm=%lu current=%lu).\n", + shm_info->page_size, srv_page_size); + return(NULL); + } + + ut_a(shm_info->zip_hash_offset == chunk->mem_size - zip_hash_mem_size); + ut_a(shm_info->zip_hash_n == zip_hash_n); + + /* check checksum */ + checksum = ut_fold_binary((byte*)chunk->mem + sizeof(buf_shm_info_t), + chunk->mem_size - sizeof(buf_shm_info_t)); + if (shm_info->checksum != checksum) { + fprintf(stderr, + "InnoDB: Error: checksum of the shared memory is not match. " + "(stored=%lu calculated=%lu)\n", + shm_info->checksum, checksum); + return(NULL); + } + + /* flag to use the segment. */ + shm_info->clean = FALSE; /* changed to TRUE when free the segment. */ + } + + /* init zip_hash contents */ + if (is_new) { + hash_create_init(zip_hash_tmp, zip_hash_n); + } else { + /* adjust offset is done later */ + hash_create_reuse(zip_hash_tmp); + } + } else { chunk->mem = os_mem_alloc_large(&chunk->mem_size); if (UNIV_UNLIKELY(chunk->mem == NULL)) { return(NULL); } + } /* Allocate the block descriptors from the start of the memory block. */ + if (srv_buffer_pool_shm_key) { + chunk->blocks = (buf_block_t*)((char*)chunk->mem + sizeof(buf_shm_info_t)); + } else { chunk->blocks = chunk->mem; + } /* Align a pointer to the first frame. Note that when os_large_page_size is smaller than UNIV_PAGE_SIZE, @@ -803,8 +1003,13 @@ buf_chunk_init( it is bigger, we may allocate more blocks than requested. */ frame = ut_align(chunk->mem, UNIV_PAGE_SIZE); + if (srv_buffer_pool_shm_key) { + /* reserve zip_hash space and always -1 for reproductibity */ + chunk->size = (chunk->mem_size - zip_hash_mem_size) / UNIV_PAGE_SIZE - 1; + } else { chunk->size = chunk->mem_size / UNIV_PAGE_SIZE - (frame != chunk->mem); + } /* Subtract the space needed for block descriptors. */ { @@ -818,6 +1023,98 @@ buf_chunk_init( chunk->size = size; } + if (shm_info && !(shm_info->is_new)) { + /* convert the shared memory segment for reuse */ + ptrdiff_t phys_offset; + ptrdiff_t logi_offset; + ptrdiff_t blocks_offset; + byte* previous_frame_address; + + if (chunk->size < shm_info->chunk_backup.size) { + fprintf(stderr, + "InnoDB: Error: The buffer pool became smaller because of allocated address.\n" + "InnoDB: Retrying may avoid this situation.\n"); + shm_info->clean = TRUE; /* release the flag for retrying */ + return(NULL); + } + + chunk->size = shm_info->chunk_backup.size; + phys_offset = (char*)frame - ((char*)chunk->mem + shm_info->frame_offset); + logi_offset = (char *)frame - (char *)chunk->blocks[0].frame; + previous_frame_address = chunk->blocks[0].frame; + blocks_offset = (char *)chunk->blocks - (char *)shm_info->chunk_backup.blocks; + + if (phys_offset || logi_offset || blocks_offset) { + fprintf(stderr, + "InnoDB: Buffer pool in the shared memory segment should be converted.\n" + "InnoDB: Previous frames in address : %p\n" + "InnoDB: Previous frames were located : %p\n" + "InnoDB: Current frames should be located: %p\n" + "InnoDB: Pysical offset : %ld (%#lx)\n" + "InnoDB: Logical offset (frames) : %ld (%#lx)\n" + "InnoDB: Logical offset (blocks) : %ld (%#lx)\n", + (char *)chunk->mem + shm_info->frame_offset, + chunk->blocks[0].frame, frame, + (ulong) phys_offset, (ulong) phys_offset, (ulong) logi_offset, (ulong) logi_offset, + (ulong) blocks_offset, (ulong) blocks_offset); + } else { + fprintf(stderr, + "InnoDB: Buffer pool in the shared memory segment can be used as it is.\n"); + } + + if (phys_offset) { + fprintf(stderr, + "InnoDB: Aligning physical offset..."); + + memmove(frame, ((char*)chunk->mem + shm_info->frame_offset), + chunk->size * UNIV_PAGE_SIZE); + + fprintf(stderr, + " Done.\n"); + } + + if (logi_offset || blocks_offset) { + fprintf(stderr, + "InnoDB: Aligning logical offset..."); + + /* buf_block_t */ + block = chunk->blocks; + + for (i = chunk->size; i--; ) { + buf_block_reuse(block, logi_offset); + block++; + } + + /* buf_pool_t buf_pool_backup */ + UT_LIST_OFFSET(flush_list, buf_page_t, shm_info->buf_pool_backup.flush_list, + previous_frame_address, logi_offset, blocks_offset); + UT_LIST_OFFSET(free, buf_page_t, shm_info->buf_pool_backup.free, + previous_frame_address, logi_offset, blocks_offset); + UT_LIST_OFFSET(LRU, buf_page_t, shm_info->buf_pool_backup.LRU, + previous_frame_address, logi_offset, blocks_offset); + if (shm_info->buf_pool_backup.LRU_old) + shm_info->buf_pool_backup.LRU_old = + (buf_page_t*)((char*)(shm_info->buf_pool_backup.LRU_old) + + (((byte*)shm_info->buf_pool_backup.LRU_old > previous_frame_address) + ? logi_offset : blocks_offset)); + + UT_LIST_OFFSET(unzip_LRU, buf_block_t, shm_info->buf_pool_backup.unzip_LRU, + previous_frame_address, logi_offset, blocks_offset); + + UT_LIST_OFFSET(zip_list, buf_page_t, shm_info->buf_pool_backup.zip_clean, + previous_frame_address, logi_offset, blocks_offset); + for (i = 0; i < BUF_BUDDY_SIZES_MAX; i++) { + UT_LIST_OFFSET(zip_list, buf_page_t, shm_info->buf_pool_backup.zip_free[i], + previous_frame_address, logi_offset, blocks_offset); + } + + HASH_OFFSET(zip_hash_tmp, buf_page_t, hash, + previous_frame_address, logi_offset, blocks_offset); + + fprintf(stderr, + " Done.\n"); + } + } else { /* Init block structs and assign frames for them. Then we assign the frames to the first blocks (we already mapped the memory above). */ @@ -841,6 +1138,11 @@ buf_chunk_init( block++; frame += UNIV_PAGE_SIZE; } + } + + if (shm_info) { + shm_info->frame_offset = (char*)chunk->blocks[0].frame - (char*)chunk->mem; + } return(chunk); } @@ -940,6 +1242,11 @@ buf_chunk_not_freed( ready = buf_flush_ready_for_replace(&block->page); mutex_exit(&block->mutex); + if (block->page.is_corrupt) { + /* corrupt page may remain, it can be skipped */ + break; + } + if (!ready) { return(block); @@ -1017,6 +1324,8 @@ buf_chunk_free( UNIV_MEM_UNDESC(block); } + ut_a(!srv_buffer_pool_shm_key); + os_mem_free_large(chunk->mem, chunk->mem_size); } @@ -1066,7 +1375,10 @@ buf_pool_init(void) srv_buf_pool_curr_size = buf_pool->curr_size * UNIV_PAGE_SIZE; buf_pool->page_hash = hash_create(2 * buf_pool->curr_size); + /* zip_hash is allocated to shm when srv_buffer_pool_shm_key is enabled */ + if (!srv_buffer_pool_shm_key) { buf_pool->zip_hash = hash_create(2 * buf_pool->curr_size); + } buf_pool->last_printout_time = time(NULL); @@ -1081,6 +1393,86 @@ buf_pool_init(void) --------------------------- */ /* All fields are initialized by mem_zalloc(). */ + if (srv_buffer_pool_shm_key) { + buf_shm_info_t* shm_info; + + ut_a((char*)chunk->blocks == (char*)chunk->mem + sizeof(buf_shm_info_t)); + shm_info = chunk->mem; + + buf_pool->zip_hash = (hash_table_t*)((char*)chunk->mem + shm_info->zip_hash_offset); + + if(shm_info->is_new) { + shm_info->is_new = FALSE; /* initialization was finished */ + } else { + buf_block_t* block = chunk->blocks; + buf_page_t* b; + + /* shm_info->buf_pool_backup should be converted */ + /* at buf_chunk_init(). So copy simply. */ + buf_pool->flush_list = shm_info->buf_pool_backup.flush_list; + buf_pool->freed_page_clock = shm_info->buf_pool_backup.freed_page_clock; + buf_pool->free = shm_info->buf_pool_backup.free; + buf_pool->LRU = shm_info->buf_pool_backup.LRU; + buf_pool->LRU_old = shm_info->buf_pool_backup.LRU_old; + buf_pool->LRU_old_len = shm_info->buf_pool_backup.LRU_old_len; + buf_pool->unzip_LRU = shm_info->buf_pool_backup.unzip_LRU; + buf_pool->zip_clean = shm_info->buf_pool_backup.zip_clean; + for (i = 0; i < BUF_BUDDY_SIZES_MAX; i++) { + buf_pool->zip_free[i] = shm_info->buf_pool_backup.zip_free[i]; + } + + for (i = 0; i < chunk->size; i++, block++) { + if (buf_block_get_state(block) + == BUF_BLOCK_FILE_PAGE) { + ut_d(block->page.in_page_hash = TRUE); + HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, + buf_page_address_fold( + block->page.space, + block->page.offset), + &block->page); + } + } + + for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b; + b = UT_LIST_GET_NEXT(zip_list, b)) { + ut_ad(!b->in_flush_list); + ut_ad(b->in_LRU_list); + + ut_d(b->in_page_hash = TRUE); + HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, + buf_page_address_fold(b->space, b->offset), b); + } + + for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b; + b = UT_LIST_GET_NEXT(flush_list, b)) { + ut_ad(b->in_flush_list); + ut_ad(b->in_LRU_list); + + switch (buf_page_get_state(b)) { + case BUF_BLOCK_ZIP_DIRTY: + ut_d(b->in_page_hash = TRUE); + HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, + buf_page_address_fold(b->space, + b->offset), b); + break; + case BUF_BLOCK_FILE_PAGE: + /* uncompressed page */ + break; + case BUF_BLOCK_ZIP_FREE: + case BUF_BLOCK_ZIP_PAGE: + case BUF_BLOCK_NOT_USED: + case BUF_BLOCK_READY_FOR_USE: + case BUF_BLOCK_MEMORY: + case BUF_BLOCK_REMOVE_HASH: + ut_error; + break; + } + } + + + } + } + mutex_exit(&LRU_list_mutex); rw_lock_x_unlock(&page_hash_latch); buf_pool_mutex_exit(); @@ -1105,6 +1497,30 @@ buf_pool_free(void) buf_chunk_t* chunk; buf_chunk_t* chunks; + if (srv_buffer_pool_shm_key) { + buf_shm_info_t* shm_info; + + ut_a(buf_pool->n_chunks == 1); + + chunk = buf_pool->chunks; + shm_info = chunk->mem; + ut_a((char*)chunk->blocks == (char*)chunk->mem + sizeof(buf_shm_info_t)); + + /* validation the shared memory segment doesn't have unrecoverable contents. */ + /* Currently, validation became not needed */ + shm_info->reusable = TRUE; + + memcpy(&(shm_info->buf_pool_backup), buf_pool, sizeof(buf_pool_t)); + memcpy(&(shm_info->chunk_backup), chunk, sizeof(buf_chunk_t)); + + if (srv_fast_shutdown < 2) { + shm_info->checksum = ut_fold_binary((byte*)chunk->mem + sizeof(buf_shm_info_t), + chunk->mem_size - sizeof(buf_shm_info_t)); + shm_info->clean = TRUE; + } + + os_shm_free(chunk->mem, chunk->mem_size); + } else { chunks = buf_pool->chunks; chunk = chunks + buf_pool->n_chunks; @@ -1113,10 +1529,13 @@ buf_pool_free(void) would fail at shutdown. */ os_mem_free_large(chunk->mem, chunk->mem_size); } + } mem_free(buf_pool->chunks); hash_table_free(buf_pool->page_hash); + if (!srv_buffer_pool_shm_key) { hash_table_free(buf_pool->zip_hash); + } mem_free(buf_pool); buf_pool = NULL; } @@ -1311,6 +1730,11 @@ try_again: //buf_pool_mutex_enter(); mutex_enter(&LRU_list_mutex); + if (srv_buffer_pool_shm_key) { + /* Cannot support shrink */ + goto func_done; + } + shrink_again: if (buf_pool->n_chunks <= 1) { @@ -1554,6 +1978,11 @@ void buf_pool_resize(void) /*=================*/ { + if (srv_buffer_pool_shm_key) { + /* Cannot support resize */ + return; + } + //buf_pool_mutex_enter(); mutex_enter(&LRU_list_mutex); @@ -2458,7 +2887,7 @@ wait_until_unfixed: block->page.buf_fix_count = 1; buf_block_set_io_fix(block, BUF_IO_READ); - rw_lock_x_lock(&block->lock); + rw_lock_x_lock_func(&block->lock, 0, file, line); UNIV_MEM_INVALID(bpage, sizeof *bpage); diff --git a/storage/xtradb/buf/buf0flu.c b/storage/xtradb/buf/buf0flu.c index 17588475bbf..0a03d583549 100644 --- a/storage/xtradb/buf/buf0flu.c +++ b/storage/xtradb/buf/buf0flu.c @@ -257,6 +257,17 @@ buf_flush_insert_into_flush_list( ut_d(block->page.in_flush_list = TRUE); UT_LIST_ADD_FIRST(flush_list, buf_pool->flush_list, &block->page); +#ifdef UNIV_DEBUG_VALGRIND + { + ulint zip_size = buf_block_get_zip_size(block); + + if (UNIV_UNLIKELY(zip_size)) { + UNIV_MEM_ASSERT_RW(block->page.zip.data, zip_size); + } else { + UNIV_MEM_ASSERT_RW(block->frame, UNIV_PAGE_SIZE); + } + } +#endif /* UNIV_DEBUG_VALGRIND */ #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG ut_a(buf_flush_validate_low()); #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ @@ -286,6 +297,18 @@ buf_flush_insert_sorted_into_flush_list( ut_ad(!block->page.in_flush_list); ut_d(block->page.in_flush_list = TRUE); +#ifdef UNIV_DEBUG_VALGRIND + { + ulint zip_size = buf_block_get_zip_size(block); + + if (UNIV_UNLIKELY(zip_size)) { + UNIV_MEM_ASSERT_RW(block->page.zip.data, zip_size); + } else { + UNIV_MEM_ASSERT_RW(block->frame, UNIV_PAGE_SIZE); + } + } +#endif /* UNIV_DEBUG_VALGRIND */ + prev_b = NULL; /* For the most part when this function is called the flush_rbt @@ -830,6 +853,7 @@ try_again: zip_size = buf_page_get_zip_size(bpage); if (UNIV_UNLIKELY(zip_size)) { + UNIV_MEM_ASSERT_RW(bpage->zip.data, zip_size); /* Copy the compressed page and clear the rest. */ memcpy(trx_doublewrite->write_buf + UNIV_PAGE_SIZE * trx_doublewrite->first_free, @@ -839,6 +863,8 @@ try_again: + zip_size, 0, UNIV_PAGE_SIZE - zip_size); } else { ut_a(buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE); + UNIV_MEM_ASSERT_RW(((buf_block_t*) bpage)->frame, + UNIV_PAGE_SIZE); memcpy(trx_doublewrite->write_buf + UNIV_PAGE_SIZE * trx_doublewrite->first_free, @@ -1533,6 +1559,7 @@ retry: } else if (!have_LRU_mutex) { /* confirm it again with LRU_mutex for exactness */ have_LRU_mutex = TRUE; + distance = 0; goto retry; } diff --git a/storage/xtradb/buf/buf0lru.c b/storage/xtradb/buf/buf0lru.c index 7b01c4aec50..14ec1720873 100644 --- a/storage/xtradb/buf/buf0lru.c +++ b/storage/xtradb/buf/buf0lru.c @@ -1455,7 +1455,7 @@ buf_LRU_make_block_old( Try to free a block. If bpage is a descriptor of a compressed-only page, the descriptor object will be freed as well. -NOTE: If this function returns BUF_LRU_FREED, it will not temporarily +NOTE: If this function returns BUF_LRU_FREED, it will temporarily release buf_pool_mutex. Furthermore, the page frame will no longer be accessible via bpage. diff --git a/storage/xtradb/dict/dict0boot.c b/storage/xtradb/dict/dict0boot.c index 0a713f0deaa..43cfced65a0 100644 --- a/storage/xtradb/dict/dict0boot.c +++ b/storage/xtradb/dict/dict0boot.c @@ -62,32 +62,47 @@ dict_hdr_get( } /**********************************************************************//** -Returns a new table, index, or tree id. -@return the new id */ +Returns a new table, index, or space id. */ UNIV_INTERN -dulint +void dict_hdr_get_new_id( /*================*/ - ulint type) /*!< in: DICT_HDR_ROW_ID, ... */ + dulint* table_id, /*!< out: table id (not assigned if NULL) */ + dulint* index_id, /*!< out: index id (not assigned if NULL) */ + ulint* space_id) /*!< out: space id (not assigned if NULL) */ { dict_hdr_t* dict_hdr; dulint id; mtr_t mtr; - ut_ad((type == DICT_HDR_TABLE_ID) || (type == DICT_HDR_INDEX_ID)); - mtr_start(&mtr); dict_hdr = dict_hdr_get(&mtr); - id = mtr_read_dulint(dict_hdr + type, &mtr); - id = ut_dulint_add(id, 1); + if (table_id) { + id = mtr_read_dulint(dict_hdr + DICT_HDR_TABLE_ID, &mtr); + id = ut_dulint_add(id, 1); + mlog_write_dulint(dict_hdr + DICT_HDR_TABLE_ID, id, &mtr); + *table_id = id; + } - mlog_write_dulint(dict_hdr + type, id, &mtr); + if (index_id) { + id = mtr_read_dulint(dict_hdr + DICT_HDR_INDEX_ID, &mtr); + id = ut_dulint_add(id, 1); + mlog_write_dulint(dict_hdr + DICT_HDR_INDEX_ID, id, &mtr); + *index_id = id; + } - mtr_commit(&mtr); + if (space_id) { + *space_id = mtr_read_ulint(dict_hdr + DICT_HDR_MAX_SPACE_ID, + MLOG_4BYTES, &mtr); + if (fil_assign_new_space_id(space_id)) { + mlog_write_ulint(dict_hdr + DICT_HDR_MAX_SPACE_ID, + *space_id, MLOG_4BYTES, &mtr); + } + } - return(id); + mtr_commit(&mtr); } /**********************************************************************//** @@ -151,9 +166,12 @@ dict_hdr_create( mlog_write_dulint(dict_header + DICT_HDR_INDEX_ID, ut_dulint_create(0, DICT_HDR_FIRST_ID), mtr); - /* Obsolete, but we must initialize it to 0 anyway. */ - mlog_write_dulint(dict_header + DICT_HDR_MIX_ID, - ut_dulint_create(0, DICT_HDR_FIRST_ID), mtr); + mlog_write_ulint(dict_header + DICT_HDR_MAX_SPACE_ID, + 0, MLOG_4BYTES, mtr); + + /* Obsolete, but we must initialize it anyway. */ + mlog_write_ulint(dict_header + DICT_HDR_MIX_ID_LOW, + DICT_HDR_FIRST_ID, MLOG_4BYTES, mtr); /* Create the B-tree roots for the clustered indexes of the basic system tables */ @@ -245,6 +263,29 @@ dict_boot(void) /* Get the dictionary header */ dict_hdr = dict_hdr_get(&mtr); + if (ut_dulint_cmp(mtr_read_dulint(dict_hdr + DICT_HDR_XTRADB_MARK, &mtr), + DICT_HDR_XTRADB_FLAG) != 0) { + /* not extended yet by XtraDB, need to be extended */ + ulint root_page_no; + + root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE, + DICT_HDR_SPACE, 0, DICT_STATS_ID, + dict_ind_redundant, &mtr); + if (root_page_no == FIL_NULL) { + fprintf(stderr, "InnoDB: Warning: failed to create SYS_STATS btr.\n"); + srv_use_sys_stats_table = FALSE; + } else { + mlog_write_ulint(dict_hdr + DICT_HDR_STATS, root_page_no, + MLOG_4BYTES, &mtr); + mlog_write_dulint(dict_hdr + DICT_HDR_XTRADB_MARK, + DICT_HDR_XTRADB_FLAG, &mtr); + } + mtr_commit(&mtr); + /* restart mtr */ + mtr_start(&mtr); + dict_hdr = dict_hdr_get(&mtr); + } + /* Because we only write new row ids to disk-based data structure (dictionary header) when it is divisible by DICT_HDR_ROW_ID_WRITE_MARGIN, in recovery we will not recover @@ -406,7 +447,7 @@ dict_boot(void) table->id = DICT_FIELDS_ID; dict_table_add_to_cache(table, heap); dict_sys->sys_fields = table; - mem_heap_free(heap); + mem_heap_empty(heap); index = dict_mem_index_create("SYS_FIELDS", "CLUST_IND", DICT_HDR_SPACE, @@ -423,6 +464,41 @@ dict_boot(void) FALSE); ut_a(error == DB_SUCCESS); + /*-------------------------*/ + table = dict_mem_table_create("SYS_STATS", DICT_HDR_SPACE, 3, 0); + table->n_mysql_handles_opened = 1; /* for pin */ + + dict_mem_table_add_col(table, heap, "INDEX_ID", DATA_BINARY, 0, 0); + dict_mem_table_add_col(table, heap, "KEY_COLS", DATA_INT, 0, 4); + dict_mem_table_add_col(table, heap, "DIFF_VALS", DATA_BINARY, 0, 0); + + /* The '+ 2' below comes from the fields DB_TRX_ID, DB_ROLL_PTR */ +#if DICT_SYS_STATS_DIFF_VALS_FIELD != 2 + 2 +#error "DICT_SYS_STATS_DIFF_VALS_FIELD != 2 + 2" +#endif + + table->id = DICT_STATS_ID; + dict_table_add_to_cache(table, heap); + dict_sys->sys_stats = table; + mem_heap_empty(heap); + + index = dict_mem_index_create("SYS_STATS", "CLUST_IND", + DICT_HDR_SPACE, + DICT_UNIQUE | DICT_CLUSTERED, 2); + + dict_mem_index_add_field(index, "INDEX_ID", 0); + dict_mem_index_add_field(index, "KEY_COLS", 0); + + index->id = DICT_STATS_ID; + error = dict_index_add_to_cache(table, index, + mtr_read_ulint(dict_hdr + + DICT_HDR_STATS, + MLOG_4BYTES, &mtr), + FALSE); + ut_a(error == DB_SUCCESS); + + mem_heap_free(heap); + mtr_commit(&mtr); /*-------------------------*/ @@ -436,6 +512,7 @@ dict_boot(void) dict_load_sys_table(dict_sys->sys_columns); dict_load_sys_table(dict_sys->sys_indexes); dict_load_sys_table(dict_sys->sys_fields); + dict_load_sys_table(dict_sys->sys_stats); mutex_exit(&(dict_sys->mutex)); } diff --git a/storage/xtradb/dict/dict0crea.c b/storage/xtradb/dict/dict0crea.c index d4e735c73ca..258bf77d1fc 100644 --- a/storage/xtradb/dict/dict0crea.c +++ b/storage/xtradb/dict/dict0crea.c @@ -239,16 +239,34 @@ dict_build_table_def_step( const char* path_or_name; ibool is_path; mtr_t mtr; + ulint space = 0; + ibool file_per_table; ut_ad(mutex_own(&(dict_sys->mutex))); table = node->table; - table->id = dict_hdr_get_new_id(DICT_HDR_TABLE_ID); + /* Cache the global variable "srv_file_per_table" to + a local variable before using it. Please note + "srv_file_per_table" is not under dict_sys mutex + protection, and could be changed while executing + this function. So better to cache the current value + to a local variable, and all future reference to + "srv_file_per_table" should use this local variable. */ + file_per_table = srv_file_per_table; + + dict_hdr_get_new_id(&table->id, NULL, NULL); thr_get_trx(thr)->table_id = table->id; - if (srv_file_per_table) { + if (file_per_table) { + /* Get a new space id if srv_file_per_table is set */ + dict_hdr_get_new_id(NULL, NULL, &space); + + if (UNIV_UNLIKELY(space == ULINT_UNDEFINED)) { + return(DB_ERROR); + } + /* We create a new single-table tablespace for the table. We initially let it be 4 pages: - page 0 is the fsp header and an extent descriptor page, @@ -257,8 +275,6 @@ dict_build_table_def_step( - page 3 will contain the root of the clustered index of the table we create here. */ - ulint space = 0; /* reset to zero for the call below */ - if (table->dir_path_of_temp_table) { /* We place tables created with CREATE TEMPORARY TABLE in the tmp dir of mysqld server */ @@ -276,7 +292,7 @@ dict_build_table_def_step( flags = table->flags & ~(~0 << DICT_TF_BITS); error = fil_create_new_single_table_tablespace( - &space, path_or_name, is_path, + space, path_or_name, is_path, flags == DICT_TF_COMPACT ? 0 : flags, FIL_IBD_FILE_INITIAL_SIZE); table->space = (unsigned int) space; @@ -492,6 +508,51 @@ dict_create_sys_fields_tuple( } /*****************************************************************//** +Based on an index object, this function builds the entry to be inserted +in the SYS_STATS system table. +@return the tuple which should be inserted */ +static +dtuple_t* +dict_create_sys_stats_tuple( +/*========================*/ + const dict_index_t* index, + ulint i, + mem_heap_t* heap) +{ + dict_table_t* sys_stats; + dtuple_t* entry; + dfield_t* dfield; + byte* ptr; + + ut_ad(index); + ut_ad(heap); + + sys_stats = dict_sys->sys_stats; + + entry = dtuple_create(heap, 3 + DATA_N_SYS_COLS); + + dict_table_copy_types(entry, sys_stats); + + /* 0: INDEX_ID -----------------------*/ + dfield = dtuple_get_nth_field(entry, 0/*INDEX_ID*/); + ptr = mem_heap_alloc(heap, 8); + mach_write_to_8(ptr, index->id); + dfield_set_data(dfield, ptr, 8); + /* 1: KEY_COLS -----------------------*/ + dfield = dtuple_get_nth_field(entry, 1/*KEY_COLS*/); + ptr = mem_heap_alloc(heap, 4); + mach_write_to_4(ptr, i); + dfield_set_data(dfield, ptr, 4); + /* 4: DIFF_VALS ----------------------*/ + dfield = dtuple_get_nth_field(entry, 2/*DIFF_VALS*/); + ptr = mem_heap_alloc(heap, 8); + mach_write_to_8(ptr, ut_dulint_zero); /* initial value is 0 */ + dfield_set_data(dfield, ptr, 8); + + return(entry); +} + +/*****************************************************************//** Creates the tuple with which the index entry is searched for writing the index tree root page number, if such a tree is created. @return the tuple for search */ @@ -561,7 +622,7 @@ dict_build_index_def_step( ut_ad((UT_LIST_GET_LEN(table->indexes) > 0) || dict_index_is_clust(index)); - index->id = dict_hdr_get_new_id(DICT_HDR_INDEX_ID); + dict_hdr_get_new_id(NULL, &index->id, NULL); /* Inherit the space id from the table; we store all indexes of a table in the same tablespace */ @@ -601,6 +662,27 @@ dict_build_field_def_step( } /***************************************************************//** +Builds a row for storing stats to insert. +@return DB_SUCCESS */ +static +ulint +dict_build_stats_def_step( +/*======================*/ + ind_node_t* node) +{ + dict_index_t* index; + dtuple_t* row; + + index = node->index; + + row = dict_create_sys_stats_tuple(index, node->stats_no, node->heap); + + ins_node_set_new_row(node->stats_def, row); + + return(DB_SUCCESS); +} + +/***************************************************************//** Creates an index tree for the index if it is not a member of a cluster. @return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */ static @@ -924,6 +1006,49 @@ ind_create_graph_create( dict_sys->sys_fields, heap); node->field_def->common.parent = node; + if (srv_use_sys_stats_table) { + node->stats_def = ins_node_create(INS_DIRECT, + dict_sys->sys_stats, heap); + node->stats_def->common.parent = node; + } else { + node->stats_def = NULL; + } + + node->commit_node = commit_node_create(heap); + node->commit_node->common.parent = node; + + return(node); +} + +/*********************************************************************//** +*/ +UNIV_INTERN +ind_node_t* +ind_insert_stats_graph_create( +/*==========================*/ + dict_index_t* index, + mem_heap_t* heap) +{ + ind_node_t* node; + + node = mem_heap_alloc(heap, sizeof(ind_node_t)); + + node->common.type = QUE_NODE_INSERT_STATS; + + node->index = index; + + node->state = INDEX_BUILD_STATS_COLS; + node->page_no = FIL_NULL; + node->heap = mem_heap_create(256); + + node->ind_def = NULL; + node->field_def = NULL; + + node->stats_def = ins_node_create(INS_DIRECT, + dict_sys->sys_stats, heap); + node->stats_def->common.parent = node; + node->stats_no = 0; + node->commit_node = commit_node_create(heap); node->commit_node->common.parent = node; @@ -1074,6 +1199,7 @@ dict_create_index_step( node->state = INDEX_BUILD_FIELD_DEF; node->field_no = 0; + node->stats_no = 0; thr->run_node = node->ind_def; @@ -1119,7 +1245,31 @@ dict_create_index_step( goto function_exit; } - node->state = INDEX_CREATE_INDEX_TREE; + if (srv_use_sys_stats_table) { + node->state = INDEX_BUILD_STATS_COLS; + } else { + node->state = INDEX_CREATE_INDEX_TREE; + } + } + + if (node->state == INDEX_BUILD_STATS_COLS) { + if (node->stats_no <= dict_index_get_n_unique(node->index)) { + + err = dict_build_stats_def_step(node); + + if (err != DB_SUCCESS) { + + goto function_exit; + } + + node->stats_no++; + + thr->run_node = node->stats_def; + + return(thr); + } else { + node->state = INDEX_CREATE_INDEX_TREE; + } } if (node->state == INDEX_CREATE_INDEX_TREE) { @@ -1171,6 +1321,66 @@ function_exit: } /****************************************************************//** +*/ +UNIV_INTERN +que_thr_t* +dict_insert_stats_step( +/*===================*/ + que_thr_t* thr) /*!< in: query thread */ +{ + ind_node_t* node; + ulint err = DB_ERROR; + trx_t* trx; + + ut_ad(thr); + + trx = thr_get_trx(thr); + + node = thr->run_node; + + if (thr->prev_node == que_node_get_parent(node)) { + node->state = INDEX_BUILD_STATS_COLS; + } + + if (node->state == INDEX_BUILD_STATS_COLS) { + if (node->stats_no <= dict_index_get_n_unique(node->index)) { + + err = dict_build_stats_def_step(node); + + if (err != DB_SUCCESS) { + + goto function_exit; + } + + node->stats_no++; + + thr->run_node = node->stats_def; + + return(thr); + } else { + node->state = INDEX_COMMIT_WORK; + } + } + + if (node->state == INDEX_COMMIT_WORK) { + + /* do not commit transaction here for now */ + } + +function_exit: + trx->error_state = err; + + if (err == DB_SUCCESS) { + } else { + return(NULL); + } + + thr->run_node = que_node_get_parent(node); + + return(thr); +} + +/****************************************************************//** Creates the foreign key constraints system tables inside InnoDB at database creation or database start if they are not found or are not of the right form. diff --git a/storage/xtradb/dict/dict0dict.c b/storage/xtradb/dict/dict0dict.c index 1d088b2e02b..51ee7f9246f 100644 --- a/storage/xtradb/dict/dict0dict.c +++ b/storage/xtradb/dict/dict0dict.c @@ -83,7 +83,7 @@ static char dict_ibfk[] = "_ibfk_"; /** array of mutexes protecting dict_index_t::stat_n_diff_key_vals[] */ #define DICT_INDEX_STAT_MUTEX_SIZE 32 -mutex_t dict_index_stat_mutex[DICT_INDEX_STAT_MUTEX_SIZE]; +static mutex_t dict_index_stat_mutex[DICT_INDEX_STAT_MUTEX_SIZE]; /*******************************************************************//** Tries to find column names for the index and sets the col field of the @@ -571,13 +571,11 @@ dict_table_get_on_id( if (ut_dulint_cmp(table_id, DICT_FIELDS_ID) <= 0 || trx->dict_operation_lock_mode == RW_X_LATCH) { - /* It is a system table which will always exist in the table - cache: we avoid acquiring the dictionary mutex, because - if we are doing a rollback to handle an error in TABLE - CREATE, for example, we already have the mutex! */ - ut_ad(mutex_own(&(dict_sys->mutex)) - || trx->dict_operation_lock_mode == RW_X_LATCH); + /* Note: An X latch implies that the transaction + already owns the dictionary mutex. */ + + ut_ad(mutex_own(&dict_sys->mutex)); return(dict_table_get_on_id_low(table_id)); } @@ -712,7 +710,7 @@ dict_table_get( /* If table->ibd_file_missing == TRUE, this will print an error message and return without doing anything. */ - dict_update_statistics(table); + dict_update_statistics(table, FALSE); } } @@ -855,7 +853,8 @@ dict_table_add_to_cache( /* Add table to LRU list of tables */ UT_LIST_ADD_FIRST(table_LRU, dict_sys->table_LRU, table); - dict_sys->size += mem_heap_get_size(table->heap); + dict_sys->size += mem_heap_get_size(table->heap) + + strlen(table->name) + 1; } /**********************************************************************//** @@ -909,14 +908,21 @@ dict_table_rename_in_cache( dict_foreign_t* foreign; dict_index_t* index; ulint fold; - ulint old_size; - const char* old_name; + char old_name[MAX_TABLE_NAME_LEN + 1]; ut_ad(table); ut_ad(mutex_own(&(dict_sys->mutex))); - old_size = mem_heap_get_size(table->heap); - old_name = table->name; + /* store the old/current name to an automatic variable */ + if (strlen(table->name) + 1 <= sizeof(old_name)) { + memcpy(old_name, table->name, strlen(table->name) + 1); + } else { + ut_print_timestamp(stderr); + fprintf(stderr, "InnoDB: too long table name: '%s', " + "max length is %d\n", table->name, + MAX_TABLE_NAME_LEN); + ut_error; + } fold = ut_fold_string(new_name); @@ -962,12 +968,22 @@ dict_table_rename_in_cache( /* Remove table from the hash tables of tables */ HASH_DELETE(dict_table_t, name_hash, dict_sys->table_hash, ut_fold_string(old_name), table); - table->name = mem_heap_strdup(table->heap, new_name); + + if (strlen(new_name) > strlen(table->name)) { + /* We allocate MAX_TABLE_NAME_LEN+1 bytes here to avoid + memory fragmentation, we assume a repeated calls of + ut_realloc() with the same size do not cause fragmentation */ + ut_a(strlen(new_name) <= MAX_TABLE_NAME_LEN); + table->name = ut_realloc(table->name, MAX_TABLE_NAME_LEN + 1); + } + memcpy(table->name, new_name, strlen(new_name) + 1); /* Add table to hash table of tables */ HASH_INSERT(dict_table_t, name_hash, dict_sys->table_hash, fold, table); - dict_sys->size += (mem_heap_get_size(table->heap) - old_size); + + dict_sys->size += strlen(new_name) - strlen(old_name); + ut_a(dict_sys->size > 0); /* Update the table_name field in indexes */ index = dict_table_get_first_index(table); @@ -1192,7 +1208,7 @@ dict_table_remove_from_cache( /* Remove table from LRU list of tables */ UT_LIST_REMOVE(table_LRU, dict_sys->table_LRU, table); - size = mem_heap_get_size(table->heap); + size = mem_heap_get_size(table->heap) + strlen(table->name) + 1; ut_ad(dict_sys->size >= size); @@ -3076,25 +3092,28 @@ static char* dict_strip_comments( /*================*/ - const char* sql_string) /*!< in: SQL string */ + const char* sql_string, /*!< in: SQL string */ + size_t sql_length) /*!< in: length of sql_string */ { char* str; const char* sptr; + const char* eptr = sql_string + sql_length; char* ptr; /* unclosed quote character (0 if none) */ char quote = 0; - str = mem_alloc(strlen(sql_string) + 1); + str = mem_alloc(sql_length + 1); sptr = sql_string; ptr = str; for (;;) { scan_more: - if (*sptr == '\0') { + if (sptr >= eptr || *sptr == '\0') { +end_of_string: *ptr = '\0'; - ut_a(ptr <= str + strlen(sql_string)); + ut_a(ptr <= str + sql_length); return(str); } @@ -3113,30 +3132,35 @@ scan_more: || (sptr[0] == '-' && sptr[1] == '-' && sptr[2] == ' ')) { for (;;) { + if (++sptr >= eptr) { + goto end_of_string; + } + /* In Unix a newline is 0x0A while in Windows it is 0x0D followed by 0x0A */ - if (*sptr == (char)0x0A - || *sptr == (char)0x0D - || *sptr == '\0') { - + switch (*sptr) { + case (char) 0X0A: + case (char) 0x0D: + case '\0': goto scan_more; } - - sptr++; } } else if (!quote && *sptr == '/' && *(sptr + 1) == '*') { + sptr += 2; for (;;) { - if (*sptr == '*' && *(sptr + 1) == '/') { - - sptr += 2; - - goto scan_more; + if (sptr >= eptr) { + goto end_of_string; } - if (*sptr == '\0') { - + switch (*sptr) { + case '\0': goto scan_more; + case '*': + if (sptr[1] == '/') { + sptr += 2; + goto scan_more; + } } sptr++; @@ -3817,6 +3841,7 @@ dict_create_foreign_constraints( name before it: test.table2; the default database id the database of parameter name */ + size_t sql_length, /*!< in: length of sql_string */ const char* name, /*!< in: table full name in the normalized form database_name/table_name */ @@ -3831,7 +3856,7 @@ dict_create_foreign_constraints( ut_a(trx); ut_a(trx->mysql_thd); - str = dict_strip_comments(sql_string); + str = dict_strip_comments(sql_string, sql_length); heap = mem_heap_create(10000); err = dict_create_foreign_constraints_low( @@ -3864,6 +3889,7 @@ dict_foreign_parse_drop_constraints( dict_foreign_t* foreign; ibool success; char* str; + size_t len; const char* ptr; const char* id; FILE* ef = dict_foreign_err_file; @@ -3878,7 +3904,10 @@ dict_foreign_parse_drop_constraints( *constraints_to_drop = mem_heap_alloc(heap, 1000 * sizeof(char*)); - str = dict_strip_comments(*(trx->mysql_query_str)); + ptr = innobase_get_stmt(trx->mysql_thd, &len); + + str = dict_strip_comments(ptr, len); + ptr = str; ut_ad(mutex_own(&(dict_sys->mutex))); @@ -4219,6 +4248,259 @@ dict_index_calc_min_rec_len( } /*********************************************************************//** +functions to use SYS_STATS system table. */ +static +ibool +dict_reload_statistics( +/*===================*/ + dict_table_t* table, + ulint* sum_of_index_sizes) +{ + dict_index_t* index; + ulint size; + mem_heap_t* heap; + + index = dict_table_get_first_index(table); + + if (index == NULL) { + /* Table definition is corrupt */ + + return(FALSE); + } + + heap = mem_heap_create(1000); + + while (index) { + if (table->is_corrupt) { + ut_a(srv_pass_corrupt_table); + mem_heap_free(heap); + return(FALSE); + } + + size = btr_get_size(index, BTR_TOTAL_SIZE); + + index->stat_index_size = size; + + *sum_of_index_sizes += size; + + size = btr_get_size(index, BTR_N_LEAF_PAGES); + + if (size == 0) { + /* The root node of the tree is a leaf */ + size = 1; + } + + index->stat_n_leaf_pages = size; + +/*===========================================*/ +{ + dict_table_t* sys_stats; + dict_index_t* sys_index; + btr_pcur_t pcur; + dtuple_t* tuple; + dfield_t* dfield; + ulint key_cols; + ulint n_cols; + const rec_t* rec; + const byte* field; + ulint len; + ib_int64_t* stat_n_diff_key_vals_tmp; + byte* buf; + ulint i; + mtr_t mtr; + + n_cols = dict_index_get_n_unique(index); + stat_n_diff_key_vals_tmp = mem_heap_zalloc(heap, (n_cols + 1) * sizeof(ib_int64_t)); + + sys_stats = dict_sys->sys_stats; + sys_index = UT_LIST_GET_FIRST(sys_stats->indexes); + ut_a(!dict_table_is_comp(sys_stats)); + + tuple = dtuple_create(heap, 1); + dfield = dtuple_get_nth_field(tuple, 0); + + buf = mem_heap_alloc(heap, 8); + mach_write_to_8(buf, index->id); + + dfield_set_data(dfield, buf, 8); + dict_index_copy_types(tuple, sys_index, 1); + + mtr_start(&mtr); + + btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE, + BTR_SEARCH_LEAF, &pcur, &mtr); + for (i = 0; i <= n_cols; i++) { + rec = btr_pcur_get_rec(&pcur); + + if (!btr_pcur_is_on_user_rec(&pcur) + || ut_dulint_cmp(mach_read_from_8(rec_get_nth_field_old(rec, 0, &len)), + index->id)) { + /* not found: even 1 if not found should not be alowed */ + fprintf(stderr, "InnoDB: Warning: stats for %s/%s (%lu/%lu)" + " not fonund in SYS_STATS\n", + index->table_name, index->name, i, n_cols); + btr_pcur_close(&pcur); + mtr_commit(&mtr); + mem_heap_free(heap); + return(FALSE); + } + + if (rec_get_deleted_flag(rec, 0)) { + goto next_rec; + } + + field = rec_get_nth_field_old(rec, 1, &len); + ut_a(len == 4); + + key_cols = mach_read_from_4(field); + + ut_a(i == key_cols); + + field = rec_get_nth_field_old(rec, DICT_SYS_STATS_DIFF_VALS_FIELD, &len); + ut_a(len == 8); + + stat_n_diff_key_vals_tmp[i] = ut_conv_dulint_to_longlong(mach_read_from_8(field)); +next_rec: + btr_pcur_move_to_next_user_rec(&pcur, &mtr); + } + + btr_pcur_close(&pcur); + mtr_commit(&mtr); + + dict_index_stat_mutex_enter(index); + for (i = 0; i <= n_cols; i++) { + index->stat_n_diff_key_vals[i] = stat_n_diff_key_vals_tmp[i]; + } + dict_index_stat_mutex_exit(index); +} +/*===========================================*/ + + index = dict_table_get_next_index(index); + } + + mem_heap_free(heap); + return(TRUE); +} + +static +void +dict_store_statistics( +/*==================*/ + dict_table_t* table) +{ + dict_index_t* index; + mem_heap_t* heap; + + index = dict_table_get_first_index(table); + + ut_a(index); + + heap = mem_heap_create(1000); + + while (index) { + if (table->is_corrupt) { + ut_a(srv_pass_corrupt_table); + mem_heap_free(heap); + return; + } + +/*===========================================*/ +{ + dict_table_t* sys_stats; + dict_index_t* sys_index; + btr_pcur_t pcur; + dtuple_t* tuple; + dfield_t* dfield; + ulint key_cols; + ulint n_cols; + ulint rests; + const rec_t* rec; + const byte* field; + ulint len; + ib_int64_t* stat_n_diff_key_vals_tmp; + byte* buf; + ulint i; + mtr_t mtr; + + n_cols = dict_index_get_n_unique(index); + stat_n_diff_key_vals_tmp = mem_heap_zalloc(heap, (n_cols + 1) * sizeof(ib_int64_t)); + + dict_index_stat_mutex_enter(index); + for (i = 0; i <= n_cols; i++) { + stat_n_diff_key_vals_tmp[i] = index->stat_n_diff_key_vals[i]; + } + dict_index_stat_mutex_exit(index); + + sys_stats = dict_sys->sys_stats; + sys_index = UT_LIST_GET_FIRST(sys_stats->indexes); + ut_a(!dict_table_is_comp(sys_stats)); + + tuple = dtuple_create(heap, 1); + dfield = dtuple_get_nth_field(tuple, 0); + + buf = mem_heap_alloc(heap, 8); + mach_write_to_8(buf, index->id); + + dfield_set_data(dfield, buf, 8); + dict_index_copy_types(tuple, sys_index, 1); + + mtr_start(&mtr); + + btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE, + BTR_MODIFY_LEAF, &pcur, &mtr); + rests = n_cols + 1; + for (i = 0; i <= n_cols; i++) { + rec = btr_pcur_get_rec(&pcur); + + if (!btr_pcur_is_on_user_rec(&pcur) + || ut_dulint_cmp(mach_read_from_8(rec_get_nth_field_old(rec, 0, &len)), + index->id)) { + /* not found */ + btr_pcur_close(&pcur); + mtr_commit(&mtr); + break; + } + + if (rec_get_deleted_flag(rec, 0)) { + goto next_rec; + } + + field = rec_get_nth_field_old(rec, 1, &len); + ut_a(len == 4); + + key_cols = mach_read_from_4(field); + + field = rec_get_nth_field_old(rec, DICT_SYS_STATS_DIFF_VALS_FIELD, &len); + ut_a(len == 8); + + mlog_write_dulint((byte*)field, + ut_dulint_create((ulint) (stat_n_diff_key_vals_tmp[key_cols] >> 32), + (ulint) stat_n_diff_key_vals_tmp[key_cols] & 0xFFFFFFFF), + &mtr); + + rests--; + +next_rec: + btr_pcur_move_to_next_user_rec(&pcur, &mtr); + } + btr_pcur_close(&pcur); + mtr_commit(&mtr); + + if (rests) { + fprintf(stderr, "InnoDB: Warning: failed to store %lu stats entries" + " of %s/%s to SYS_STATS system table.\n", + rests, index->table_name, index->name); + } +} +/*===========================================*/ + + index = dict_table_get_next_index(index); + } + + mem_heap_free(heap); +} + +/*********************************************************************//** Calculates new estimates for table and index statistics. The statistics are used in query optimization. */ UNIV_INTERN @@ -4226,9 +4508,10 @@ void dict_update_statistics_low( /*=======================*/ dict_table_t* table, /*!< in/out: table */ - ibool has_dict_mutex __attribute__((unused))) + ibool has_dict_mutex __attribute__((unused)), /*!< in: TRUE if the caller has the dictionary mutex */ + ibool sync) /*!< in: TRUE if must update SYS_STATS */ { dict_index_t* index; ulint size; @@ -4254,6 +4537,23 @@ dict_update_statistics_low( return; } + if (srv_use_sys_stats_table && !sync) { + /* reload statistics from SYS_STATS table */ + if (dict_reload_statistics(table, &sum_of_index_sizes)) { + /* success */ +#ifdef UNIV_DEBUG + fprintf(stderr, "InnoDB: DEBUG: reload_statistics is scceeded for %s.\n", + table->name); +#endif + goto end; + } + } +#ifdef UNIV_DEBUG + fprintf(stderr, "InnoDB: DEBUG: update_statistics for %s.\n", + table->name); +#endif + sum_of_index_sizes = 0; + /* Find out the sizes of the indexes and how many different values for the key they approximately have */ @@ -4291,6 +4591,11 @@ dict_update_statistics_low( index = dict_table_get_next_index(index); } + if (srv_use_sys_stats_table) { + /* store statistics to SYS_STATS table */ + dict_store_statistics(table); + } +end: index = dict_table_get_first_index(table); dict_index_stat_mutex_enter(index); @@ -4317,9 +4622,10 @@ UNIV_INTERN void dict_update_statistics( /*===================*/ - dict_table_t* table) /*!< in/out: table */ + dict_table_t* table, /*!< in/out: table */ + ibool sync) { - dict_update_statistics_low(table, FALSE); + dict_update_statistics_low(table, FALSE, sync); } /**********************************************************************//** @@ -4400,7 +4706,7 @@ dict_table_print_low( ut_ad(mutex_own(&(dict_sys->mutex))); if (srv_stats_auto_update) - dict_update_statistics_low(table, TRUE); + dict_update_statistics_low(table, TRUE, FALSE); fprintf(stderr, "--------------------------------------\n" diff --git a/storage/xtradb/dict/dict0load.c b/storage/xtradb/dict/dict0load.c index 528c3786254..0d8292cc2bf 100644 --- a/storage/xtradb/dict/dict0load.c +++ b/storage/xtradb/dict/dict0load.c @@ -223,7 +223,7 @@ loop: is no index */ if (srv_stats_auto_update && dict_table_get_first_index(table)) { - dict_update_statistics_low(table, TRUE); + dict_update_statistics_low(table, TRUE, FALSE); } dict_table_print_low(table); @@ -317,7 +317,7 @@ dict_check_tablespaces_and_store_max_id( dict_index_t* sys_index; btr_pcur_t pcur; const rec_t* rec; - ulint max_space_id = 0; + ulint max_space_id; mtr_t mtr; mutex_enter(&(dict_sys->mutex)); @@ -328,6 +328,11 @@ dict_check_tablespaces_and_store_max_id( sys_index = UT_LIST_GET_FIRST(sys_tables->indexes); ut_a(!dict_table_is_comp(sys_tables)); + max_space_id = mtr_read_ulint(dict_hdr_get(&mtr) + + DICT_HDR_MAX_SPACE_ID, + MLOG_4BYTES, &mtr); + fil_set_max_space_id_if_bigger(max_space_id); + btr_pcur_open_at_index_side(TRUE, sys_index, BTR_SEARCH_LEAF, &pcur, TRUE, &mtr); loop: @@ -974,6 +979,7 @@ err_exit: /* Try to open the tablespace */ if (!fil_open_single_table_tablespace( TRUE, space, + flags == DICT_TF_COMPACT ? 0 : flags & ~(~0 << DICT_TF_BITS), name)) { /* We failed to find a sensible tablespace file */ diff --git a/storage/xtradb/dict/dict0mem.c b/storage/xtradb/dict/dict0mem.c index 388d46568ac..f2d219bfd4f 100644 --- a/storage/xtradb/dict/dict0mem.c +++ b/storage/xtradb/dict/dict0mem.c @@ -68,7 +68,8 @@ dict_mem_table_create( table->heap = heap; table->flags = (unsigned int) flags; - table->name = mem_heap_strdup(heap, name); + table->name = ut_malloc(strlen(name) + 1); + memcpy(table->name, name, strlen(name) + 1); table->space = (unsigned int) space; table->n_cols = (unsigned int) (n_cols + DATA_N_SYS_COLS); @@ -108,6 +109,7 @@ dict_mem_table_free( #ifndef UNIV_HOTBACKUP mutex_free(&(table->autoinc_mutex)); #endif /* UNIV_HOTBACKUP */ + ut_free(table->name); mem_heap_free(table->heap); } diff --git a/storage/xtradb/fil/fil0fil.c b/storage/xtradb/fil/fil0fil.c index 189c0e706f9..0139fa0cce5 100644 --- a/storage/xtradb/fil/fil0fil.c +++ b/storage/xtradb/fil/fil0fil.c @@ -43,8 +43,8 @@ Created 10/25/1995 Heikki Tuuri #include "trx0trx.h" #include "trx0sys.h" #include "pars0pars.h" -#include "row0row.h" #include "row0mysql.h" +#include "row0row.h" #include "que0que.h" #ifndef UNIV_HOTBACKUP # include "buf0lru.h" @@ -286,6 +286,10 @@ struct fil_system_struct { request */ UT_LIST_BASE_NODE_T(fil_space_t) space_list; /*!< list of all file spaces */ + ibool space_id_reuse_warned; + /* !< TRUE if fil_space_create() + has issued a warning about + potential space_id reuse */ }; /** The tablespace memory cache. This variable is NULL before the module is @@ -1200,7 +1204,19 @@ try_again: space->tablespace_version = fil_system->tablespace_version; space->mark = FALSE; - if (purpose == FIL_TABLESPACE && id > fil_system->max_assigned_id) { + if (UNIV_LIKELY(purpose == FIL_TABLESPACE && !recv_recovery_on) + && UNIV_UNLIKELY(id > fil_system->max_assigned_id)) { + if (!fil_system->space_id_reuse_warned) { + fil_system->space_id_reuse_warned = TRUE; + + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: Warning: allocated tablespace %lu," + " old maximum was %lu\n", + (ulong) id, + (ulong) fil_system->max_assigned_id); + } + fil_system->max_assigned_id = id; } @@ -1240,19 +1256,25 @@ try_again: Assigns a new space id for a new single-table tablespace. This works simply by incrementing the global counter. If 4 billion id's is not enough, we may need to recycle id's. -@return new tablespace id; ULINT_UNDEFINED if could not assign an id */ -static -ulint -fil_assign_new_space_id(void) -/*=========================*/ +@return TRUE if assigned, FALSE if not */ +UNIV_INTERN +ibool +fil_assign_new_space_id( +/*====================*/ + ulint* space_id) /*!< in/out: space id */ { - ulint id; + ulint id; + ibool success; mutex_enter(&fil_system->mutex); - fil_system->max_assigned_id++; + id = *space_id; - id = fil_system->max_assigned_id; + if (id < fil_system->max_assigned_id) { + id = fil_system->max_assigned_id; + } + + id++; if (id > (SRV_LOG_SPACE_FIRST_ID / 2) && (id % 1000000UL == 0)) { ut_print_timestamp(stderr); @@ -1268,7 +1290,11 @@ fil_assign_new_space_id(void) (ulong) SRV_LOG_SPACE_FIRST_ID); } - if (id >= SRV_LOG_SPACE_FIRST_ID) { + success = (id < SRV_LOG_SPACE_FIRST_ID); + + if (success) { + *space_id = fil_system->max_assigned_id = id; + } else { ut_print_timestamp(stderr); fprintf(stderr, "InnoDB: You have run out of single-table" @@ -1278,14 +1304,12 @@ fil_assign_new_space_id(void) " have to dump all your tables and\n" "InnoDB: recreate the whole InnoDB installation.\n", (ulong) id); - fil_system->max_assigned_id--; - - id = ULINT_UNDEFINED; + *space_id = ULINT_UNDEFINED; } mutex_exit(&fil_system->mutex); - return(id); + return(success); } /*******************************************************************//** @@ -1521,7 +1545,7 @@ fil_init( ut_a(hash_size > 0); ut_a(max_n_open > 0); - fil_system = mem_alloc(sizeof(fil_system_t)); + fil_system = mem_zalloc(sizeof(fil_system_t)); mutex_create(&fil_system->mutex, SYNC_ANY_LATCH); @@ -1530,16 +1554,9 @@ fil_init( UT_LIST_INIT(fil_system->LRU); - fil_system->n_open = 0; fil_system->max_n_open = max_n_open; - fil_system->modification_counter = 0; fil_system->max_assigned_id = TRX_SYS_SPACE_MAX; - - fil_system->tablespace_version = 0; - - UT_LIST_INIT(fil_system->unflushed_spaces); - UT_LIST_INIT(fil_system->space_list); } /*******************************************************************//** @@ -2124,7 +2141,7 @@ fil_op_log_parse_or_replay( fil_create_directory_for_tablename(name); if (fil_create_new_single_table_tablespace( - &space_id, name, FALSE, flags, + space_id, name, FALSE, flags, FIL_IBD_FILE_INITIAL_SIZE) != DB_SUCCESS) { ut_error; } @@ -2571,9 +2588,7 @@ UNIV_INTERN ulint fil_create_new_single_table_tablespace( /*===================================*/ - ulint* space_id, /*!< in/out: space id; if this is != 0, - then this is an input parameter, - otherwise output */ + ulint space_id, /*!< in: space id */ const char* tablename, /*!< in: the table name in the usual databasename/tablename format of InnoDB, or a dir path to a temp @@ -2593,6 +2608,8 @@ fil_create_new_single_table_tablespace( ibool success; char* path; + ut_a(space_id > 0); + ut_a(space_id < SRV_LOG_SPACE_FIRST_ID); ut_a(size >= FIL_IBD_FILE_INITIAL_SIZE); /* The tablespace flags (FSP_SPACE_FLAGS) should be 0 for ROW_FORMAT=COMPACT @@ -2649,38 +2666,21 @@ fil_create_new_single_table_tablespace( return(DB_ERROR); } - buf2 = ut_malloc(3 * UNIV_PAGE_SIZE); - /* Align the memory for file i/o if we might have O_DIRECT set */ - page = ut_align(buf2, UNIV_PAGE_SIZE); - ret = os_file_set_size(path, file, size * UNIV_PAGE_SIZE, 0); if (!ret) { - ut_free(buf2); - os_file_close(file); - os_file_delete(path); - - mem_free(path); - return(DB_OUT_OF_FILE_SPACE); - } - - if (*space_id == 0) { - *space_id = fil_assign_new_space_id(); - } - - /* printf("Creating tablespace %s id %lu\n", path, *space_id); */ - - if (*space_id == ULINT_UNDEFINED) { - ut_free(buf2); + err = DB_OUT_OF_FILE_SPACE; error_exit: os_file_close(file); error_exit2: os_file_delete(path); mem_free(path); - return(DB_ERROR); + return(err); } + /* printf("Creating tablespace %s id %lu\n", path, space_id); */ + /* We have to write the space id to the file immediately and flush the file to disk. This is because in crash recovery we must be aware what tablespaces exist and what are their space id's, so that we can apply @@ -2690,10 +2690,14 @@ error_exit2: with zeros from the call of os_file_set_size(), until a buffer pool flush would write to it. */ + buf2 = ut_malloc(3 * UNIV_PAGE_SIZE); + /* Align the memory for file i/o if we might have O_DIRECT set */ + page = ut_align(buf2, UNIV_PAGE_SIZE); + memset(page, '\0', UNIV_PAGE_SIZE); - fsp_header_init_fields(page, *space_id, flags); - mach_write_to_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, *space_id); + fsp_header_init_fields(page, space_id, flags); + mach_write_to_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, space_id); if (!(flags & DICT_TF_ZSSIZE_MASK)) { buf_flush_init_for_writing(page, NULL, 0); @@ -2724,6 +2728,7 @@ error_exit2: " to tablespace ", stderr); ut_print_filename(stderr, path); putc('\n', stderr); + err = DB_ERROR; goto error_exit; } @@ -2733,22 +2738,20 @@ error_exit2: fputs("InnoDB: Error: file flush of tablespace ", stderr); ut_print_filename(stderr, path); fputs(" failed\n", stderr); + err = DB_ERROR; goto error_exit; } os_file_close(file); - if (*space_id == ULINT_UNDEFINED) { - goto error_exit2; - } - - success = fil_space_create(path, *space_id, flags, FIL_TABLESPACE); + success = fil_space_create(path, space_id, flags, FIL_TABLESPACE); if (!success) { + err = DB_ERROR; goto error_exit2; } - fil_node_create(path, size, *space_id, FALSE); + fil_node_create(path, size, space_id, FALSE); #ifndef UNIV_HOTBACKUP { @@ -2759,7 +2762,7 @@ error_exit2: fil_op_write_log(flags ? MLOG_FILE_CREATE2 : MLOG_FILE_CREATE, - *space_id, + space_id, is_temp ? MLOG_FILE_FLAG_TEMP : 0, flags, tablename, NULL, &mtr); @@ -3124,7 +3127,7 @@ fil_open_single_table_tablespace( for (i = 0; i < n_index; i++) { new_id[i] = dict_table_get_index_on_name(table, - (page + (i + 1) * 512 + 12))->id; + (char*)(page + (i + 1) * 512 + 12))->id; old_id[i] = mach_read_from_8(page + (i + 1) * 512); root_page[i] = mach_read_from_4(page + (i + 1) * 512 + 8); } @@ -3148,7 +3151,7 @@ skip_info: /* over write space id of all pages */ rec_offs_init(offsets_); - fprintf(stderr, "%s", "InnoDB: Progress in %:"); + fprintf(stderr, "InnoDB: Progress in %%:"); for (offset = 0; offset < size_bytes; offset += UNIV_PAGE_SIZE) { ulint checksum_field; @@ -3890,39 +3893,6 @@ next_datadir_item: return(err); } -/********************************************************************//** -If we need crash recovery, and we have called -fil_load_single_table_tablespaces() and dict_load_single_table_tablespaces(), -we can call this function to print an error message of orphaned .ibd files -for which there is not a data dictionary entry with a matching table name -and space id. */ -UNIV_INTERN -void -fil_print_orphaned_tablespaces(void) -/*================================*/ -{ - fil_space_t* space; - - mutex_enter(&fil_system->mutex); - - space = UT_LIST_GET_FIRST(fil_system->space_list); - - while (space) { - if (space->purpose == FIL_TABLESPACE && !trx_sys_sys_space(space->id) - && !space->mark) { - fputs("InnoDB: Warning: tablespace ", stderr); - ut_print_filename(stderr, space->name); - fprintf(stderr, " of id %lu has no matching table in\n" - "InnoDB: the InnoDB data dictionary.\n", - (ulong) space->id); - } - - space = UT_LIST_GET_NEXT(space_list, space); - } - - mutex_exit(&fil_system->mutex); -} - /*******************************************************************//** Returns TRUE if a single-table tablespace does not exist in the memory cache, or is being deleted there. diff --git a/storage/xtradb/ha/hash0hash.c b/storage/xtradb/ha/hash0hash.c index 30c304dafcd..70516deb005 100644 --- a/storage/xtradb/ha/hash0hash.c +++ b/storage/xtradb/ha/hash0hash.c @@ -128,6 +128,70 @@ hash_create( } /*************************************************************//** +*/ +UNIV_INTERN +ulint +hash_create_needed( +/*===============*/ + ulint n) +{ + ulint prime; + ulint offset; + + prime = ut_find_prime(n); + + offset = (sizeof(hash_table_t) + 7) / 8; + offset *= 8; + + return(offset + sizeof(hash_cell_t) * prime); +} + +UNIV_INTERN +void +hash_create_init( +/*=============*/ + hash_table_t* table, + ulint n) +{ + ulint prime; + ulint offset; + + prime = ut_find_prime(n); + + offset = (sizeof(hash_table_t) + 7) / 8; + offset *= 8; + + table->array = (hash_cell_t*)(((char*)table) + offset); + table->n_cells = prime; +# if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG + table->adaptive = FALSE; +# endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ + table->n_mutexes = 0; + table->mutexes = NULL; + table->heaps = NULL; + table->heap = NULL; + ut_d(table->magic_n = HASH_TABLE_MAGIC_N); + + /* Initialize the cell array */ + hash_table_clear(table); +} + +UNIV_INTERN +void +hash_create_reuse( +/*==============*/ + hash_table_t* table) +{ + ulint offset; + + offset = (sizeof(hash_table_t) + 7) / 8; + offset *= 8; + + table->array = (hash_cell_t*)(((char*)table) + offset); + ut_ad(table->magic_n == HASH_TABLE_MAGIC_N); +} + +/*************************************************************//** Frees a hash table. */ UNIV_INTERN void diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc index 68da379b841..5b1ab98ea03 100644 --- a/storage/xtradb/handler/ha_innodb.cc +++ b/storage/xtradb/handler/ha_innodb.cc @@ -193,6 +193,7 @@ static my_bool innobase_overwrite_relay_log_info = FALSE; static my_bool innobase_rollback_on_timeout = FALSE; static my_bool innobase_create_status_file = FALSE; static my_bool innobase_stats_on_metadata = TRUE; +static my_bool innobase_use_sys_stats_table = FALSE; static char* internal_innobase_data_file_path = NULL; @@ -336,6 +337,12 @@ static MYSQL_THDVAR_ULONG(lock_wait_timeout, PLUGIN_VAR_RQCMDARG, "Timeout in seconds an InnoDB transaction may wait for a lock before being rolled back. Values above 100000000 disable the timeout.", NULL, NULL, 50, 1, 1024 * 1024 * 1024, 0); +static MYSQL_THDVAR_ULONG(flush_log_at_trx_commit_session, PLUGIN_VAR_RQCMDARG, + "Control innodb_flush_log_at_trx_commit for each sessions. " + "The value 0~2 are same meanings to innodb_flush_log_at_trx_commit. " + "The value 3 regards innodb_flush_log_at_trx_commit (default).", + NULL, NULL, 3, 0, 3, 0); + static handler *innobase_create_handler(handlerton *hton, TABLE_SHARE *table, @@ -716,6 +723,17 @@ thd_lock_wait_timeout( return(THDVAR((THD*) thd, lock_wait_timeout)); } +/******************************************************************//** +*/ +extern "C" UNIV_INTERN +ulong +thd_flush_log_at_trx_commit_session( +/*================================*/ + void* thd) +{ + return(THDVAR((THD*) thd, flush_log_at_trx_commit_session)); +} + /********************************************************************//** Obtain the InnoDB transaction of a MySQL thread. @return reference to transaction pointer */ @@ -1031,6 +1049,29 @@ innobase_get_charset( return(thd_charset((THD*) mysql_thd)); } +/**********************************************************************//** +Determines the current SQL statement. +@return SQL statement string */ +extern "C" UNIV_INTERN +const char* +innobase_get_stmt( +/*==============*/ + void* mysql_thd, /*!< in: MySQL thread handle */ + size_t* length) /*!< out: length of the SQL statement */ +{ +#if MYSQL_VERSION_ID >= 50142 + LEX_STRING* stmt; + + stmt = thd_query_string((THD*) mysql_thd); + *length = stmt->length; + return(stmt->str); +#else + const char* stmt_str = thd_query((THD*) mysql_thd); + *length = strlen(stmt_str); + return(stmt_str); +#endif +} + #if defined (__WIN__) && defined (MYSQL_DYNAMIC_PLUGIN) extern MYSQL_PLUGIN_IMPORT MY_TMPDIR mysql_tmpdir_list; /*******************************************************************//** @@ -1351,7 +1392,6 @@ innobase_trx_allocate( trx = trx_allocate_for_mysql(); trx->mysql_thd = thd; - trx->mysql_query_str = thd_query(thd); innobase_trx_init(thd, trx); @@ -2035,12 +2075,12 @@ innobase_init( srv_page_size_shift = 0; if (innobase_page_size != (1 << 14)) { - int n_shift; + uint n_shift; fprintf(stderr, "InnoDB: Warning: innodb_page_size has been changed from default value 16384. (###EXPERIMENTAL### operation)\n"); for (n_shift = 12; n_shift <= UNIV_PAGE_SIZE_SHIFT_MAX; n_shift++) { - if (innobase_page_size == (1u << n_shift)) { + if (innobase_page_size == ((ulong)1 << n_shift)) { srv_page_size_shift = n_shift; srv_page_size = (1 << srv_page_size_shift); fprintf(stderr, @@ -2231,6 +2271,8 @@ mem_free_and_error: srv_extra_undoslots = (ibool) innobase_extra_undoslots; + srv_use_sys_stats_table = (ibool) innobase_use_sys_stats_table; + /* -------------- Log files ---------------------------*/ /* The default dir for log files is the datadir of MySQL */ @@ -5710,6 +5752,9 @@ ha_innobase::index_read( prebuilt->index_usable = FALSE; DBUG_RETURN(HA_ERR_CRASHED); } + if (UNIV_UNLIKELY(!prebuilt->index_usable)) { + DBUG_RETURN(HA_ERR_TABLE_DEF_CHANGED); + } /* Note that if the index for which the search template is built is not necessarily prebuilt->index, but can also be the clustered index */ @@ -6825,6 +6870,9 @@ ha_innobase::create( /* Cache the value of innodb_file_format, in case it is modified by another thread while the table is being created. */ const ulint file_format = srv_file_format; + const char* stmt; + size_t stmt_len; + enum row_type row_type; DBUG_ENTER("ha_innobase::create"); @@ -6945,94 +6993,94 @@ ha_innobase::create( } } - if (create_info->used_fields & HA_CREATE_USED_ROW_FORMAT) { - if (flags) { - /* KEY_BLOCK_SIZE was specified. */ - if (form->s->row_type != ROW_TYPE_COMPRESSED) { - /* ROW_FORMAT other than COMPRESSED - ignores KEY_BLOCK_SIZE. It does not - make sense to reject conflicting - KEY_BLOCK_SIZE and ROW_FORMAT, because - such combinations can be obtained - with ALTER TABLE anyway. */ - push_warning_printf( - thd, - MYSQL_ERROR::WARN_LEVEL_WARN, - ER_ILLEGAL_HA_CREATE_OPTION, - "InnoDB: ignoring KEY_BLOCK_SIZE=%lu" - " unless ROW_FORMAT=COMPRESSED.", - create_info->key_block_size); - flags = 0; - } - } else { - /* No KEY_BLOCK_SIZE */ - if (form->s->row_type == ROW_TYPE_COMPRESSED) { - /* ROW_FORMAT=COMPRESSED without - KEY_BLOCK_SIZE implies half the - maximum KEY_BLOCK_SIZE. */ - flags = (DICT_TF_ZSSIZE_MAX - 1) - << DICT_TF_ZSSIZE_SHIFT - | DICT_TF_COMPACT - | DICT_TF_FORMAT_ZIP - << DICT_TF_FORMAT_SHIFT; + row_type = form->s->row_type; + + if (flags) { + /* KEY_BLOCK_SIZE was specified. */ + if (!(create_info->used_fields & HA_CREATE_USED_ROW_FORMAT)) { + /* ROW_FORMAT was not specified; + default to ROW_FORMAT=COMPRESSED */ + row_type = ROW_TYPE_COMPRESSED; + } else if (row_type != ROW_TYPE_COMPRESSED) { + /* ROW_FORMAT other than COMPRESSED + ignores KEY_BLOCK_SIZE. It does not + make sense to reject conflicting + KEY_BLOCK_SIZE and ROW_FORMAT, because + such combinations can be obtained + with ALTER TABLE anyway. */ + push_warning_printf( + thd, + MYSQL_ERROR::WARN_LEVEL_WARN, + ER_ILLEGAL_HA_CREATE_OPTION, + "InnoDB: ignoring KEY_BLOCK_SIZE=%lu" + " unless ROW_FORMAT=COMPRESSED.", + create_info->key_block_size); + flags = 0; + } + } else { + /* No KEY_BLOCK_SIZE */ + if (row_type == ROW_TYPE_COMPRESSED) { + /* ROW_FORMAT=COMPRESSED without + KEY_BLOCK_SIZE implies half the + maximum KEY_BLOCK_SIZE. */ + flags = (DICT_TF_ZSSIZE_MAX - 1) + << DICT_TF_ZSSIZE_SHIFT + | DICT_TF_COMPACT + | DICT_TF_FORMAT_ZIP + << DICT_TF_FORMAT_SHIFT; //#if DICT_TF_ZSSIZE_MAX < 1 //# error "DICT_TF_ZSSIZE_MAX < 1" //#endif - } } + } - switch (form->s->row_type) { - const char* row_format_name; - case ROW_TYPE_REDUNDANT: - break; - case ROW_TYPE_COMPRESSED: - case ROW_TYPE_DYNAMIC: - row_format_name - = form->s->row_type == ROW_TYPE_COMPRESSED - ? "COMPRESSED" - : "DYNAMIC"; - - if (!srv_file_per_table) { - push_warning_printf( - thd, - MYSQL_ERROR::WARN_LEVEL_WARN, - ER_ILLEGAL_HA_CREATE_OPTION, - "InnoDB: ROW_FORMAT=%s" - " requires innodb_file_per_table.", - row_format_name); - } else if (file_format < DICT_TF_FORMAT_ZIP) { - push_warning_printf( - thd, - MYSQL_ERROR::WARN_LEVEL_WARN, - ER_ILLEGAL_HA_CREATE_OPTION, - "InnoDB: ROW_FORMAT=%s" - " requires innodb_file_format >" - " Antelope.", - row_format_name); - } else { - flags |= DICT_TF_COMPACT - | (DICT_TF_FORMAT_ZIP - << DICT_TF_FORMAT_SHIFT); - break; - } + switch (row_type) { + const char* row_format_name; + case ROW_TYPE_REDUNDANT: + break; + case ROW_TYPE_COMPRESSED: + case ROW_TYPE_DYNAMIC: + row_format_name + = row_type == ROW_TYPE_COMPRESSED + ? "COMPRESSED" + : "DYNAMIC"; - /* fall through */ - case ROW_TYPE_NOT_USED: - case ROW_TYPE_FIXED: - default: - push_warning(thd, - MYSQL_ERROR::WARN_LEVEL_WARN, - ER_ILLEGAL_HA_CREATE_OPTION, - "InnoDB: assuming ROW_FORMAT=COMPACT."); - case ROW_TYPE_DEFAULT: - case ROW_TYPE_COMPACT: - flags = DICT_TF_COMPACT; + if (!srv_file_per_table) { + push_warning_printf( + thd, + MYSQL_ERROR::WARN_LEVEL_WARN, + ER_ILLEGAL_HA_CREATE_OPTION, + "InnoDB: ROW_FORMAT=%s" + " requires innodb_file_per_table.", + row_format_name); + } else if (file_format < DICT_TF_FORMAT_ZIP) { + push_warning_printf( + thd, + MYSQL_ERROR::WARN_LEVEL_WARN, + ER_ILLEGAL_HA_CREATE_OPTION, + "InnoDB: ROW_FORMAT=%s" + " requires innodb_file_format >" + " Antelope.", + row_format_name); + } else { + flags |= DICT_TF_COMPACT + | (DICT_TF_FORMAT_ZIP + << DICT_TF_FORMAT_SHIFT); break; } - } else if (!flags) { - /* No KEY_BLOCK_SIZE or ROW_FORMAT specified: - use ROW_FORMAT=COMPACT by default. */ + + /* fall through */ + case ROW_TYPE_NOT_USED: + case ROW_TYPE_FIXED: + default: + push_warning(thd, + MYSQL_ERROR::WARN_LEVEL_WARN, + ER_ILLEGAL_HA_CREATE_OPTION, + "InnoDB: assuming ROW_FORMAT=COMPACT."); + case ROW_TYPE_DEFAULT: + case ROW_TYPE_COMPACT: flags = DICT_TF_COMPACT; + break; } /* Look for a primary key */ @@ -7041,7 +7089,7 @@ ha_innobase::create( (int) form->s->primary_key : -1); - /* Our function row_get_mysql_key_number_for_index assumes + /* Our function innobase_get_mysql_key_number_for_index assumes the primary key is always number 0, if it exists */ ut_a(primary_key_no == -1 || primary_key_no == 0); @@ -7101,9 +7149,11 @@ ha_innobase::create( } } - if (*trx->mysql_query_str) { - error = row_table_add_foreign_constraints(trx, - *trx->mysql_query_str, norm_name, + stmt = innobase_get_stmt(thd, &stmt_len); + + if (stmt) { + error = row_table_add_foreign_constraints( + trx, stmt, stmt_len, norm_name, create_info->options & HA_LEX_CREATE_TMP_TABLE); error = convert_error_code_to_mysql(error, flags, NULL); @@ -7396,7 +7446,6 @@ innobase_drop_database( /* In the Windows plugin, thd = current_thd is always NULL */ trx = trx_allocate_for_mysql(); trx->mysql_thd = NULL; - trx->mysql_query_str = NULL; #else trx = innobase_trx_allocate(thd); #endif @@ -7598,6 +7647,10 @@ ha_innobase::records_in_range( n_rows = HA_POS_ERROR; goto func_exit; } + if (UNIV_UNLIKELY(!row_merge_is_index_usable(prebuilt->trx, index))) { + n_rows = HA_ERR_TABLE_DEF_CHANGED; + goto func_exit; + } heap = mem_heap_create(2 * (key->key_parts * sizeof(dfield_t) + sizeof(dtuple_t))); @@ -7777,6 +7830,86 @@ ha_innobase::is_corrupt() const } /*********************************************************************//** +Calculates the key number used inside MySQL for an Innobase index. We will +first check the "index translation table" for a match of the index to get +the index number. If there does not exist an "index translation table", +or not able to find the index in the translation table, then we will fall back +to the traditional way of looping through dict_index_t list to find a +match. In this case, we have to take into account if we generated a +default clustered index for the table +@return the key number used inside MySQL */ +static +unsigned int +innobase_get_mysql_key_number_for_index( +/*====================================*/ + INNOBASE_SHARE* share, /*!< in: share structure for index + translation table. */ + const TABLE* table, /*!< in: table in MySQL data + dictionary */ + dict_table_t* ib_table,/*!< in: table in Innodb data + dictionary */ + const dict_index_t* index) /*!< in: index */ +{ + const dict_index_t* ind; + unsigned int i; + + ut_ad(index); + ut_ad(ib_table); + ut_ad(table); + ut_ad(share); + + /* If index does not belong to the table of share structure. Search + index->table instead */ + if (index->table != ib_table) { + i = 0; + ind = dict_table_get_first_index(index->table); + + while (index != ind) { + ind = dict_table_get_next_index(ind); + i++; + } + + if (row_table_got_default_clust_index(index->table)) { + ut_a(i > 0); + i--; + } + + return(i); + } + + /* If index translation table exists, we will first check + the index through index translation table for a match. */ + if (share->idx_trans_tbl.index_mapping) { + for (i = 0; i < share->idx_trans_tbl.index_count; i++) { + if (share->idx_trans_tbl.index_mapping[i] == index) { + return(i); + } + } + + /* Print an error message if we cannot find the index + ** in the "index translation table". */ + sql_print_error("Cannot find index %s in InnoDB index " + "translation table.", index->name); + } + + /* If we do not have an "index translation table", or not able + to find the index in the translation table, we'll directly find + matching index in the dict_index_t list */ + for (i = 0; i < table->s->keys; i++) { + ind = dict_table_get_index_on_name( + ib_table, table->key_info[i].name); + + if (index == ind) { + return(i); + } + } + + sql_print_error("Cannot find matching index number for index %s " + "in InnoDB index list.", index->name); + + return(0); +} +/*********************************************************************//** Returns statistics information of the table to the MySQL interpreter, in various fields of the handle object. */ UNIV_INTERN @@ -7833,9 +7966,30 @@ ha_innobase::info( /* In sql_show we call with this flag: update then statistics so that they are up-to-date */ + if (srv_use_sys_stats_table + && thd_sql_command(user_thd) == SQLCOM_ANALYZE) { + /* If the indexes on the table don't have enough rows in SYS_STATS system table, */ + /* they need to be created. */ + dict_index_t* index; + + prebuilt->trx->op_info = "confirming rows of SYS_STATS to store statistics"; + + ut_a(prebuilt->trx->conc_state == TRX_NOT_STARTED); + + for (index = dict_table_get_first_index(ib_table); + index != NULL; + index = dict_table_get_next_index(index)) { + row_insert_stats_for_mysql(index, prebuilt->trx); + innobase_commit_low(prebuilt->trx); + } + + ut_a(prebuilt->trx->conc_state == TRX_NOT_STARTED); + } + prebuilt->trx->op_info = "updating table statistics"; - dict_update_statistics(ib_table); + dict_update_statistics(ib_table, + (thd_sql_command(user_thd) == SQLCOM_ANALYZE)?TRUE:FALSE); prebuilt->trx->op_info = "returning various info to MySQL"; } @@ -8047,8 +8201,8 @@ ha_innobase::info( err_index = trx_get_error_info(prebuilt->trx); if (err_index) { - errkey = (unsigned int) - row_get_mysql_key_number_for_index(err_index); + errkey = innobase_get_mysql_key_number_for_index( + share, table, ib_table, err_index); } else { errkey = (unsigned int) prebuilt->trx->error_key_num; } @@ -10815,7 +10969,35 @@ innodb_old_blocks_pct_update( } /*************************************************************//** -Check if it is a valid value of innodb_change_buffering. This function is +Find the corresponding ibuf_use_t value that indexes into +innobase_change_buffering_values[] array for the input +change buffering option name. +@return corresponding IBUF_USE_* value for the input variable +name, or IBUF_USE_COUNT if not able to find a match */ +static +ibuf_use_t +innodb_find_change_buffering_value( +/*===============================*/ + const char* input_name) /*!< in: input change buffering + option name */ +{ + ulint use; + + for (use = 0; use < UT_ARR_SIZE(innobase_change_buffering_values); + use++) { + /* found a match */ + if (!innobase_strcasecmp( + input_name, innobase_change_buffering_values[use])) { + return((ibuf_use_t)use); + } + } + + /* Did not find any match */ + return(IBUF_USE_COUNT); +} + +/*************************************************************//** +Check if it is a valid value of innodb_change_buffering. This function is registered as a callback with MySQL. @return 0 for valid innodb_change_buffering */ static @@ -10839,19 +11021,22 @@ innodb_change_buffering_validate( change_buffering_input = value->val_str(value, buff, &len); if (change_buffering_input != NULL) { - ulint use; + ibuf_use_t use; - for (use = 0; use < UT_ARR_SIZE(innobase_change_buffering_values); - use++) { - if (!innobase_strcasecmp( - change_buffering_input, - innobase_change_buffering_values[use])) { - *(ibuf_use_t*) save = (ibuf_use_t) use; - return(0); - } + use = innodb_find_change_buffering_value( + change_buffering_input); + + if (use != IBUF_USE_COUNT) { + /* Find a matching change_buffering option value. */ + *static_cast<const char**>(save) = + innobase_change_buffering_values[use]; + + return(0); } } + /* No corresponding change buffering option for user supplied + "change_buffering_input" */ return(1); } @@ -10862,21 +11047,27 @@ static void innodb_change_buffering_update( /*===========================*/ - THD* thd, /*!< in: thread handle */ - struct st_mysql_sys_var* var, /*!< in: pointer to - system variable */ - void* var_ptr, /*!< out: where the - formal string goes */ - const void* save) /*!< in: immediate result - from check function */ + THD* thd, /*!< in: thread handle */ + struct st_mysql_sys_var* var, /*!< in: pointer to + system variable */ + void* var_ptr,/*!< out: where the + formal string goes */ + const void* save) /*!< in: immediate result + from check function */ { + ibuf_use_t use; + ut_a(var_ptr != NULL); ut_a(save != NULL); - ut_a((*(ibuf_use_t*) save) < IBUF_USE_COUNT); - ibuf_use = *(const ibuf_use_t*) save; + use = innodb_find_change_buffering_value( + *static_cast<const char*const*>(save)); + + ut_a(use < IBUF_USE_COUNT); - *(const char**) var_ptr = innobase_change_buffering_values[ibuf_use]; + ibuf_use = use; + *static_cast<const char**>(var_ptr) = + *static_cast<const char*const*>(save); } static int show_innodb_vars(THD *thd, SHOW_VAR *var, char *buff) @@ -11142,6 +11333,14 @@ static MYSQL_SYSVAR_ULINT(stats_update_need_lock, srv_stats_update_need_lock, "e.g. Data_free.", NULL, NULL, 1, 0, 1, 0); +static MYSQL_SYSVAR_BOOL(use_sys_stats_table, innobase_use_sys_stats_table, + PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY, + "Enable to use SYS_STATS system table to store statistics statically, " + "And avoids to calculate statistics at every first open of the tables. " + "This option may make the opportunities of update statistics less. " + "So you should use ANALYZE TABLE command intentionally.", + NULL, NULL, FALSE); + static MYSQL_SYSVAR_BOOL(adaptive_hash_index, btr_search_enabled, PLUGIN_VAR_OPCMDARG, "Enable InnoDB adaptive hash index (enabled by default). " @@ -11167,7 +11366,12 @@ static MYSQL_SYSVAR_ULONG(autoextend_increment, srv_auto_extend_increment, static MYSQL_SYSVAR_LONGLONG(buffer_pool_size, innobase_buffer_pool_size, PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, "The size of the memory buffer InnoDB uses to cache data and indexes of its tables.", - NULL, NULL, 128*1024*1024L, 5*1024*1024L, LONGLONG_MAX, 1024*1024L); + NULL, NULL, 128*1024*1024L, 32*1024*1024L, LONGLONG_MAX, 1024*1024L); + +static MYSQL_SYSVAR_UINT(buffer_pool_shm_key, srv_buffer_pool_shm_key, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "[experimental] The key value of shared memory segment for the buffer pool. 0 means disable the feature (default).", + NULL, NULL, 0, 0, INT_MAX32, 0); static MYSQL_SYSVAR_ULONG(commit_concurrency, innobase_commit_concurrency, PLUGIN_VAR_RQCMDARG, @@ -11298,7 +11502,7 @@ static MYSQL_SYSVAR_STR(change_buffering, innobase_change_buffering, "Buffer changes to reduce random access: " "OFF, ON, none, inserts.", innodb_change_buffering_validate, - innodb_change_buffering_update, NULL); + innodb_change_buffering_update, "inserts"); static MYSQL_SYSVAR_ULONG(read_ahead_threshold, srv_read_ahead_threshold, PLUGIN_VAR_RQCMDARG, @@ -11428,6 +11632,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(additional_mem_pool_size), MYSQL_SYSVAR(autoextend_increment), MYSQL_SYSVAR(buffer_pool_size), + MYSQL_SYSVAR(buffer_pool_shm_key), MYSQL_SYSVAR(checksums), MYSQL_SYSVAR(fast_checksum), MYSQL_SYSVAR(commit_concurrency), @@ -11472,6 +11677,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(stats_method), MYSQL_SYSVAR(stats_auto_update), MYSQL_SYSVAR(stats_update_need_lock), + MYSQL_SYSVAR(use_sys_stats_table), MYSQL_SYSVAR(stats_sample_pages), MYSQL_SYSVAR(adaptive_hash_index), MYSQL_SYSVAR(replication_delay), @@ -11495,6 +11701,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(flush_neighbor_pages), MYSQL_SYSVAR(read_ahead), MYSQL_SYSVAR(adaptive_checkpoint), + MYSQL_SYSVAR(flush_log_at_trx_commit_session), MYSQL_SYSVAR(enable_unsafe_group_commit), MYSQL_SYSVAR(expand_import), MYSQL_SYSVAR(extra_rsegments), @@ -11514,7 +11721,7 @@ mysql_declare_plugin(xtradb) &innobase_storage_engine, innobase_hton_name, "Percona", - "XtraDB engine based on InnoDB plugin. Supports transactions, row-level locking, and foreign keys", + "Percona-XtraDB, Supports transactions, row-level locking, and foreign keys", PLUGIN_LICENSE_GPL, innobase_init, /* Plugin Init */ NULL, /* Plugin Deinit */ @@ -11539,6 +11746,7 @@ i_s_innodb_index_stats, i_s_innodb_admin_command, i_s_innodb_sys_tables, i_s_innodb_sys_indexes, +i_s_innodb_sys_stats, i_s_innodb_patches mysql_declare_plugin_end; maria_declare_plugin(xtradb) @@ -11573,6 +11781,7 @@ i_s_innodb_index_stats_maria, i_s_innodb_admin_command_maria, i_s_innodb_sys_tables_maria, i_s_innodb_sys_indexes_maria, +i_s_innodb_sys_stats_maria, i_s_innodb_patches_maria maria_declare_plugin_end; diff --git a/storage/xtradb/handler/ha_innodb.h b/storage/xtradb/handler/ha_innodb.h index 5f5f6de98ba..2c14549cbfc 100644 --- a/storage/xtradb/handler/ha_innodb.h +++ b/storage/xtradb/handler/ha_innodb.h @@ -234,7 +234,11 @@ the definitions are bracketed with #ifdef INNODB_COMPATIBILITY_HOOKS */ extern "C" { struct charset_info_st *thd_charset(MYSQL_THD thd); +#if MYSQL_VERSION_ID >= 50142 +LEX_STRING *thd_query_string(MYSQL_THD thd); +#else char **thd_query(MYSQL_THD thd); +#endif /** Get the file name of the MySQL binlog. * @return the name of the binlog file diff --git a/storage/xtradb/handler/handler0alter.cc b/storage/xtradb/handler/handler0alter.cc index 2eb9c9f474c..3a32ed9cf36 100644 --- a/storage/xtradb/handler/handler0alter.cc +++ b/storage/xtradb/handler/handler0alter.cc @@ -894,6 +894,8 @@ error: prebuilt->trx->error_info = NULL; /* fall through */ default: + trx->error_state = DB_SUCCESS; + if (new_primary) { if (indexed_table != innodb_table) { row_merge_drop_table(trx, indexed_table); diff --git a/storage/xtradb/handler/i_s.cc b/storage/xtradb/handler/i_s.cc index 7cb4a7ac718..0f656528315 100644 --- a/storage/xtradb/handler/i_s.cc +++ b/storage/xtradb/handler/i_s.cc @@ -43,20 +43,11 @@ extern "C" { #include "ha_prototypes.h" /* for innobase_convert_name() */ #include "srv0start.h" /* for srv_was_started */ #include "btr0btr.h" /* for btr_page_get_index_id */ -#include "dict0dict.h" /* for dict_index_get_if_in_cache */ #include "trx0rseg.h" /* for trx_rseg_struct */ #include "trx0sys.h" /* for trx_sys */ #include "dict0dict.h" /* for dict_sys */ #include "btr0pcur.h" #include "buf0lru.h" /* for XTRA_LRU_[DUMP/RESTORE] */ -/* from buf0buf.c */ -struct buf_chunk_struct{ - ulint mem_size; /* allocated size of the chunk */ - ulint size; /* size of frames[] and blocks[] */ - void* mem; /* pointer to the memory area which - was allocated for the frames */ - buf_block_t* blocks; /* array of buffer control blocks */ -}; } static const char plugin_author[] = "Innobase Oy"; @@ -503,27 +494,11 @@ static ST_FIELD_INFO i_s_innodb_buffer_pool_pages_fields_info[] = static ST_FIELD_INFO i_s_innodb_buffer_pool_pages_index_fields_info[] = { - {STRUCT_FLD(field_name, "schema_name"), - STRUCT_FLD(field_length, 64), - STRUCT_FLD(field_type, MYSQL_TYPE_STRING), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - - {STRUCT_FLD(field_name, "table_name"), - STRUCT_FLD(field_length, 64), - STRUCT_FLD(field_type, MYSQL_TYPE_STRING), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, 0), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - - {STRUCT_FLD(field_name, "index_name"), - STRUCT_FLD(field_length, 64), - STRUCT_FLD(field_type, MYSQL_TYPE_STRING), + {STRUCT_FLD(field_name, "index_id"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), STRUCT_FLD(old_name, ""), STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, @@ -724,7 +699,6 @@ i_s_innodb_buffer_pool_pages_fill( RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name); buf_pool_mutex_enter(); - mutex_enter(&(dict_sys->mutex)); chunk = buf_pool->chunks; @@ -796,7 +770,6 @@ i_s_innodb_buffer_pool_pages_fill( } } - mutex_exit(&(dict_sys->mutex)); buf_pool_mutex_exit(); DBUG_RETURN(status); @@ -817,13 +790,8 @@ i_s_innodb_buffer_pool_pages_index_fill( int status = 0; ulint n_chunks, n_blocks; - dict_index_t* index; dulint index_id; - const char *p; - char db_name_raw[NAME_LEN*5+1], db_name[NAME_LEN+1]; - char table_name_raw[NAME_LEN*5+1], table_name[NAME_LEN+1]; - buf_chunk_t* chunk; DBUG_ENTER("i_s_innodb_buffer_pool_pages_index_fill"); @@ -837,7 +805,6 @@ i_s_innodb_buffer_pool_pages_index_fill( RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name); buf_pool_mutex_enter(); - mutex_enter(&(dict_sys->mutex)); chunk = buf_pool->chunks; @@ -849,48 +816,28 @@ i_s_innodb_buffer_pool_pages_index_fill( if (fil_page_get_type(frame) == FIL_PAGE_INDEX) { index_id = btr_page_get_index_id(frame); - index = dict_index_get_if_in_cache_low(index_id); - if(index) - { - if((p = (char*) strchr(index->table_name, '/'))) - { - strncpy(db_name_raw, index->table_name, p-index->table_name); - db_name_raw[p-index->table_name] = 0; - filename_to_tablename(db_name_raw, db_name, sizeof(db_name)); - field_store_string(table->field[0], db_name); - p++; - } else { - field_store_string(table->field[0], NULL); - p = index->table_name; - } - strcpy(table_name_raw, (const char*)p); - filename_to_tablename(table_name_raw, table_name, sizeof(table_name)); - field_store_string(table->field[1], table_name); - field_store_string(table->field[2], index->name); - - table->field[3]->store(block->page.space); - table->field[4]->store(block->page.offset); - table->field[5]->store(page_get_n_recs(frame)); - table->field[6]->store(page_get_data_size(frame)); - table->field[7]->store(block->is_hashed); - table->field[8]->store(block->page.access_time); - table->field[9]->store(block->page.newest_modification != 0); - table->field[10]->store(block->page.oldest_modification != 0); - table->field[11]->store(block->page.old); - table->field[12]->store(0); - table->field[13]->store(block->page.buf_fix_count); - table->field[14]->store(block->page.flush_type); + table->field[0]->store(ut_conv_dulint_to_longlong(index_id)); + table->field[1]->store(block->page.space); + table->field[2]->store(block->page.offset); + table->field[3]->store(page_get_n_recs(frame)); + table->field[4]->store(page_get_data_size(frame)); + table->field[5]->store(block->is_hashed); + table->field[6]->store(block->page.access_time); + table->field[7]->store(block->page.newest_modification != 0); + table->field[8]->store(block->page.oldest_modification != 0); + table->field[9]->store(block->page.old); + table->field[10]->store(0); + table->field[11]->store(block->page.buf_fix_count); + table->field[12]->store(block->page.flush_type); - if (schema_table_store_record(thd, table)) { - status = 1; - break; - } + if (schema_table_store_record(thd, table)) { + status = 1; + break; } } } } - mutex_exit(&(dict_sys->mutex)); buf_pool_mutex_exit(); DBUG_RETURN(status); @@ -928,7 +875,6 @@ i_s_innodb_buffer_pool_pages_blob_fill( RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name); buf_pool_mutex_enter(); - mutex_enter(&(dict_sys->mutex)); chunk = buf_pool->chunks; @@ -984,7 +930,6 @@ i_s_innodb_buffer_pool_pages_blob_fill( } } - mutex_exit(&(dict_sys->mutex)); buf_pool_mutex_exit(); DBUG_RETURN(status); @@ -3998,6 +3943,35 @@ static ST_FIELD_INFO i_s_innodb_sys_indexes_info[] = END_OF_ST_FIELD_INFO }; +static ST_FIELD_INFO i_s_innodb_sys_stats_info[] = +{ + {STRUCT_FLD(field_name, "INDEX_ID"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + + {STRUCT_FLD(field_name, "KEY_COLS"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + + {STRUCT_FLD(field_name, "DIFF_VALS"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + + END_OF_ST_FIELD_INFO +}; + static int copy_string_field( @@ -4254,6 +4228,40 @@ copy_sys_indexes_rec( static int +copy_sys_stats_rec( +/*===============*/ + TABLE* table, + const dict_index_t* index, + const rec_t* rec +) +{ + int status; + int field; + + /* INDEX_ID */ + field = dict_index_get_nth_col_pos(index, 0); + status = copy_id_field(table, 0, rec, field); + if (status) { + return status; + } + /* KEY_COLS */ + field = dict_index_get_nth_col_pos(index, 1); + status = copy_int_field(table, 1, rec, field); + if (status) { + return status; + } + /* DIFF_VALS */ + field = dict_index_get_nth_col_pos(index, 2); + status = copy_id_field(table, 2, rec, field); + if (status) { + return status; + } + + return 0; +} + +static +int i_s_innodb_schema_table_fill( /*=========================*/ THD* thd, @@ -4281,6 +4289,8 @@ i_s_innodb_schema_table_fill( id = 0; } else if (innobase_strcasecmp(table_name, "innodb_sys_indexes") == 0) { id = 1; + } else if (innobase_strcasecmp(table_name, "innodb_sys_stats") == 0) { + id = 2; } else { DBUG_RETURN(1); } @@ -4294,8 +4304,10 @@ i_s_innodb_schema_table_fill( if (id == 0) { innodb_table = dict_table_get_low("SYS_TABLES"); - } else { + } else if (id == 1) { innodb_table = dict_table_get_low("SYS_INDEXES"); + } else { + innodb_table = dict_table_get_low("SYS_STATS"); } index = UT_LIST_GET_FIRST(innodb_table->indexes); @@ -4320,8 +4332,10 @@ i_s_innodb_schema_table_fill( if (id == 0) { status = copy_sys_tables_rec(table, index, rec); - } else { + } else if (id == 1) { status = copy_sys_indexes_rec(table, index, rec); + } else { + status = copy_sys_stats_rec(table, index, rec); } if (status) { btr_pcur_close(&pcur); @@ -4386,6 +4400,21 @@ i_s_innodb_sys_indexes_init( DBUG_RETURN(0); } +static +int +i_s_innodb_sys_stats_init( +/*======================*/ + void* p) +{ + DBUG_ENTER("i_s_innodb_sys_stats_init"); + ST_SCHEMA_TABLE* schema = (ST_SCHEMA_TABLE*) p; + + schema->fields_info = i_s_innodb_sys_stats_info; + schema->fill_table = i_s_innodb_schema_table_fill; + + DBUG_RETURN(0); +} + UNIV_INTERN struct st_mysql_plugin i_s_innodb_sys_tables = { STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN), @@ -4452,3 +4481,36 @@ UNIV_INTERN struct st_maria_plugin i_s_innodb_sys_indexes_maria = STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_BETA) }; +UNIV_INTERN struct st_mysql_plugin i_s_innodb_sys_stats = +{ + STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN), + STRUCT_FLD(info, &i_s_info), + STRUCT_FLD(name, "INNODB_SYS_STATS"), + STRUCT_FLD(author, plugin_author), + STRUCT_FLD(descr, "InnoDB SYS_STATS table"), + STRUCT_FLD(license, PLUGIN_LICENSE_GPL), + STRUCT_FLD(init, i_s_innodb_sys_stats_init), + STRUCT_FLD(deinit, i_s_common_deinit), + STRUCT_FLD(version, 0x0100 /* 1.0 */), + STRUCT_FLD(status_vars, NULL), + STRUCT_FLD(system_vars, NULL), + STRUCT_FLD(__reserved1, NULL) +}; + +UNIV_INTERN struct st_maria_plugin i_s_innodb_sys_stats_maria = +{ + STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN), + STRUCT_FLD(info, &i_s_info), + STRUCT_FLD(name, "INNODB_SYS_STATS"), + STRUCT_FLD(author, plugin_author), + STRUCT_FLD(descr, "InnoDB SYS_STATS table"), + STRUCT_FLD(license, PLUGIN_LICENSE_GPL), + STRUCT_FLD(init, i_s_innodb_sys_stats_init), + STRUCT_FLD(deinit, i_s_common_deinit), + STRUCT_FLD(version, 0x0100 /* 1.0 */), + STRUCT_FLD(status_vars, NULL), + STRUCT_FLD(system_vars, NULL), + STRUCT_FLD(version_info, "1.0"), + STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_BETA) +}; + diff --git a/storage/xtradb/handler/i_s.h b/storage/xtradb/handler/i_s.h index 8252c6d1fa2..7a5c3ead5ed 100644 --- a/storage/xtradb/handler/i_s.h +++ b/storage/xtradb/handler/i_s.h @@ -43,6 +43,7 @@ extern struct st_mysql_plugin i_s_innodb_index_stats; extern struct st_mysql_plugin i_s_innodb_admin_command; extern struct st_mysql_plugin i_s_innodb_sys_tables; extern struct st_mysql_plugin i_s_innodb_sys_indexes; +extern struct st_mysql_plugin i_s_innodb_sys_stats; extern struct st_maria_plugin i_s_innodb_buffer_pool_pages_maria; extern struct st_maria_plugin i_s_innodb_buffer_pool_pages_index_maria; @@ -61,5 +62,6 @@ extern struct st_maria_plugin i_s_innodb_index_stats_maria; extern struct st_maria_plugin i_s_innodb_admin_command_maria; extern struct st_maria_plugin i_s_innodb_sys_tables_maria; extern struct st_maria_plugin i_s_innodb_sys_indexes_maria; +extern struct st_maria_plugin i_s_innodb_sys_stats_maria; #endif /* i_s_h */ diff --git a/storage/xtradb/handler/innodb_patch_info.h b/storage/xtradb/handler/innodb_patch_info.h index 38b97411340..e68f12d0fec 100644 --- a/storage/xtradb/handler/innodb_patch_info.h +++ b/storage/xtradb/handler/innodb_patch_info.h @@ -47,5 +47,6 @@ struct innodb_enhancement { {"innodb_fast_checksum","Using the checksum on 32bit-unit calculation","incompatible for unpatched ver.","http://www.percona.com/docs/wiki/percona-xtradb"}, {"innodb_files_extend","allow >4GB transaction log files, and can vary universal page size of datafiles","incompatible for unpatched ver.","http://www.percona.com/docs/wiki/percona-xtradb"}, {"innodb_sys_tables_sys_indexes","Expose InnoDB SYS_TABLES and SYS_INDEXES schema tables","","http://www.percona.com/docs/wiki/percona-xtradb"}, +{"innodb_buffer_pool_shm","Put buffer pool contents to shared memory segment and reuse it at clean restart [experimental]","","http://www.percona.com/docs/wiki/percona-xtradb"}, {NULL, NULL, NULL, NULL} }; diff --git a/storage/xtradb/include/buf0buf.h b/storage/xtradb/include/buf0buf.h index f93510be6d6..a7854e3038d 100644 --- a/storage/xtradb/include/buf0buf.h +++ b/storage/xtradb/include/buf0buf.h @@ -36,6 +36,7 @@ Created 11/5/1995 Heikki Tuuri #include "ut0rbt.h" #ifndef UNIV_HOTBACKUP #include "os0proc.h" +#include "srv0srv.h" /** @name Modes for buf_page_get_gen */ /* @{ */ @@ -1301,11 +1302,23 @@ struct buf_block_struct{ /**********************************************************************//** Compute the hash fold value for blocks in buf_pool->zip_hash. */ /* @{ */ -#define BUF_POOL_ZIP_FOLD_PTR(ptr) ((ulint) (ptr) / UNIV_PAGE_SIZE) +/* the fold should be relative when srv_buffer_pool_shm_key is enabled */ +#define BUF_POOL_ZIP_FOLD_PTR(ptr) (!srv_buffer_pool_shm_key\ + ?((ulint) (ptr) / UNIV_PAGE_SIZE)\ + :((ulint) ((char*)ptr - (char*)(buf_pool->chunks->blocks->frame)) / UNIV_PAGE_SIZE)) #define BUF_POOL_ZIP_FOLD(b) BUF_POOL_ZIP_FOLD_PTR((b)->frame) #define BUF_POOL_ZIP_FOLD_BPAGE(b) BUF_POOL_ZIP_FOLD((buf_block_t*) (b)) /* @} */ +/** A chunk of buffers. The buffer pool is allocated in chunks. */ +struct buf_chunk_struct{ + ulint mem_size; /*!< allocated size of the chunk */ + ulint size; /*!< size of frames[] and blocks[] */ + void* mem; /*!< pointer to the memory area which + was allocated for the frames */ + buf_block_t* blocks; /*!< array of buffer control blocks */ +}; + /** @brief The buffer pool statistics structure. */ struct buf_pool_stat_struct{ ulint n_page_gets; /*!< number of page gets performed; diff --git a/storage/xtradb/include/buf0lru.h b/storage/xtradb/include/buf0lru.h index 0a7d01c95cf..d3b59e8b579 100644 --- a/storage/xtradb/include/buf0lru.h +++ b/storage/xtradb/include/buf0lru.h @@ -96,7 +96,7 @@ buf_LRU_insert_zip_clean( Try to free a block. If bpage is a descriptor of a compressed-only page, the descriptor object will be freed as well. -NOTE: If this function returns BUF_LRU_FREED, it will not temporarily +NOTE: If this function returns BUF_LRU_FREED, it will temporarily release buf_pool_mutex. Furthermore, the page frame will no longer be accessible via bpage. diff --git a/storage/xtradb/include/buf0rea.h b/storage/xtradb/include/buf0rea.h index 71f62ff7b49..56d3d24a3b7 100644 --- a/storage/xtradb/include/buf0rea.h +++ b/storage/xtradb/include/buf0rea.h @@ -158,8 +158,7 @@ buf_read_recv_pages( /** The size in pages of the area which the read-ahead algorithms read if invoked */ -#define BUF_READ_AHEAD_AREA \ - ut_min(64, ut_2_power_up(buf_pool->curr_size / 32)) +#define BUF_READ_AHEAD_AREA 64 /** @name Modes used in read-ahead @{ */ /** read only pages belonging to the insert buffer tree */ diff --git a/storage/xtradb/include/db0err.h b/storage/xtradb/include/db0err.h index 747e9b5364e..c841c2b4afe 100644 --- a/storage/xtradb/include/db0err.h +++ b/storage/xtradb/include/db0err.h @@ -28,6 +28,8 @@ Created 5/24/1996 Heikki Tuuri enum db_err { + DB_SUCCESS_LOCKED_REC = 9, /*!< like DB_SUCCESS, but a new + explicit record lock was created */ DB_SUCCESS = 10, /* The following are error codes */ diff --git a/storage/xtradb/include/dict0boot.h b/storage/xtradb/include/dict0boot.h index 1a13bd1503a..9239e031a7f 100644 --- a/storage/xtradb/include/dict0boot.h +++ b/storage/xtradb/include/dict0boot.h @@ -46,13 +46,14 @@ dict_hdr_get( /*=========*/ mtr_t* mtr); /*!< in: mtr */ /**********************************************************************//** -Returns a new row, table, index, or tree id. -@return the new id */ +Returns a new table, index, or space id. */ UNIV_INTERN -dulint +void dict_hdr_get_new_id( /*================*/ - ulint type); /*!< in: DICT_HDR_ROW_ID, ... */ + dulint* table_id, /*!< out: table id (not assigned if NULL) */ + dulint* index_id, /*!< out: index id (not assigned if NULL) */ + ulint* space_id); /*!< out: space id (not assigned if NULL) */ /**********************************************************************//** Returns a new row id. @return the new id */ @@ -100,6 +101,7 @@ dict_create(void); #define DICT_COLUMNS_ID ut_dulint_create(0, 2) #define DICT_INDEXES_ID ut_dulint_create(0, 3) #define DICT_FIELDS_ID ut_dulint_create(0, 4) +#define DICT_STATS_ID ut_dulint_create(0, 6) /* The following is a secondary index on SYS_TABLES */ #define DICT_TABLE_IDS_ID ut_dulint_create(0, 5) @@ -119,17 +121,21 @@ dict_create(void); #define DICT_HDR_ROW_ID 0 /* The latest assigned row id */ #define DICT_HDR_TABLE_ID 8 /* The latest assigned table id */ #define DICT_HDR_INDEX_ID 16 /* The latest assigned index id */ -#define DICT_HDR_MIX_ID 24 /* Obsolete, always 0. */ +#define DICT_HDR_MAX_SPACE_ID 24 /* The latest assigned space id, or 0*/ +#define DICT_HDR_MIX_ID_LOW 28 /* Obsolete,always DICT_HDR_FIRST_ID */ #define DICT_HDR_TABLES 32 /* Root of the table index tree */ #define DICT_HDR_TABLE_IDS 36 /* Root of the table index tree */ #define DICT_HDR_COLUMNS 40 /* Root of the column index tree */ #define DICT_HDR_INDEXES 44 /* Root of the index index tree */ #define DICT_HDR_FIELDS 48 /* Root of the index field index tree */ +#define DICT_HDR_STATS 52 /* Root of the stats tree */ #define DICT_HDR_FSEG_HEADER 56 /* Segment header for the tablespace segment into which the dictionary header is created */ + +#define DICT_HDR_XTRADB_MARK 256 /* Flag to distinguish expansion of XtraDB */ /*-------------------------------------------------------------*/ /* The field number of the page number field in the sys_indexes table @@ -139,11 +145,15 @@ clustered index */ #define DICT_SYS_INDEXES_TYPE_FIELD 6 #define DICT_SYS_INDEXES_NAME_FIELD 4 +#define DICT_SYS_STATS_DIFF_VALS_FIELD 4 + /* When a row id which is zero modulo this number (which must be a power of two) is assigned, the field DICT_HDR_ROW_ID on the dictionary header page is updated */ #define DICT_HDR_ROW_ID_WRITE_MARGIN 256 +#define DICT_HDR_XTRADB_FLAG ut_dulint_create(0x58545241UL,0x44425F31UL) /* "XTRADB_1" */ + #ifndef UNIV_NONINL #include "dict0boot.ic" #endif diff --git a/storage/xtradb/include/dict0crea.h b/storage/xtradb/include/dict0crea.h index cce1246b789..0249091a195 100644 --- a/storage/xtradb/include/dict0crea.h +++ b/storage/xtradb/include/dict0crea.h @@ -53,6 +53,14 @@ ind_create_graph_create( dict_index_t* index, /*!< in: index to create, built as a memory data structure */ mem_heap_t* heap); /*!< in: heap where created */ +/*********************************************************************//** +*/ +UNIV_INTERN +ind_node_t* +ind_insert_stats_graph_create( +/*==========================*/ + dict_index_t* index, + mem_heap_t* heap); /***********************************************************//** Creates a table. This is a high-level function used in SQL execution graphs. @return query thread to run next or NULL */ @@ -62,6 +70,13 @@ dict_create_table_step( /*===================*/ que_thr_t* thr); /*!< in: query thread */ /***********************************************************//** +*/ +UNIV_INTERN +que_thr_t* +dict_insert_stats_step( +/*===================*/ + que_thr_t* thr); +/***********************************************************//** Creates an index. This is a high-level function used in SQL execution graphs. @return query thread to run next or NULL */ @@ -170,6 +185,7 @@ struct ind_node_struct{ ins_node_t* field_def; /* child node which does the inserts of the field definitions; the row to be inserted is built by the parent node */ + ins_node_t* stats_def; commit_node_t* commit_node; /* child node which performs a commit after a successful index creation */ @@ -180,6 +196,7 @@ struct ind_node_struct{ dict_table_t* table; /*!< table which owns the index */ dtuple_t* ind_row;/* index definition row built */ ulint field_no;/* next field definition to insert */ + ulint stats_no; mem_heap_t* heap; /*!< memory heap used as auxiliary storage */ }; @@ -189,6 +206,7 @@ struct ind_node_struct{ #define INDEX_CREATE_INDEX_TREE 3 #define INDEX_COMMIT_WORK 4 #define INDEX_ADD_TO_CACHE 5 +#define INDEX_BUILD_STATS_COLS 6 #ifndef UNIV_NONINL #include "dict0crea.ic" diff --git a/storage/xtradb/include/dict0dict.h b/storage/xtradb/include/dict0dict.h index 0879e91ab33..3c5e620d3c1 100644 --- a/storage/xtradb/include/dict0dict.h +++ b/storage/xtradb/include/dict0dict.h @@ -352,6 +352,7 @@ dict_create_foreign_constraints( name before it: test.table2; the default database id the database of parameter name */ + size_t sql_length, /*!< in: length of sql_string */ const char* name, /*!< in: table full name in the normalized form database_name/table_name */ @@ -1039,8 +1040,9 @@ void dict_update_statistics_low( /*=======================*/ dict_table_t* table, /*!< in/out: table */ - ibool has_dict_mutex);/*!< in: TRUE if the caller has the + ibool has_dict_mutex, /*!< in: TRUE if the caller has the dictionary mutex */ + ibool sync); /*********************************************************************//** Calculates new estimates for table and index statistics. The statistics are used in query optimization. */ @@ -1048,7 +1050,8 @@ UNIV_INTERN void dict_update_statistics( /*===================*/ - dict_table_t* table); /*!< in/out: table */ + dict_table_t* table, /*!< in/out: table */ + ibool sync); /********************************************************************//** Reserves the dictionary system mutex for MySQL. */ UNIV_INTERN @@ -1159,6 +1162,7 @@ struct dict_sys_struct{ dict_table_t* sys_columns; /*!< SYS_COLUMNS table */ dict_table_t* sys_indexes; /*!< SYS_INDEXES table */ dict_table_t* sys_fields; /*!< SYS_FIELDS table */ + dict_table_t* sys_stats; /*!< SYS_STATS table */ }; #endif /* !UNIV_HOTBACKUP */ diff --git a/storage/xtradb/include/dict0mem.h b/storage/xtradb/include/dict0mem.h index ee3107a3be1..37c5a4a24fc 100644 --- a/storage/xtradb/include/dict0mem.h +++ b/storage/xtradb/include/dict0mem.h @@ -382,7 +382,7 @@ initialized to 0, NULL or FALSE in dict_mem_table_create(). */ struct dict_table_struct{ dulint id; /*!< id of the table */ mem_heap_t* heap; /*!< memory heap */ - const char* name; /*!< table name */ + char* name; /*!< table name */ const char* dir_path_of_temp_table;/*!< NULL or the directory path where a TEMPORARY table that was explicitly created by a user should be placed if diff --git a/storage/xtradb/include/fil0fil.h b/storage/xtradb/include/fil0fil.h index 163cacf2892..07c80ef8609 100644 --- a/storage/xtradb/include/fil0fil.h +++ b/storage/xtradb/include/fil0fil.h @@ -226,6 +226,16 @@ fil_space_create( 0 for uncompressed tablespaces */ ulint purpose);/*!< in: FIL_TABLESPACE, or FIL_LOG if log */ /*******************************************************************//** +Assigns a new space id for a new single-table tablespace. This works simply by +incrementing the global counter. If 4 billion id's is not enough, we may need +to recycle id's. +@return TRUE if assigned, FALSE if not */ +UNIV_INTERN +ibool +fil_assign_new_space_id( +/*====================*/ + ulint* space_id); /*!< in/out: space id */ +/*******************************************************************//** Returns the size of the space in pages. The tablespace must be cached in the memory cache. @return space size, 0 if space not found */ @@ -428,9 +438,7 @@ UNIV_INTERN ulint fil_create_new_single_table_tablespace( /*===================================*/ - ulint* space_id, /*!< in/out: space id; if this is != 0, - then this is an input parameter, - otherwise output */ + ulint space_id, /*!< in: space id */ const char* tablename, /*!< in: the table name in the usual databasename/tablename format of InnoDB, or a dir path to a temp @@ -499,16 +507,6 @@ UNIV_INTERN ulint fil_load_single_table_tablespaces(void); /*===================================*/ -/********************************************************************//** -If we need crash recovery, and we have called -fil_load_single_table_tablespaces() and dict_load_single_table_tablespaces(), -we can call this function to print an error message of orphaned .ibd files -for which there is not a data dictionary entry with a matching table name -and space id. */ -UNIV_INTERN -void -fil_print_orphaned_tablespaces(void); -/*================================*/ /*******************************************************************//** Returns TRUE if a single-table tablespace does not exist in the memory cache, or is being deleted there. diff --git a/storage/xtradb/include/ha_prototypes.h b/storage/xtradb/include/ha_prototypes.h index b737a00b3dc..445d94eeabb 100644 --- a/storage/xtradb/include/ha_prototypes.h +++ b/storage/xtradb/include/ha_prototypes.h @@ -215,11 +215,21 @@ innobase_casedn_str( /**********************************************************************//** Determines the connection character set. @return connection character set */ +UNIV_INTERN struct charset_info_st* innobase_get_charset( /*=================*/ void* mysql_thd); /*!< in: MySQL thread handle */ - +/**********************************************************************//** +Determines the current SQL statement. +@return SQL statement string */ +UNIV_INTERN +const char* +innobase_get_stmt( +/*==============*/ + void* mysql_thd, /*!< in: MySQL thread handle */ + size_t* length) /*!< out: length of the SQL statement */ + __attribute__((nonnull)); /******************************************************************//** This function is used to find the storage length in bytes of the first n characters for prefix indexes using a multibyte character set. The function @@ -258,4 +268,12 @@ thd_lock_wait_timeout( void* thd); /*!< in: thread handle (THD*), or NULL to query the global innodb_lock_wait_timeout */ +/******************************************************************//** +*/ + +ulong +thd_flush_log_at_trx_commit_session( +/*================================*/ + void* thd); + #endif diff --git a/storage/xtradb/include/hash0hash.h b/storage/xtradb/include/hash0hash.h index b17c21a45ef..9dc20cc057f 100644 --- a/storage/xtradb/include/hash0hash.h +++ b/storage/xtradb/include/hash0hash.h @@ -49,6 +49,28 @@ hash_table_t* hash_create( /*========*/ ulint n); /*!< in: number of array cells */ + +/*************************************************************//** +*/ +UNIV_INTERN +ulint +hash_create_needed( +/*===============*/ + ulint n); + +UNIV_INTERN +void +hash_create_init( +/*=============*/ + hash_table_t* table, + ulint n); + +UNIV_INTERN +void +hash_create_reuse( +/*==============*/ + hash_table_t* table); + #ifndef UNIV_HOTBACKUP /*************************************************************//** Creates a mutex array to protect a hash table. */ @@ -328,6 +350,33 @@ do {\ }\ } while (0) +/********************************************************************//** +Align nodes with moving location.*/ +#define HASH_OFFSET(TABLE, NODE_TYPE, PTR_NAME, FADDR, FOFFSET, BOFFSET) \ +do {\ + ulint i2222;\ + ulint cell_count2222;\ +\ + cell_count2222 = hash_get_n_cells(TABLE);\ +\ + for (i2222 = 0; i2222 < cell_count2222; i2222++) {\ + NODE_TYPE* node2222;\ +\ + if ((TABLE)->array[i2222].node) \ + (TABLE)->array[i2222].node = (void*)((char*)(TABLE)->array[i2222].node \ + + (((TABLE)->array[i2222].node > (void*)FADDR)?FOFFSET:BOFFSET));\ + node2222 = HASH_GET_FIRST((TABLE), i2222);\ +\ + while (node2222) {\ + if (node2222->PTR_NAME) \ + node2222->PTR_NAME = (void*)((char*)node2222->PTR_NAME \ + + ((((void*)node2222->PTR_NAME) > (void*)FADDR)?FOFFSET:BOFFSET));\ +\ + node2222 = node2222->PTR_NAME;\ + }\ + }\ +} while (0) + /************************************************************//** Gets the mutex index for a fold value in a hash table. @return mutex number */ diff --git a/storage/xtradb/include/lock0lock.h b/storage/xtradb/include/lock0lock.h index 89a6977b589..73f885ecf04 100644 --- a/storage/xtradb/include/lock0lock.h +++ b/storage/xtradb/include/lock0lock.h @@ -341,11 +341,12 @@ lock_sec_rec_modify_check_and_lock( que_thr_t* thr, /*!< in: query thread */ mtr_t* mtr); /*!< in/out: mini-transaction */ /*********************************************************************//** -Like the counterpart for a clustered index below, but now we read a +Like lock_clust_rec_read_check_and_lock(), but reads a secondary index record. -@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */ +@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK, +or DB_QUE_THR_SUSPENDED */ UNIV_INTERN -ulint +enum db_err lock_sec_rec_read_check_and_lock( /*=============================*/ ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG @@ -372,9 +373,10 @@ if the query thread should anyway be suspended for some reason; if not, then puts the transaction and the query thread to the lock wait state and inserts a waiting request for a record lock to the lock queue. Sets the requested mode lock on the record. -@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */ +@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK, +or DB_QUE_THR_SUSPENDED */ UNIV_INTERN -ulint +enum db_err lock_clust_rec_read_check_and_lock( /*===============================*/ ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG diff --git a/storage/xtradb/include/log0log.ic b/storage/xtradb/include/log0log.ic index 139f4041a36..1ce00fd7313 100644 --- a/storage/xtradb/include/log0log.ic +++ b/storage/xtradb/include/log0log.ic @@ -433,7 +433,10 @@ void log_free_check(void) /*================*/ { - /* ut_ad(sync_thread_levels_empty()); */ + +#ifdef UNIV_SYNC_DEBUG + ut_ad(sync_thread_levels_empty_gen(TRUE)); +#endif /* UNIV_SYNC_DEBUG */ if (log_sys->check_flush_or_checkpoint) { diff --git a/storage/xtradb/include/mach0data.ic b/storage/xtradb/include/mach0data.ic index ef20356bd31..96d2417ac81 100644 --- a/storage/xtradb/include/mach0data.ic +++ b/storage/xtradb/include/mach0data.ic @@ -36,7 +36,7 @@ mach_write_to_1( ulint n) /*!< in: ulint integer to be stored, >= 0, < 256 */ { ut_ad(b); - ut_ad(n <= 0xFFUL); + ut_ad((n | 0xFFUL) <= 0xFFUL); b[0] = (byte)n; } @@ -65,7 +65,7 @@ mach_write_to_2( ulint n) /*!< in: ulint integer to be stored */ { ut_ad(b); - ut_ad(n <= 0xFFFFUL); + ut_ad((n | 0xFFFFUL) <= 0xFFFFUL); b[0] = (byte)(n >> 8); b[1] = (byte)(n); @@ -81,10 +81,7 @@ mach_read_from_2( /*=============*/ const byte* b) /*!< in: pointer to 2 bytes */ { - ut_ad(b); - return( ((ulint)(b[0]) << 8) - + (ulint)(b[1]) - ); + return(((ulint)(b[0]) << 8) | (ulint)(b[1])); } /********************************************************//** @@ -129,7 +126,7 @@ mach_write_to_3( ulint n) /*!< in: ulint integer to be stored */ { ut_ad(b); - ut_ad(n <= 0xFFFFFFUL); + ut_ad((n | 0xFFFFFFUL) <= 0xFFFFFFUL); b[0] = (byte)(n >> 16); b[1] = (byte)(n >> 8); @@ -148,8 +145,8 @@ mach_read_from_3( { ut_ad(b); return( ((ulint)(b[0]) << 16) - + ((ulint)(b[1]) << 8) - + (ulint)(b[2]) + | ((ulint)(b[1]) << 8) + | (ulint)(b[2]) ); } @@ -183,9 +180,9 @@ mach_read_from_4( { ut_ad(b); return( ((ulint)(b[0]) << 24) - + ((ulint)(b[1]) << 16) - + ((ulint)(b[2]) << 8) - + (ulint)(b[3]) + | ((ulint)(b[1]) << 16) + | ((ulint)(b[2]) << 8) + | (ulint)(b[3]) ); } @@ -721,7 +718,7 @@ mach_read_from_2_little_endian( /*===========================*/ const byte* buf) /*!< in: from where to read */ { - return((ulint)(*buf) + ((ulint)(*(buf + 1))) * 256); + return((ulint)(buf[0]) | ((ulint)(buf[1]) << 8)); } /*********************************************************//** diff --git a/storage/xtradb/include/mtr0log.ic b/storage/xtradb/include/mtr0log.ic index db017c7d16e..63af02ba409 100644 --- a/storage/xtradb/include/mtr0log.ic +++ b/storage/xtradb/include/mtr0log.ic @@ -203,7 +203,7 @@ mlog_write_initial_log_record_fast( system tablespace */ if ((space == TRX_SYS_SPACE || (srv_doublewrite_file && space == TRX_DOUBLEWRITE_SPACE)) - && offset >= FSP_EXTENT_SIZE && offset < 3 * FSP_EXTENT_SIZE) { + && offset >= (ulint)FSP_EXTENT_SIZE && offset < 3 * (ulint)FSP_EXTENT_SIZE) { if (trx_doublewrite_buf_is_being_created) { /* Do nothing: we only come to this branch in an InnoDB database creation. We do not redo log diff --git a/storage/xtradb/include/os0proc.h b/storage/xtradb/include/os0proc.h index fd46bd7db87..582cef6f803 100644 --- a/storage/xtradb/include/os0proc.h +++ b/storage/xtradb/include/os0proc.h @@ -32,6 +32,11 @@ Created 9/30/1995 Heikki Tuuri #ifdef UNIV_LINUX #include <sys/ipc.h> #include <sys/shm.h> +#else +# if defined HAVE_SYS_IPC_H && HAVE_SYS_SHM_H +#include <sys/ipc.h> +#include <sys/shm.h> +# endif #endif typedef void* os_process_t; @@ -70,6 +75,29 @@ os_mem_free_large( ulint size); /*!< in: size returned by os_mem_alloc_large() */ + +/****************************************************************//** +Allocates or attaches and reuses shared memory segment. +The content is not cleared automatically. +@return allocated memory */ +UNIV_INTERN +void* +os_shm_alloc( +/*=========*/ + ulint* n, /*!< in/out: number of bytes */ + uint key, + ibool* is_new); + +/****************************************************************//** +Detach shared memory segment. */ +UNIV_INTERN +void +os_shm_free( +/*========*/ + void *ptr, /*!< in: pointer returned by + os_shm_alloc() */ + ulint size); /*!< in: size returned by + os_shm_alloc() */ #ifndef UNIV_NONINL #include "os0proc.ic" #endif diff --git a/storage/xtradb/include/page0page.h b/storage/xtradb/include/page0page.h index e182c8f58be..5b2bcf7c054 100644 --- a/storage/xtradb/include/page0page.h +++ b/storage/xtradb/include/page0page.h @@ -500,7 +500,7 @@ ibool page_is_leaf( /*=========*/ const page_t* page) /*!< in: page */ - __attribute__((nonnull, pure)); + __attribute__((pure)); /************************************************************//** Gets the pointer to the next record on the page. @return pointer to next record */ diff --git a/storage/xtradb/include/page0page.ic b/storage/xtradb/include/page0page.ic index 9655e6c7e27..dab9dc742e4 100644 --- a/storage/xtradb/include/page0page.ic +++ b/storage/xtradb/include/page0page.ic @@ -275,6 +275,9 @@ page_is_leaf( /*=========*/ const page_t* page) /*!< in: page */ { + if (!page) { + return(FALSE); + } return(!*(const uint16*) (page + (PAGE_HEADER + PAGE_LEVEL))); } diff --git a/storage/xtradb/include/page0zip.h b/storage/xtradb/include/page0zip.h index 574809e5227..4d37302ed20 100644 --- a/storage/xtradb/include/page0zip.h +++ b/storage/xtradb/include/page0zip.h @@ -114,7 +114,7 @@ page_zip_compress( const page_t* page, /*!< in: uncompressed page */ dict_index_t* index, /*!< in: index of the B-tree node */ mtr_t* mtr) /*!< in: mini-transaction, or NULL */ - __attribute__((nonnull(1,2,3))); + __attribute__((nonnull(1,3))); /**********************************************************************//** Decompress a page. This function should tolerate errors on the compressed diff --git a/storage/xtradb/include/que0que.h b/storage/xtradb/include/que0que.h index 39f8d07af89..09a671f49b1 100644 --- a/storage/xtradb/include/que0que.h +++ b/storage/xtradb/include/que0que.h @@ -492,6 +492,8 @@ struct que_fork_struct{ #define QUE_NODE_CALL 31 #define QUE_NODE_EXIT 32 +#define QUE_NODE_INSERT_STATS 34 + /* Query thread states */ #define QUE_THR_RUNNING 1 #define QUE_THR_PROCEDURE_WAIT 2 diff --git a/storage/xtradb/include/rem0cmp.h b/storage/xtradb/include/rem0cmp.h index 421308af49b..fcea62ad486 100644 --- a/storage/xtradb/include/rem0cmp.h +++ b/storage/xtradb/include/rem0cmp.h @@ -148,7 +148,9 @@ cmp_rec_rec_simple( const rec_t* rec2, /*!< in: physical record */ const ulint* offsets1,/*!< in: rec_get_offsets(rec1, ...) */ const ulint* offsets2,/*!< in: rec_get_offsets(rec2, ...) */ - const dict_index_t* index); /*!< in: data dictionary index */ + const dict_index_t* index, /*!< in: data dictionary index */ + ibool* null_eq);/*!< out: set to TRUE if + found matching null values */ /*************************************************************//** This function is used to compare two physical records. Only the common first fields are compared, and if an externally stored field is diff --git a/storage/xtradb/include/row0mysql.h b/storage/xtradb/include/row0mysql.h index d2a8734c61f..a604f6e3724 100644 --- a/storage/xtradb/include/row0mysql.h +++ b/storage/xtradb/include/row0mysql.h @@ -253,15 +253,6 @@ row_table_got_default_clust_index( /*==============================*/ const dict_table_t* table); /*!< in: table */ /*********************************************************************//** -Calculates the key number used inside MySQL for an Innobase index. We have -to take into account if we generated a default clustered index for the table -@return the key number used inside MySQL */ -UNIV_INTERN -ulint -row_get_mysql_key_number_for_index( -/*===============================*/ - const dict_index_t* index); /*!< in: index */ -/*********************************************************************//** Does an update or delete of a row for MySQL. @return error code or DB_SUCCESS */ UNIV_INTERN @@ -273,27 +264,26 @@ row_update_for_mysql( row_prebuilt_t* prebuilt); /*!< in: prebuilt struct in MySQL handle */ /*********************************************************************//** -This can only be used when srv_locks_unsafe_for_binlog is TRUE or -session is using a READ COMMITTED isolation level. Before -calling this function we must use trx_reset_new_rec_lock_info() and -trx_register_new_rec_lock() to store the information which new record locks -really were set. This function removes a newly set lock under prebuilt->pcur, -and also under prebuilt->clust_pcur. Currently, this is only used and tested -in the case of an UPDATE or a DELETE statement, where the row lock is of the -LOCK_X type. -Thus, this implements a 'mini-rollback' that releases the latest record -locks we set. -@return error code or DB_SUCCESS */ +This can only be used when srv_locks_unsafe_for_binlog is TRUE or this +session is using a READ COMMITTED or READ UNCOMMITTED isolation level. +Before calling this function row_search_for_mysql() must have +initialized prebuilt->new_rec_locks to store the information which new +record locks really were set. This function removes a newly set +clustered index record lock under prebuilt->pcur or +prebuilt->clust_pcur. Thus, this implements a 'mini-rollback' that +releases the latest clustered index record lock we set. +@return error code or DB_SUCCESS */ UNIV_INTERN int row_unlock_for_mysql( /*=================*/ - row_prebuilt_t* prebuilt, /*!< in: prebuilt struct in MySQL + row_prebuilt_t* prebuilt, /*!< in/out: prebuilt struct in MySQL handle */ - ibool has_latches_on_recs);/*!< TRUE if called so that we have - the latches on the records under pcur - and clust_pcur, and we do not need to - reposition the cursors. */ + ibool has_latches_on_recs);/*!< in: TRUE if called + so that we have the latches on + the records under pcur and + clust_pcur, and we do not need + to reposition the cursors. */ /*********************************************************************//** Creates an query graph node of 'update' type to be used in the MySQL interface. @@ -386,6 +376,14 @@ row_create_index_for_mysql( then checked for not being too large. */ /*********************************************************************//** +*/ +UNIV_INTERN +int +row_insert_stats_for_mysql( +/*=======================*/ + dict_index_t* index, + trx_t* trx); +/*********************************************************************//** Scans a table create SQL string and adds to the data dictionary the foreign key constraints declared in the string. This function should be called after the indexes for a table have been created. @@ -403,6 +401,7 @@ row_table_add_foreign_constraints( FOREIGN KEY (a, b) REFERENCES table2(c, d), table2 can be written also with the database name before it: test.table2 */ + size_t sql_length, /*!< in: length of sql_string */ const char* name, /*!< in: table full name in the normalized form database_name/table_name */ @@ -710,18 +709,17 @@ struct row_prebuilt_struct { ulint new_rec_locks; /*!< normally 0; if srv_locks_unsafe_for_binlog is TRUE or session is using READ - COMMITTED isolation level, in a - cursor search, if we set a new - record lock on an index, this is - incremented; this is used in - releasing the locks under the - cursors if we are performing an - UPDATE and we determine after - retrieving the row that it does - not need to be locked; thus, - these can be used to implement a - 'mini-rollback' that releases - the latest record locks */ + COMMITTED or READ UNCOMMITTED + isolation level, set in + row_search_for_mysql() if we set a new + record lock on the secondary + or clustered index; this is + used in row_unlock_for_mysql() + when releasing the lock under + the cursor if we determine + after retrieving the row that + it does not need to be locked + ('mini-rollback') */ ulint mysql_prefix_len;/*!< byte offset of the end of the last requested column */ ulint mysql_row_len; /*!< length in bytes of a row in the diff --git a/storage/xtradb/include/srv0srv.h b/storage/xtradb/include/srv0srv.h index a463075f435..0904a5da1eb 100644 --- a/storage/xtradb/include/srv0srv.h +++ b/storage/xtradb/include/srv0srv.h @@ -156,6 +156,8 @@ extern ulint srv_buf_pool_curr_size; /*!< current size in bytes */ extern ulint srv_mem_pool_size; extern ulint srv_lock_table_size; +extern uint srv_buffer_pool_shm_key; + extern ibool srv_thread_concurrency_timer_based; extern ulint srv_n_file_io_threads; @@ -207,6 +209,7 @@ extern ulong srv_stats_method; #define SRV_STATS_METHOD_IGNORE_NULLS 2 extern ulong srv_stats_auto_update; extern ulint srv_stats_update_need_lock; +extern ibool srv_use_sys_stats_table; extern ibool srv_use_doublewrite_buf; extern ibool srv_use_checksums; @@ -367,8 +370,9 @@ enum { when writing data files, but do flush after writing to log files */ SRV_UNIX_NOSYNC, /*!< do not flush after writing */ - SRV_UNIX_O_DIRECT /*!< invoke os_file_set_nocache() on + SRV_UNIX_O_DIRECT, /*!< invoke os_file_set_nocache() on data files */ + SRV_UNIX_ALL_O_DIRECT /* new method for examination: logfile also open O_DIRECT */ }; /** Alternatives for file i/o in Windows */ diff --git a/storage/xtradb/include/sync0rw.h b/storage/xtradb/include/sync0rw.h index 1fe517ab30a..4edf93f4042 100644 --- a/storage/xtradb/include/sync0rw.h +++ b/storage/xtradb/include/sync0rw.h @@ -556,11 +556,12 @@ struct rw_lock_struct { //unsigned cline:14; /*!< Line where created */ unsigned last_s_line:14; /*!< Line number where last time s-locked */ unsigned last_x_line:14; /*!< Line number where last time x-locked */ +#ifdef UNIV_DEBUG ulint magic_n; /*!< RW_LOCK_MAGIC_N */ -}; - /** Value of rw_lock_struct::magic_n */ #define RW_LOCK_MAGIC_N 22643 +#endif /* UNIV_DEBUG */ +}; #ifdef UNIV_SYNC_DEBUG /** The structure for storing debug info of an rw-lock */ diff --git a/storage/xtradb/include/sync0sync.h b/storage/xtradb/include/sync0sync.h index 7e210ea82f1..a500cf1da45 100644 --- a/storage/xtradb/include/sync0sync.h +++ b/storage/xtradb/include/sync0sync.h @@ -438,7 +438,7 @@ or row lock! */ #define SYNC_FILE_FORMAT_TAG 1200 /* Used to serialize access to the file format tag */ #define SYNC_DICT_OPERATION 1001 /* table create, drop, etc. reserve - this in X-mode, implicit or backround + this in X-mode; implicit or backround operations purge, rollback, foreign key checks reserve this in S-mode */ #define SYNC_DICT 1000 diff --git a/storage/xtradb/include/trx0sys.h b/storage/xtradb/include/trx0sys.h index 8b941cdd4e6..9ef9485b611 100644 --- a/storage/xtradb/include/trx0sys.h +++ b/storage/xtradb/include/trx0sys.h @@ -326,6 +326,7 @@ UNIV_INTERN void trx_sys_update_mysql_binlog_offset( /*===============================*/ + trx_sysf_t* sys_header, const char* file_name_in,/*!< in: MySQL log file name */ ib_int64_t offset, /*!< in: position in that log file */ ulint field, /*!< in: offset of the MySQL log info field in diff --git a/storage/xtradb/include/trx0trx.h b/storage/xtradb/include/trx0trx.h index 4a4b54b93a0..4c0ce392bcd 100644 --- a/storage/xtradb/include/trx0trx.h +++ b/storage/xtradb/include/trx0trx.h @@ -497,6 +497,7 @@ struct trx_struct{ FALSE, one can save CPU time and about 150 bytes in the undo log size as then we skip XA steps */ + ulint flush_log_at_trx_commit_session; ulint flush_log_later;/* In 2PC, we hold the prepare_commit mutex across both phases. In that case, we @@ -560,9 +561,6 @@ struct trx_struct{ /*------------------------------*/ void* mysql_thd; /*!< MySQL thread handle corresponding to this trx, or NULL */ - char** mysql_query_str;/* pointer to the field in mysqld_thd - which contains the pointer to the - current SQL query string */ const char* mysql_log_file_name; /* if MySQL binlog is used, this field contains a pointer to the latest file diff --git a/storage/xtradb/include/univ.i b/storage/xtradb/include/univ.i index 90ce618b9da..71476443964 100644 --- a/storage/xtradb/include/univ.i +++ b/storage/xtradb/include/univ.i @@ -46,8 +46,8 @@ Created 1/20/1994 Heikki Tuuri #define INNODB_VERSION_MAJOR 1 #define INNODB_VERSION_MINOR 0 -#define INNODB_VERSION_BUGFIX 8 -#define PERCONA_INNODB_VERSION 11.2 +#define INNODB_VERSION_BUGFIX 10 +#define PERCONA_INNODB_VERSION 12.0 /* The following is the InnoDB version as shown in SELECT plugin_version FROM information_schema.plugins; @@ -170,7 +170,7 @@ command. Not tested on Windows. */ #if defined(HAVE_valgrind)&& defined(HAVE_VALGRIND_MEMCHECK_H) # define UNIV_DEBUG_VALGRIND -#endif /* HAVE_valgrind */ +#endif #if 0 #define UNIV_DEBUG_VALGRIND /* Enable extra Valgrind instrumentation */ @@ -208,10 +208,6 @@ operations (very slow); also UNIV_DEBUG must be defined */ adaptive hash index */ #define UNIV_SRV_PRINT_LATCH_WAITS /* enable diagnostic output in sync0sync.c */ -#define UNIV_BTR_AVOID_COPY /* when splitting B-tree nodes, - do not move any records when - all the records would - be moved */ #define UNIV_BTR_PRINT /* enable functions for printing B-trees */ #define UNIV_ZIP_DEBUG /* extensive consistency checks @@ -301,6 +297,12 @@ management to ensure correct alignment for doubles etc. */ /* Maximum number of parallel threads in a parallelized operation */ #define UNIV_MAX_PARALLELISM 32 +/* The maximum length of a table name. This is the MySQL limit and is +defined in mysql_com.h like NAME_CHAR_LEN*SYSTEM_CHARSET_MBMAXLEN, the +number does not include a terminating '\0'. InnoDB probably can handle +longer names internally */ +#define MAX_TABLE_NAME_LEN 192 + /* UNIVERSAL TYPE DEFINITIONS ========================== diff --git a/storage/xtradb/include/ut0lst.h b/storage/xtradb/include/ut0lst.h index 7b15c052978..4305f6571b5 100644 --- a/storage/xtradb/include/ut0lst.h +++ b/storage/xtradb/include/ut0lst.h @@ -257,5 +257,48 @@ do { \ ut_a(ut_list_node_313 == NULL); \ } while (0) +/********************************************************************//** +Align nodes with moving location. +@param NAME the name of the list +@param TYPE node type +@param BASE base node (not a pointer to it) +@param OFFSET offset moved */ +#define UT_LIST_OFFSET(NAME, TYPE, BASE, FADDR, FOFFSET, BOFFSET) \ +do { \ + ulint ut_list_i_313; \ + TYPE* ut_list_node_313; \ + \ + if ((BASE).start) \ + (BASE).start = (void*)((char*)((BASE).start) \ + + (((void*)((BASE).start) > (void*)FADDR)?FOFFSET:BOFFSET));\ + if ((BASE).end) \ + (BASE).end = (void*)((char*)((BASE).end) \ + + (((void*)((BASE).end) > (void*)FADDR)?FOFFSET:BOFFSET));\ + \ + ut_list_node_313 = (BASE).start; \ + \ + for (ut_list_i_313 = (BASE).count; ut_list_i_313--; ) { \ + ut_a(ut_list_node_313); \ + if ((ut_list_node_313->NAME).prev) \ + (ut_list_node_313->NAME).prev = (void*)((char*)((ut_list_node_313->NAME).prev) \ + + (((void*)((ut_list_node_313->NAME).prev) > (void*)FADDR)?FOFFSET:BOFFSET));\ + if ((ut_list_node_313->NAME).next) \ + (ut_list_node_313->NAME).next = (void *)((char*)((ut_list_node_313->NAME).next) \ + + (((void*)((ut_list_node_313->NAME).next)> (void*)FADDR)?FOFFSET:BOFFSET));\ + ut_list_node_313 = (ut_list_node_313->NAME).next; \ + } \ + \ + ut_a(ut_list_node_313 == NULL); \ + \ + ut_list_node_313 = (BASE).end; \ + \ + for (ut_list_i_313 = (BASE).count; ut_list_i_313--; ) { \ + ut_a(ut_list_node_313); \ + ut_list_node_313 = (ut_list_node_313->NAME).prev; \ + } \ + \ + ut_a(ut_list_node_313 == NULL); \ +} while (0) + #endif diff --git a/storage/xtradb/lock/lock0lock.c b/storage/xtradb/lock/lock0lock.c index b103ee79578..7ec4a53e0ea 100644 --- a/storage/xtradb/lock/lock0lock.c +++ b/storage/xtradb/lock/lock0lock.c @@ -1733,11 +1733,11 @@ lock_rec_create( Enqueues a waiting request for a lock which cannot be granted immediately. Checks for deadlocks. @return DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED, or -DB_SUCCESS; DB_SUCCESS means that there was a deadlock, but another -transaction was chosen as a victim, and we got the lock immediately: -no need to wait then */ +DB_SUCCESS_LOCKED_REC; DB_SUCCESS_LOCKED_REC means that +there was a deadlock, but another transaction was chosen as a victim, +and we got the lock immediately: no need to wait then */ static -ulint +enum db_err lock_rec_enqueue_waiting( /*=====================*/ ulint type_mode,/*!< in: lock mode this @@ -1811,7 +1811,7 @@ lock_rec_enqueue_waiting( if (trx->wait_lock == NULL) { - return(DB_SUCCESS); + return(DB_SUCCESS_LOCKED_REC); } trx->que_state = TRX_QUE_LOCK_WAIT; @@ -1931,6 +1931,16 @@ somebody_waits: return(lock_rec_create(type_mode, block, heap_no, index, trx)); } +/** Record locking request status */ +enum lock_rec_req_status { + /** Failed to acquire a lock */ + LOCK_REC_FAIL, + /** Succeeded in acquiring a lock (implicit or already acquired) */ + LOCK_REC_SUCCESS, + /** Explicitly created a new lock */ + LOCK_REC_SUCCESS_CREATED +}; + /*********************************************************************//** This is a fast routine for locking a record in the most common cases: there are no explicit locks on the page, or there is just one lock, owned @@ -1938,9 +1948,9 @@ by this transaction, and of the right type_mode. This is a low-level function which does NOT look at implicit locks! Checks lock compatibility within explicit locks. This function sets a normal next-key lock, or in the case of a page supremum record, a gap type lock. -@return TRUE if locking succeeded */ +@return whether the locking succeeded */ UNIV_INLINE -ibool +enum lock_rec_req_status lock_rec_lock_fast( /*===============*/ ibool impl, /*!< in: if TRUE, no lock is set @@ -1979,19 +1989,19 @@ lock_rec_lock_fast( lock_rec_create(mode, block, heap_no, index, trx); } - return(TRUE); + return(LOCK_REC_SUCCESS_CREATED); } if (lock_rec_get_next_on_page(lock)) { - return(FALSE); + return(LOCK_REC_FAIL); } if (lock->trx != trx || lock->type_mode != (mode | LOCK_REC) || lock_rec_get_n_bits(lock) <= heap_no) { - return(FALSE); + return(LOCK_REC_FAIL); } if (!impl) { @@ -2000,10 +2010,11 @@ lock_rec_lock_fast( if (!lock_rec_get_nth_bit(lock, heap_no)) { lock_rec_set_nth_bit(lock, heap_no); + return(LOCK_REC_SUCCESS_CREATED); } } - return(TRUE); + return(LOCK_REC_SUCCESS); } /*********************************************************************//** @@ -2011,9 +2022,10 @@ This is the general, and slower, routine for locking a record. This is a low-level function which does NOT look at implicit locks! Checks lock compatibility within explicit locks. This function sets a normal next-key lock, or in the case of a page supremum record, a gap type lock. -@return DB_SUCCESS, DB_LOCK_WAIT, or error code */ +@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK, +or DB_QUE_THR_SUSPENDED */ static -ulint +enum db_err lock_rec_lock_slow( /*===============*/ ibool impl, /*!< in: if TRUE, no lock is set @@ -2030,7 +2042,6 @@ lock_rec_lock_slow( que_thr_t* thr) /*!< in: query thread */ { trx_t* trx; - ulint err; ut_ad(mutex_own(&kernel_mutex)); ut_ad((LOCK_MODE_MASK & mode) != LOCK_S @@ -2049,27 +2060,23 @@ lock_rec_lock_slow( /* The trx already has a strong enough lock on rec: do nothing */ - err = DB_SUCCESS; } else if (lock_rec_other_has_conflicting(mode, block, heap_no, trx)) { /* If another transaction has a non-gap conflicting request in the queue, as this transaction does not have a lock strong enough already granted on the record, we have to wait. */ - err = lock_rec_enqueue_waiting(mode, block, heap_no, - index, thr); - } else { - if (!impl) { - /* Set the requested lock on the record */ - - lock_rec_add_to_queue(LOCK_REC | mode, block, - heap_no, index, trx); - } + return(lock_rec_enqueue_waiting(mode, block, heap_no, + index, thr)); + } else if (!impl) { + /* Set the requested lock on the record */ - err = DB_SUCCESS; + lock_rec_add_to_queue(LOCK_REC | mode, block, + heap_no, index, trx); + return(DB_SUCCESS_LOCKED_REC); } - return(err); + return(DB_SUCCESS); } /*********************************************************************//** @@ -2078,9 +2085,10 @@ possible, enqueues a waiting lock request. This is a low-level function which does NOT look at implicit locks! Checks lock compatibility within explicit locks. This function sets a normal next-key lock, or in the case of a page supremum record, a gap type lock. -@return DB_SUCCESS, DB_LOCK_WAIT, or error code */ +@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK, +or DB_QUE_THR_SUSPENDED */ static -ulint +enum db_err lock_rec_lock( /*==========*/ ibool impl, /*!< in: if TRUE, no lock is set @@ -2096,8 +2104,6 @@ lock_rec_lock( dict_index_t* index, /*!< in: index of record */ que_thr_t* thr) /*!< in: query thread */ { - ulint err; - ut_ad(mutex_own(&kernel_mutex)); ut_ad((LOCK_MODE_MASK & mode) != LOCK_S || lock_table_has(thr_get_trx(thr), index->table, LOCK_IS)); @@ -2109,18 +2115,20 @@ lock_rec_lock( || mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP || mode - (LOCK_MODE_MASK & mode) == 0); - if (lock_rec_lock_fast(impl, mode, block, heap_no, index, thr)) { - - /* We try a simplified and faster subroutine for the most - common cases */ - - err = DB_SUCCESS; - } else { - err = lock_rec_lock_slow(impl, mode, block, - heap_no, index, thr); + /* We try a simplified and faster subroutine for the most + common cases */ + switch (lock_rec_lock_fast(impl, mode, block, heap_no, index, thr)) { + case LOCK_REC_SUCCESS: + return(DB_SUCCESS); + case LOCK_REC_SUCCESS_CREATED: + return(DB_SUCCESS_LOCKED_REC); + case LOCK_REC_FAIL: + return(lock_rec_lock_slow(impl, mode, block, + heap_no, index, thr)); } - return(err); + ut_error; + return(DB_ERROR); } /*********************************************************************//** @@ -3948,8 +3956,8 @@ lock_rec_unlock( const rec_t* rec, /*!< in: record */ enum lock_mode lock_mode)/*!< in: LOCK_S or LOCK_X */ { + lock_t* first_lock; lock_t* lock; - lock_t* release_lock = NULL; ulint heap_no; ut_ad(trx && rec); @@ -3959,48 +3967,40 @@ lock_rec_unlock( mutex_enter(&kernel_mutex); - lock = lock_rec_get_first(block, heap_no); + first_lock = lock_rec_get_first(block, heap_no); /* Find the last lock with the same lock_mode and transaction from the record. */ - while (lock != NULL) { + for (lock = first_lock; lock != NULL; + lock = lock_rec_get_next(heap_no, lock)) { if (lock->trx == trx && lock_get_mode(lock) == lock_mode) { - release_lock = lock; ut_a(!lock_get_wait(lock)); + lock_rec_reset_nth_bit(lock, heap_no); + goto released; } - - lock = lock_rec_get_next(heap_no, lock); } - /* If a record lock is found, release the record lock */ - - if (UNIV_LIKELY(release_lock != NULL)) { - lock_rec_reset_nth_bit(release_lock, heap_no); - } else { - mutex_exit(&kernel_mutex); - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Error: unlock row could not" - " find a %lu mode lock on the record\n", - (ulong) lock_mode); + mutex_exit(&kernel_mutex); + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: Error: unlock row could not" + " find a %lu mode lock on the record\n", + (ulong) lock_mode); - return; - } + return; +released: /* Check if we can now grant waiting lock requests */ - lock = lock_rec_get_first(block, heap_no); - - while (lock != NULL) { + for (lock = first_lock; lock != NULL; + lock = lock_rec_get_next(heap_no, lock)) { if (lock_get_wait(lock) && !lock_rec_has_to_wait_in_queue(lock)) { /* Grant the lock */ lock_grant(lock); } - - lock = lock_rec_get_next(heap_no, lock); } mutex_exit(&kernel_mutex); @@ -5095,7 +5095,14 @@ lock_rec_insert_check_and_lock( lock_mutex_exit_kernel(); - if ((err == DB_SUCCESS) && !dict_index_is_clust(index)) { + switch (err) { + case DB_SUCCESS_LOCKED_REC: + err = DB_SUCCESS; + /* fall through */ + case DB_SUCCESS: + if (dict_index_is_clust(index)) { + break; + } /* Update the page max trx id field */ page_update_max_trx_id(block, buf_block_get_page_zip(block), @@ -5218,6 +5225,10 @@ lock_clust_rec_modify_check_and_lock( ut_ad(lock_rec_queue_validate(block, rec, index, offsets)); + if (UNIV_UNLIKELY(err == DB_SUCCESS_LOCKED_REC)) { + err = DB_SUCCESS; + } + return(err); } @@ -5284,22 +5295,27 @@ lock_sec_rec_modify_check_and_lock( } #endif /* UNIV_DEBUG */ - if (err == DB_SUCCESS) { + if (err == DB_SUCCESS || err == DB_SUCCESS_LOCKED_REC) { /* Update the page max trx id field */ + /* It might not be necessary to do this if + err == DB_SUCCESS (no new lock created), + but it should not cost too much performance. */ page_update_max_trx_id(block, buf_block_get_page_zip(block), thr_get_trx(thr)->id, mtr); + err = DB_SUCCESS; } return(err); } /*********************************************************************//** -Like the counterpart for a clustered index below, but now we read a +Like lock_clust_rec_read_check_and_lock(), but reads a secondary index record. -@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */ +@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK, +or DB_QUE_THR_SUSPENDED */ UNIV_INTERN -ulint +enum db_err lock_sec_rec_read_check_and_lock( /*=============================*/ ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG @@ -5320,8 +5336,8 @@ lock_sec_rec_read_check_and_lock( LOCK_REC_NOT_GAP */ que_thr_t* thr) /*!< in: query thread */ { - ulint err; - ulint heap_no; + enum db_err err; + ulint heap_no; ut_ad(!dict_index_is_clust(index)); ut_ad(block->frame == page_align(rec)); @@ -5372,9 +5388,10 @@ if the query thread should anyway be suspended for some reason; if not, then puts the transaction and the query thread to the lock wait state and inserts a waiting request for a record lock to the lock queue. Sets the requested mode lock on the record. -@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */ +@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK, +or DB_QUE_THR_SUSPENDED */ UNIV_INTERN -ulint +enum db_err lock_clust_rec_read_check_and_lock( /*===============================*/ ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG @@ -5395,8 +5412,8 @@ lock_clust_rec_read_check_and_lock( LOCK_REC_NOT_GAP */ que_thr_t* thr) /*!< in: query thread */ { - ulint err; - ulint heap_no; + enum db_err err; + ulint heap_no; ut_ad(dict_index_is_clust(index)); ut_ad(block->frame == page_align(rec)); @@ -5467,17 +5484,22 @@ lock_clust_rec_read_check_and_lock_alt( mem_heap_t* tmp_heap = NULL; ulint offsets_[REC_OFFS_NORMAL_SIZE]; ulint* offsets = offsets_; - ulint ret; + ulint err; rec_offs_init(offsets_); offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &tmp_heap); - ret = lock_clust_rec_read_check_and_lock(flags, block, rec, index, + err = lock_clust_rec_read_check_and_lock(flags, block, rec, index, offsets, mode, gap_mode, thr); if (tmp_heap) { mem_heap_free(tmp_heap); } - return(ret); + + if (UNIV_UNLIKELY(err == DB_SUCCESS_LOCKED_REC)) { + err = DB_SUCCESS; + } + + return(err); } /*******************************************************************//** diff --git a/storage/xtradb/log/log0log.c b/storage/xtradb/log/log0log.c index 03d097d1c12..fade31037b5 100644 --- a/storage/xtradb/log/log0log.c +++ b/storage/xtradb/log/log0log.c @@ -1111,6 +1111,7 @@ log_io_complete( group = (log_group_t*)((ulint)group - 1); if (srv_unix_file_flush_method != SRV_UNIX_O_DSYNC + && srv_unix_file_flush_method != SRV_UNIX_ALL_O_DIRECT && srv_unix_file_flush_method != SRV_UNIX_NOSYNC) { fil_flush(group->space_id); @@ -1132,6 +1133,7 @@ log_io_complete( logs and cannot end up here! */ if (srv_unix_file_flush_method != SRV_UNIX_O_DSYNC + && srv_unix_file_flush_method != SRV_UNIX_ALL_O_DIRECT && srv_unix_file_flush_method != SRV_UNIX_NOSYNC && srv_flush_log_at_trx_commit != 2) { @@ -1512,7 +1514,8 @@ loop: mutex_exit(&(log_sys->mutex)); - if (srv_unix_file_flush_method == SRV_UNIX_O_DSYNC) { + if (srv_unix_file_flush_method == SRV_UNIX_O_DSYNC + || srv_unix_file_flush_method == SRV_UNIX_ALL_O_DIRECT) { /* O_DSYNC means the OS did not buffer the log file at all: so we have also flushed to disk what we have written */ diff --git a/storage/xtradb/log/log0recv.c b/storage/xtradb/log/log0recv.c index 1c9b4960ee4..bbb634addb0 100644 --- a/storage/xtradb/log/log0recv.c +++ b/storage/xtradb/log/log0recv.c @@ -2965,9 +2965,12 @@ recv_recovery_from_checkpoint_start_func( ib_uint64_t contiguous_lsn; ib_uint64_t archived_lsn; byte* buf; - byte log_hdr_buf[LOG_FILE_HDR_SIZE]; + byte* log_hdr_buf; + byte log_hdr_buf_base[LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE]; ulint err; + log_hdr_buf = ut_align(log_hdr_buf_base, OS_FILE_LOG_BLOCK_SIZE); + #ifdef UNIV_LOG_ARCHIVE ut_ad(type != LOG_CHECKPOINT || limit_lsn == IB_ULONGLONG_MAX); /** TRUE when recovering from a checkpoint */ diff --git a/storage/xtradb/os/os0file.c b/storage/xtradb/os/os0file.c index c79a41626c9..48d796c38e1 100644 --- a/storage/xtradb/os/os0file.c +++ b/storage/xtradb/os/os0file.c @@ -214,7 +214,7 @@ static os_aio_array_t* os_aio_sync_array = NULL; /*!< Synchronous I/O */ /* Per thread buffer used for merged IO requests. Used by os_aio_simulated_handle so that a buffer doesn't have to be allocated for each request. */ -static char* os_aio_thread_buffer[SRV_MAX_N_IO_THREADS]; +static byte* os_aio_thread_buffer[SRV_MAX_N_IO_THREADS]; static ulint os_aio_thread_buffer_size[SRV_MAX_N_IO_THREADS]; /** Number of asynchronous I/O segments. Set by os_aio_init(). */ @@ -1379,7 +1379,11 @@ try_again: /* When srv_file_per_table is on, file creation failure may not be critical to the whole instance. Do not crash the server in - case of unknown errors. */ + case of unknown errors. + Please note "srv_file_per_table" is a global variable with + no explicit synchronization protection. It could be + changed during this execution path. It might not have the + same value as the one when building the table definition */ if (srv_file_per_table) { retry = os_file_handle_error_no_exit(name, create_mode == OS_FILE_CREATE ? @@ -1466,7 +1470,11 @@ try_again: /* When srv_file_per_table is on, file creation failure may not be critical to the whole instance. Do not crash the server in - case of unknown errors. */ + case of unknown errors. + Please note "srv_file_per_table" is a global variable with + no explicit synchronization protection. It could be + changed during this execution path. It might not have the + same value as the one when building the table definition */ if (srv_file_per_table) { retry = os_file_handle_error_no_exit(name, create_mode == OS_FILE_CREATE ? @@ -1494,6 +1502,11 @@ try_again: os_file_set_nocache(file, name, mode_str); } + /* ALL_O_DIRECT: O_DIRECT also for transaction log file */ + if (srv_unix_file_flush_method == SRV_UNIX_ALL_O_DIRECT) { + os_file_set_nocache(file, name, mode_str); + } + #ifdef USE_FILE_LOCK if (create_mode != OS_FILE_OPEN_RAW && os_file_lock(file, name)) { diff --git a/storage/xtradb/os/os0proc.c b/storage/xtradb/os/os0proc.c index 48922886f23..c101db3d179 100644 --- a/storage/xtradb/os/os0proc.c +++ b/storage/xtradb/os/os0proc.c @@ -229,3 +229,175 @@ os_mem_free_large( } #endif } + +/****************************************************************//** +Allocates or attaches and reuses shared memory segment. +The content is not cleared automatically. +@return allocated memory */ +UNIV_INTERN +void* +os_shm_alloc( +/*=========*/ + ulint* n, /*!< in/out: number of bytes */ + uint key, + ibool* is_new) +{ + void* ptr; +#if defined HAVE_SYS_IPC_H && HAVE_SYS_SHM_H + ulint size; + int shmid; +#endif + + *is_new = FALSE; +#if defined HAVE_SYS_IPC_H && HAVE_SYS_SHM_H + fprintf(stderr, + "InnoDB: The shared memory key %#x (%d) is specified.\n", + key, key); +# if defined HAVE_LARGE_PAGES && defined UNIV_LINUX + if (!os_use_large_pages || !os_large_page_size) { + goto skip; + } + + /* Align block size to os_large_page_size */ + ut_ad(ut_is_2pow(os_large_page_size)); + size = ut_2pow_round(*n + (os_large_page_size - 1), + os_large_page_size); + + shmid = shmget((key_t)key, (size_t)size, + IPC_CREAT | IPC_EXCL | SHM_HUGETLB | SHM_R | SHM_W); + if (shmid < 0) { + if (errno == EEXIST) { + fprintf(stderr, + "InnoDB: HugeTLB: The shared memory segment seems to exist already.\n"); + shmid = shmget((key_t)key, (size_t)size, + SHM_HUGETLB | SHM_R | SHM_W); + if (shmid < 0) { + fprintf(stderr, + "InnoDB: HugeTLB: Warning: Failed to allocate %lu bytes.(reuse) errno %d\n", + size, errno); + goto skip; + } else { + fprintf(stderr, + "InnoDB: HugeTLB: The existent shared memory segment is used.\n"); + } + } else { + fprintf(stderr, + "InnoDB: HugeTLB: Warning: Failed to allocate %lu bytes.(new) errno %d\n", + size, errno); + goto skip; + } + } else { + *is_new = TRUE; + fprintf(stderr, + "InnoDB: HugeTLB: The new shared memory segment is created.\n"); + } + + ptr = shmat(shmid, NULL, 0); + if (ptr == (void *)-1) { + fprintf(stderr, + "InnoDB: HugeTLB: Warning: Failed to attach shared memory segment, errno %d\n", + errno); + ptr = NULL; + } + + if (ptr) { + *n = size; + os_fast_mutex_lock(&ut_list_mutex); + ut_total_allocated_memory += size; + os_fast_mutex_unlock(&ut_list_mutex); + UNIV_MEM_ALLOC(ptr, size); + return(ptr); + } +skip: + *is_new = FALSE; +# endif /* HAVE_LARGE_PAGES && defined UNIV_LINUX */ +# ifdef HAVE_GETPAGESIZE + size = getpagesize(); +# else + size = UNIV_PAGE_SIZE; +# endif + /* Align block size to system page size */ + ut_ad(ut_is_2pow(size)); + size = *n = ut_2pow_round(*n + (size - 1), size); + + shmid = shmget((key_t)key, (size_t)size, + IPC_CREAT | IPC_EXCL | SHM_R | SHM_W); + if (shmid < 0) { + if (errno == EEXIST) { + fprintf(stderr, + "InnoDB: The shared memory segment seems to exist already.\n"); + shmid = shmget((key_t)key, (size_t)size, + SHM_R | SHM_W); + if (shmid < 0) { + fprintf(stderr, + "InnoDB: Warning: Failed to allocate %lu bytes.(reuse) errno %d\n", + size, errno); + ptr = NULL; + goto end; + } else { + fprintf(stderr, + "InnoDB: The existent shared memory segment is used.\n"); + } + } else { + fprintf(stderr, + "InnoDB: Warning: Failed to allocate %lu bytes.(new) errno %d\n", + size, errno); + ptr = NULL; + goto end; + } + } else { + *is_new = TRUE; + fprintf(stderr, + "InnoDB: The new shared memory segment is created.\n"); + } + + ptr = shmat(shmid, NULL, 0); + if (ptr == (void *)-1) { + fprintf(stderr, + "InnoDB: Warning: Failed to attach shared memory segment, errno %d\n", + errno); + ptr = NULL; + } + + if (ptr) { + *n = size; + os_fast_mutex_lock(&ut_list_mutex); + ut_total_allocated_memory += size; + os_fast_mutex_unlock(&ut_list_mutex); + UNIV_MEM_ALLOC(ptr, size); + } +end: +#else /* HAVE_SYS_IPC_H && HAVE_SYS_SHM_H */ + fprintf(stderr, "InnoDB: shared memory segment is not supported.\n"); + ptr = NULL; +#endif /* HAVE_SYS_IPC_H && HAVE_SYS_SHM_H */ + return(ptr); +} + +/****************************************************************//** +Detach shared memory segment. */ +UNIV_INTERN +void +os_shm_free( +/*========*/ + void *ptr, /*!< in: pointer returned by + os_shm_alloc() */ + ulint size) /*!< in: size returned by + os_shm_alloc() */ +{ + os_fast_mutex_lock(&ut_list_mutex); + ut_a(ut_total_allocated_memory >= size); + os_fast_mutex_unlock(&ut_list_mutex); + +#if defined HAVE_SYS_IPC_H && HAVE_SYS_SHM_H + if (!shmdt(ptr)) { + os_fast_mutex_lock(&ut_list_mutex); + ut_a(ut_total_allocated_memory >= size); + ut_total_allocated_memory -= size; + os_fast_mutex_unlock(&ut_list_mutex); + UNIV_MEM_FREE(ptr, size); + } +#else /* HAVE_SYS_IPC_H && HAVE_SYS_SHM_H */ + fprintf(stderr, "InnoDB: shared memory segment is not supported.\n"); +#endif /* HAVE_SYS_IPC_H && HAVE_SYS_SHM_H */ +} diff --git a/storage/xtradb/page/page0zip.c b/storage/xtradb/page/page0zip.c index 7ef44f3246f..a94d2d54417 100644 --- a/storage/xtradb/page/page0zip.c +++ b/storage/xtradb/page/page0zip.c @@ -571,7 +571,7 @@ page_zip_dir_encode( /* Traverse the list of stored records in the collation order, starting from the first user record. */ - rec = page + PAGE_NEW_INFIMUM, TRUE; + rec = page + PAGE_NEW_INFIMUM; i = 0; @@ -1153,6 +1153,10 @@ page_zip_compress( FILE* logfile = NULL; #endif + if (!page) { + return(FALSE); + } + ut_a(page_is_comp(page)); ut_a(fil_page_get_type(page) == FIL_PAGE_INDEX); ut_ad(page_simple_validate_new((page_t*) page)); @@ -1464,6 +1468,7 @@ page_zip_fields_free( dict_table_t* table = index->table; mem_heap_free(index->heap); mutex_free(&(table->autoinc_mutex)); + ut_free(table->name); mem_heap_free(table->heap); } } diff --git a/storage/xtradb/que/que0que.c b/storage/xtradb/que/que0que.c index 2fe046fa9b8..5c85a04d139 100644 --- a/storage/xtradb/que/que0que.c +++ b/storage/xtradb/que/que0que.c @@ -622,11 +622,21 @@ que_graph_free_recursive( que_graph_free_recursive(cre_ind->ind_def); que_graph_free_recursive(cre_ind->field_def); + if (srv_use_sys_stats_table) + que_graph_free_recursive(cre_ind->stats_def); que_graph_free_recursive(cre_ind->commit_node); mem_heap_free(cre_ind->heap); break; + case QUE_NODE_INSERT_STATS: + cre_ind = node; + + que_graph_free_recursive(cre_ind->stats_def); + que_graph_free_recursive(cre_ind->commit_node); + + mem_heap_free(cre_ind->heap); + break; case QUE_NODE_PROC: que_graph_free_stat_list(((proc_node_t*)node)->stat_list); @@ -1139,6 +1149,8 @@ que_node_print_info( str = "CREATE TABLE"; } else if (type == QUE_NODE_CREATE_INDEX) { str = "CREATE INDEX"; + } else if (type == QUE_NODE_INSERT_STATS) { + str = "INSERT TO SYS_STATS"; } else if (type == QUE_NODE_FOR) { str = "FOR LOOP"; } else if (type == QUE_NODE_RETURN) { @@ -1256,6 +1268,8 @@ que_thr_step( thr = dict_create_table_step(thr); } else if (type == QUE_NODE_CREATE_INDEX) { thr = dict_create_index_step(thr); + } else if (type == QUE_NODE_INSERT_STATS) { + thr = dict_insert_stats_step(thr); } else if (type == QUE_NODE_ROW_PRINTF) { thr = row_printf_step(thr); } else { diff --git a/storage/xtradb/rem/rem0cmp.c b/storage/xtradb/rem/rem0cmp.c index 45230f1d7b1..8ee434f85da 100644 --- a/storage/xtradb/rem/rem0cmp.c +++ b/storage/xtradb/rem/rem0cmp.c @@ -706,7 +706,9 @@ cmp_rec_rec_simple( const rec_t* rec2, /*!< in: physical record */ const ulint* offsets1,/*!< in: rec_get_offsets(rec1, ...) */ const ulint* offsets2,/*!< in: rec_get_offsets(rec2, ...) */ - const dict_index_t* index) /*!< in: data dictionary index */ + const dict_index_t* index, /*!< in: data dictionary index */ + ibool* null_eq)/*!< out: set to TRUE if + found matching null values */ { ulint rec1_f_len; /*!< length of current field in rec1 */ const byte* rec1_b_ptr; /*!< pointer to the current byte @@ -753,6 +755,9 @@ cmp_rec_rec_simple( || rec2_f_len == UNIV_SQL_NULL) { if (rec1_f_len == rec2_f_len) { + if (null_eq) { + *null_eq = TRUE; + } goto next_field; diff --git a/storage/xtradb/row/row0ins.c b/storage/xtradb/row/row0ins.c index d7475d613ad..d4925e46f97 100644 --- a/storage/xtradb/row/row0ins.c +++ b/storage/xtradb/row/row0ins.c @@ -51,6 +51,15 @@ Created 4/20/1996 Heikki Tuuri #define ROW_INS_PREV 1 #define ROW_INS_NEXT 2 +/************************************************************************* +IMPORTANT NOTE: Any operation that generates redo MUST check that there +is enough space in the redo log before for that operation. This is +done by calling log_free_check(). The reason for checking the +availability of the redo log space before the start of the operation is +that we MUST not hold any synchonization objects when performing the +check. +If you make a change in this module make sure that no codepath is +introduced where a call to log_free_check() is bypassed. */ /*********************************************************************//** Creates an insert node struct. @@ -1121,9 +1130,9 @@ nonstandard_exit_func: /*********************************************************************//** Sets a shared lock on a record. Used in locking possible duplicate key records and also in checking foreign key constraints. -@return DB_SUCCESS or error code */ +@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, or error code */ static -ulint +enum db_err row_ins_set_shared_rec_lock( /*========================*/ ulint type, /*!< in: LOCK_ORDINARY, LOCK_GAP, or @@ -1134,7 +1143,7 @@ row_ins_set_shared_rec_lock( const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */ que_thr_t* thr) /*!< in: query thread */ { - ulint err; + enum db_err err; ut_ad(rec_offs_validate(rec, index, offsets)); @@ -1152,9 +1161,9 @@ row_ins_set_shared_rec_lock( /*********************************************************************//** Sets a exclusive lock on a record. Used in locking possible duplicate key records -@return DB_SUCCESS or error code */ +@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, or error code */ static -ulint +enum db_err row_ins_set_exclusive_rec_lock( /*===========================*/ ulint type, /*!< in: LOCK_ORDINARY, LOCK_GAP, or @@ -1165,7 +1174,7 @@ row_ins_set_exclusive_rec_lock( const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */ que_thr_t* thr) /*!< in: query thread */ { - ulint err; + enum db_err err; ut_ad(rec_offs_validate(rec, index, offsets)); @@ -1205,7 +1214,6 @@ row_ins_check_foreign_constraint( dict_index_t* check_index; ulint n_fields_cmp; btr_pcur_t pcur; - ibool moved; int cmp; ulint err; ulint i; @@ -1336,7 +1344,7 @@ run_again: /* Scan index records and check if there is a matching record */ - for (;;) { + do { const rec_t* rec = btr_pcur_get_rec(&pcur); const buf_block_t* block = btr_pcur_get_block(&pcur); @@ -1348,7 +1356,7 @@ run_again: if (page_rec_is_infimum(rec)) { - goto next_rec; + continue; } offsets = rec_get_offsets(rec, check_index, @@ -1359,12 +1367,13 @@ run_again: err = row_ins_set_shared_rec_lock(LOCK_ORDINARY, block, rec, check_index, offsets, thr); - if (err != DB_SUCCESS) { - - break; + switch (err) { + case DB_SUCCESS_LOCKED_REC: + case DB_SUCCESS: + continue; + default: + goto end_scan; } - - goto next_rec; } cmp = cmp_dtuple_rec(entry, rec, offsets); @@ -1375,9 +1384,12 @@ run_again: err = row_ins_set_shared_rec_lock( LOCK_ORDINARY, block, rec, check_index, offsets, thr); - if (err != DB_SUCCESS) { - + switch (err) { + case DB_SUCCESS_LOCKED_REC: + case DB_SUCCESS: break; + default: + goto end_scan; } } else { /* Found a matching record. Lock only @@ -1388,15 +1400,18 @@ run_again: LOCK_REC_NOT_GAP, block, rec, check_index, offsets, thr); - if (err != DB_SUCCESS) { - + switch (err) { + case DB_SUCCESS_LOCKED_REC: + case DB_SUCCESS: break; + default: + goto end_scan; } if (check_ref) { err = DB_SUCCESS; - break; + goto end_scan; } else if (foreign->type != 0) { /* There is an ON UPDATE or ON DELETE condition: check them in a separate @@ -1422,7 +1437,7 @@ run_again: err = DB_FOREIGN_DUPLICATE_KEY; } - break; + goto end_scan; } /* row_ins_foreign_check_on_constraint @@ -1435,49 +1450,41 @@ run_again: thr, foreign, rec, entry); err = DB_ROW_IS_REFERENCED; - break; + goto end_scan; } } - } + } else { + ut_a(cmp < 0); - if (cmp < 0) { err = row_ins_set_shared_rec_lock( LOCK_GAP, block, rec, check_index, offsets, thr); - if (err != DB_SUCCESS) { - break; - } - - if (check_ref) { - err = DB_NO_REFERENCED_ROW; - row_ins_foreign_report_add_err( - trx, foreign, rec, entry); - } else { - err = DB_SUCCESS; + switch (err) { + case DB_SUCCESS_LOCKED_REC: + case DB_SUCCESS: + if (check_ref) { + err = DB_NO_REFERENCED_ROW; + row_ins_foreign_report_add_err( + trx, foreign, rec, entry); + } else { + err = DB_SUCCESS; + } } - break; + goto end_scan; } + } while (btr_pcur_move_to_next(&pcur, &mtr)); - ut_a(cmp == 0); -next_rec: - moved = btr_pcur_move_to_next(&pcur, &mtr); - - if (!moved) { - if (check_ref) { - rec = btr_pcur_get_rec(&pcur); - row_ins_foreign_report_add_err( - trx, foreign, rec, entry); - err = DB_NO_REFERENCED_ROW; - } else { - err = DB_SUCCESS; - } - - break; - } + if (check_ref) { + row_ins_foreign_report_add_err( + trx, foreign, btr_pcur_get_rec(&pcur), entry); + err = DB_NO_REFERENCED_ROW; + } else { + err = DB_SUCCESS; } +end_scan: btr_pcur_close(&pcur); mtr_commit(&mtr); @@ -1725,9 +1732,13 @@ row_ins_scan_sec_index_for_duplicate( rec, index, offsets, thr); } - if (err != DB_SUCCESS) { - + switch (err) { + case DB_SUCCESS_LOCKED_REC: + err = DB_SUCCESS; + case DB_SUCCESS: break; + default: + goto end_scan; } if (page_rec_is_supremum(rec)) { @@ -1744,17 +1755,15 @@ row_ins_scan_sec_index_for_duplicate( thr_get_trx(thr)->error_info = index; - break; + goto end_scan; } + } else { + ut_a(cmp < 0); + goto end_scan; } - - if (cmp < 0) { - break; - } - - ut_a(cmp == 0); } while (btr_pcur_move_to_next(&pcur, &mtr)); +end_scan: if (UNIV_LIKELY_NULL(heap)) { mem_heap_free(heap); } @@ -1843,7 +1852,11 @@ row_ins_duplicate_error_in_clust( cursor->index, offsets, thr); } - if (err != DB_SUCCESS) { + switch (err) { + case DB_SUCCESS_LOCKED_REC: + case DB_SUCCESS: + break; + default: goto func_exit; } @@ -1883,7 +1896,11 @@ row_ins_duplicate_error_in_clust( rec, cursor->index, offsets, thr); } - if (err != DB_SUCCESS) { + switch (err) { + case DB_SUCCESS_LOCKED_REC: + case DB_SUCCESS: + break; + default: goto func_exit; } diff --git a/storage/xtradb/row/row0merge.c b/storage/xtradb/row/row0merge.c index 6ee93d24ed3..47c03c77850 100644 --- a/storage/xtradb/row/row0merge.c +++ b/storage/xtradb/row/row0merge.c @@ -717,14 +717,16 @@ row_merge_read( } /********************************************************************//** -Read a merge block from the file system. +Write a merge block to the file system. @return TRUE if request was successful, FALSE if fail */ static ibool row_merge_write( /*============*/ int fd, /*!< in: file descriptor */ - ulint offset, /*!< in: offset where to write */ + ulint offset, /*!< in: offset where to read + in number of row_merge_block_t + elements */ const void* buf) /*!< in: data */ { ib_uint64_t ofs = ((ib_uint64_t) offset) @@ -1075,11 +1077,14 @@ row_merge_cmp( record to be compared */ const ulint* offsets1, /*!< in: first record offsets */ const ulint* offsets2, /*!< in: second record offsets */ - const dict_index_t* index) /*!< in: index */ + const dict_index_t* index, /*!< in: index */ + ibool* null_eq) /*!< out: set to TRUE if + found matching null values */ { int cmp; - cmp = cmp_rec_rec_simple(mrec1, mrec2, offsets1, offsets2, index); + cmp = cmp_rec_rec_simple(mrec1, mrec2, offsets1, offsets2, index, + null_eq); #ifdef UNIV_DEBUG if (row_merge_print_cmp) { @@ -1452,11 +1457,13 @@ corrupt: } while (mrec0 && mrec1) { + ibool null_eq = FALSE; switch (row_merge_cmp(mrec0, mrec1, - offsets0, offsets1, index)) { + offsets0, offsets1, index, + &null_eq)) { case 0: if (UNIV_UNLIKELY - (dict_index_is_unique(index))) { + (dict_index_is_unique(index) && !null_eq)) { innobase_rec_to_mysql(table, mrec0, index, offsets0); mem_heap_free(heap); @@ -1578,22 +1585,28 @@ row_merge( const dict_index_t* index, /*!< in: index being created */ merge_file_t* file, /*!< in/out: file containing index entries */ - ulint* half, /*!< in/out: half the file */ row_merge_block_t* block, /*!< in/out: 3 buffers */ int* tmpfd, /*!< in/out: temporary file handle */ - TABLE* table) /*!< in/out: MySQL table, for + TABLE* table, /*!< in/out: MySQL table, for reporting erroneous key value if applicable */ + ulint* num_run,/*!< in/out: Number of runs remain + to be merged */ + ulint* run_offset) /*!< in/out: Array contains the + first offset number for each merge + run */ { ulint foffs0; /*!< first input offset */ ulint foffs1; /*!< second input offset */ ulint error; /*!< error code */ merge_file_t of; /*!< output file */ - const ulint ihalf = *half; + const ulint ihalf = run_offset[*num_run / 2]; /*!< half the input file */ - ulint ohalf; /*!< half the output file */ + ulint n_run = 0; + /*!< num of runs generated from this merge */ UNIV_MEM_ASSERT_W(block[0], 3 * sizeof block[0]); + ut_ad(ihalf < file->offset); of.fd = *tmpfd; @@ -1601,17 +1614,20 @@ row_merge( of.n_rec = 0; /* Merge blocks to the output file. */ - ohalf = 0; foffs0 = 0; foffs1 = ihalf; + UNIV_MEM_INVALID(run_offset, *num_run * sizeof *run_offset); + for (; foffs0 < ihalf && foffs1 < file->offset; foffs0++, foffs1++) { - ulint ahalf; /*!< arithmetic half the input file */ if (UNIV_UNLIKELY(trx_is_interrupted(trx))) { return(DB_INTERRUPTED); } + /* Remember the offset number for this run */ + run_offset[n_run++] = of.offset; + error = row_merge_blocks(index, file, block, &foffs0, &foffs1, &of, table); @@ -1619,21 +1635,6 @@ row_merge( return(error); } - /* Record the offset of the output file when - approximately half the output has been generated. In - this way, the next invocation of row_merge() will - spend most of the time in this loop. The initial - estimate is ohalf==0. */ - ahalf = file->offset / 2; - ut_ad(ohalf <= of.offset); - - /* Improve the estimate until reaching half the input - file size, or we can not get any closer to it. All - comparands should be non-negative when !(ohalf < ahalf) - because ohalf <= of.offset. */ - if (ohalf < ahalf || of.offset - ahalf < ohalf - ahalf) { - ohalf = of.offset; - } } /* Copy the last blocks, if there are any. */ @@ -1643,6 +1644,9 @@ row_merge( return(DB_INTERRUPTED); } + /* Remember the offset number for this run */ + run_offset[n_run++] = of.offset; + if (!row_merge_blocks_copy(index, file, block, &foffs0, &of)) { return(DB_CORRUPTION); } @@ -1655,6 +1659,9 @@ row_merge( return(DB_INTERRUPTED); } + /* Remember the offset number for this run */ + run_offset[n_run++] = of.offset; + if (!row_merge_blocks_copy(index, file, block, &foffs1, &of)) { return(DB_CORRUPTION); } @@ -1666,10 +1673,23 @@ row_merge( return(DB_CORRUPTION); } + ut_ad(n_run <= *num_run); + + *num_run = n_run; + + /* Each run can contain one or more offsets. As merge goes on, + the number of runs (to merge) will reduce until we have one + single run. So the number of runs will always be smaller than + the number of offsets in file */ + ut_ad((*num_run) <= file->offset); + + /* The number of offsets in output file is always equal or + smaller than input file */ + ut_ad(of.offset <= file->offset); + /* Swap file descriptors for the next pass. */ *tmpfd = file->fd; *file = of; - *half = ohalf; UNIV_MEM_INVALID(block[0], 3 * sizeof block[0]); @@ -1694,27 +1714,44 @@ row_merge_sort( if applicable */ { ulint half = file->offset / 2; + ulint num_runs; + ulint* run_offset; + ulint error = DB_SUCCESS; + + /* Record the number of merge runs we need to perform */ + num_runs = file->offset; + + /* If num_runs are less than 1, nothing to merge */ + if (num_runs <= 1) { + return(error); + } + + /* "run_offset" records each run's first offset number */ + run_offset = (ulint*) mem_alloc(file->offset * sizeof(ulint)); + + /* This tells row_merge() where to start for the first round + of merge. */ + run_offset[half] = half; /* The file should always contain at least one byte (the end of file marker). Thus, it must be at least one block. */ ut_ad(file->offset > 0); + /* Merge the runs until we have one big run */ do { - ulint error; + error = row_merge(trx, index, file, block, tmpfd, + table, &num_runs, run_offset); - error = row_merge(trx, index, file, &half, - block, tmpfd, table); + UNIV_MEM_ASSERT_RW(run_offset, num_runs * sizeof *run_offset); if (error != DB_SUCCESS) { - return(error); + break; } + } while (num_runs > 1); - /* half > 0 should hold except when the file consists - of one block. No need to merge further then. */ - ut_ad(half > 0 || file->offset == 1); - } while (half < file->offset && half > 0); + mem_free(run_offset); - return(DB_SUCCESS); + return(error); } /*************************************************************//** @@ -1986,6 +2023,8 @@ row_merge_drop_index( "UPDATE SYS_INDEXES SET NAME=CONCAT('" TEMP_INDEX_PREFIX_STR "', NAME) WHERE ID = :indexid;\n" "COMMIT WORK;\n" + /* Drop the statistics of the index. */ + "DELETE FROM SYS_STATS WHERE INDEX_ID = :indexid;\n" /* Drop the field definitions of the index. */ "DELETE FROM SYS_FIELDS WHERE INDEX_ID = :indexid;\n" /* Drop the index definition and the B-tree. */ @@ -2094,13 +2133,16 @@ row_merge_drop_temp_indexes(void) btr_pcur_store_position(&pcur, &mtr); btr_pcur_commit_specify_mtr(&pcur, &mtr); - table = dict_load_table_on_id(table_id); + table = dict_table_get_on_id_low(table_id); if (table) { dict_index_t* index; + dict_index_t* next_index; for (index = dict_table_get_first_index(table); - index; index = dict_table_get_next_index(index)) { + index; index = next_index) { + + next_index = dict_table_get_next_index(index); if (*index->name == TEMP_INDEX_PREFIX) { row_merge_drop_index(index, table, trx); @@ -2303,7 +2345,7 @@ row_merge_rename_tables( { ulint err = DB_ERROR; pars_info_t* info; - const char* old_name= old_table->name; + char old_name[MAX_TABLE_NAME_LEN + 1]; ut_ad(trx->mysql_thread_id == os_thread_get_curr_id()); ut_ad(old_table != new_table); @@ -2311,6 +2353,17 @@ row_merge_rename_tables( ut_a(trx->dict_operation_lock_mode == RW_X_LATCH); + /* store the old/current name to an automatic variable */ + if (strlen(old_table->name) + 1 <= sizeof(old_name)) { + memcpy(old_name, old_table->name, strlen(old_table->name) + 1); + } else { + ut_print_timestamp(stderr); + fprintf(stderr, "InnoDB: too long table name: '%s', " + "max length is %d\n", old_table->name, + MAX_TABLE_NAME_LEN); + ut_error; + } + trx->op_info = "renaming tables"; /* We use the private SQL parser of Innobase to generate the query diff --git a/storage/xtradb/row/row0mysql.c b/storage/xtradb/row/row0mysql.c index e520065ea04..62221fa456d 100644 --- a/storage/xtradb/row/row0mysql.c +++ b/storage/xtradb/row/row0mysql.c @@ -30,6 +30,7 @@ Created 9/17/2000 Heikki Tuuri #include "row0mysql.ic" #endif +#include "ha_prototypes.h" #include "row0ins.h" #include "row0merge.h" #include "row0sel.h" @@ -522,6 +523,7 @@ handle_new_error: case DB_CANNOT_ADD_CONSTRAINT: case DB_TOO_MANY_CONCURRENT_TRXS: case DB_OUT_OF_FILE_SPACE: + case DB_INTERRUPTED: if (savept) { /* Roll back the latest, possibly incomplete insertion or update */ @@ -624,6 +626,8 @@ row_create_prebuilt( prebuilt->select_lock_type = LOCK_NONE; prebuilt->stored_select_lock_type = 99999999; + UNIV_MEM_INVALID(&prebuilt->stored_select_lock_type, + sizeof prebuilt->stored_select_lock_type); prebuilt->search_tuple = dtuple_create( heap, 2 * dict_table_get_n_cols(table)); @@ -864,7 +868,7 @@ row_update_statistics_if_needed( if (counter > 2000000000 || ((ib_int64_t)counter > 16 + table->stat_n_rows / 16)) { - dict_update_statistics(table); + dict_update_statistics(table, TRUE); } } @@ -1124,6 +1128,13 @@ row_insert_for_mysql( thr = que_fork_get_first_thr(prebuilt->ins_graph); + if (!prebuilt->mysql_has_locked) { + fprintf(stderr, "InnoDB: Error: row_insert_for_mysql is called without ha_innobase::external_lock()\n"); + if (trx->mysql_thd != NULL) { + innobase_mysql_print_thd(stderr, trx->mysql_thd, 600); + } + } + if (prebuilt->sql_stat_start) { node->state = INS_NODE_SET_IX_LOCK; prebuilt->sql_stat_start = FALSE; @@ -1430,27 +1441,26 @@ run_again: } /*********************************************************************//** -This can only be used when srv_locks_unsafe_for_binlog is TRUE or -this session is using a READ COMMITTED isolation level. Before -calling this function we must use trx_reset_new_rec_lock_info() and -trx_register_new_rec_lock() to store the information which new record locks -really were set. This function removes a newly set lock under prebuilt->pcur, -and also under prebuilt->clust_pcur. Currently, this is only used and tested -in the case of an UPDATE or a DELETE statement, where the row lock is of the -LOCK_X type. -Thus, this implements a 'mini-rollback' that releases the latest record -locks we set. -@return error code or DB_SUCCESS */ +This can only be used when srv_locks_unsafe_for_binlog is TRUE or this +session is using a READ COMMITTED or READ UNCOMMITTED isolation level. +Before calling this function row_search_for_mysql() must have +initialized prebuilt->new_rec_locks to store the information which new +record locks really were set. This function removes a newly set +clustered index record lock under prebuilt->pcur or +prebuilt->clust_pcur. Thus, this implements a 'mini-rollback' that +releases the latest clustered index record lock we set. +@return error code or DB_SUCCESS */ UNIV_INTERN int row_unlock_for_mysql( /*=================*/ - row_prebuilt_t* prebuilt, /*!< in: prebuilt struct in MySQL + row_prebuilt_t* prebuilt, /*!< in/out: prebuilt struct in MySQL handle */ - ibool has_latches_on_recs)/*!< TRUE if called so that we have - the latches on the records under pcur - and clust_pcur, and we do not need to - reposition the cursors. */ + ibool has_latches_on_recs)/*!< in: TRUE if called so + that we have the latches on + the records under pcur and + clust_pcur, and we do not need + to reposition the cursors. */ { btr_pcur_t* pcur = prebuilt->pcur; btr_pcur_t* clust_pcur = prebuilt->clust_pcur; @@ -1648,37 +1658,6 @@ row_table_got_default_clust_index( } /*********************************************************************//** -Calculates the key number used inside MySQL for an Innobase index. We have -to take into account if we generated a default clustered index for the table -@return the key number used inside MySQL */ -UNIV_INTERN -ulint -row_get_mysql_key_number_for_index( -/*===============================*/ - const dict_index_t* index) /*!< in: index */ -{ - const dict_index_t* ind; - ulint i; - - ut_a(index); - - i = 0; - ind = dict_table_get_first_index(index->table); - - while (index != ind) { - ind = dict_table_get_next_index(ind); - i++; - } - - if (row_table_got_default_clust_index(index->table)) { - ut_a(i > 0); - i--; - } - - return(i); -} - -/*********************************************************************//** Locks the data dictionary in shared mode from modifications, for performing foreign key check, rollback, or other operation invisible to MySQL. */ UNIV_INTERN @@ -2044,6 +2023,45 @@ error_handling: } /*********************************************************************//** +*/ +UNIV_INTERN +int +row_insert_stats_for_mysql( +/*=======================*/ + dict_index_t* index, + trx_t* trx) +{ + ind_node_t* node; + mem_heap_t* heap; + que_thr_t* thr; + ulint err; + + ut_ad(trx->mysql_thread_id == os_thread_get_curr_id()); + + trx->op_info = "try to insert rows to SYS_STATS"; + + trx_start_if_not_started(trx); + trx->error_state = DB_SUCCESS; + + heap = mem_heap_create(512); + + node = ind_insert_stats_graph_create(index, heap); + + thr = pars_complete_graph_for_exec(node, trx, heap); + + ut_a(thr == que_fork_start_command(que_node_get_parent(thr))); + que_run_threads(thr); + + err = trx->error_state; + + que_graph_free((que_t*) que_node_get_parent(thr)); + + trx->op_info = ""; + + return((int) err); +} + +/*********************************************************************//** Scans a table create SQL string and adds to the data dictionary the foreign key constraints declared in the string. This function should be called after the indexes for a table have been created. @@ -2062,6 +2080,7 @@ row_table_add_foreign_constraints( FOREIGN KEY (a, b) REFERENCES table2(c, d), table2 can be written also with the database name before it: test.table2 */ + size_t sql_length, /*!< in: length of sql_string */ const char* name, /*!< in: table full name in the normalized form database_name/table_name */ @@ -2083,8 +2102,8 @@ row_table_add_foreign_constraints( trx_set_dict_operation(trx, TRX_DICT_OP_TABLE); - err = dict_create_foreign_constraints(trx, sql_string, name, - reject_fks); + err = dict_create_foreign_constraints(trx, sql_string, sql_length, + name, reject_fks); if (err == DB_SUCCESS) { /* Check that also referencing constraints are ok */ err = dict_load_foreigns(name, TRUE); @@ -2428,7 +2447,7 @@ row_discard_tablespace_for_mysql( goto funct_exit; } - new_id = dict_hdr_get_new_id(DICT_HDR_TABLE_ID); + dict_hdr_get_new_id(&new_id, NULL, NULL); /* Remove all locks except the table-level S and X locks. */ lock_remove_all_on_table(table, FALSE); @@ -2790,10 +2809,11 @@ row_truncate_table_for_mysql( dict_index_t* index; - space = 0; + dict_hdr_get_new_id(NULL, NULL, &space); - if (fil_create_new_single_table_tablespace( - &space, table->name, FALSE, flags, + if (space == ULINT_UNDEFINED + || fil_create_new_single_table_tablespace( + space, table->name, FALSE, flags, FIL_IBD_FILE_INITIAL_SIZE) != DB_SUCCESS) { ut_print_timestamp(stderr); fprintf(stderr, @@ -2898,7 +2918,7 @@ next_rec: mem_heap_free(heap); - new_id = dict_hdr_get_new_id(DICT_HDR_TABLE_ID); + dict_hdr_get_new_id(&new_id, NULL, NULL); info = pars_info_create(); @@ -2942,7 +2962,7 @@ next_rec: dict_table_autoinc_lock(table); dict_table_autoinc_initialize(table, 1); dict_table_autoinc_unlock(table); - dict_update_statistics(table); + dict_update_statistics(table, TRUE); trx_commit_for_mysql(trx); @@ -3244,6 +3264,8 @@ check_next_foreign: " IF (SQL % NOTFOUND) THEN\n" " found := 0;\n" " ELSE\n" + " DELETE FROM SYS_STATS\n" + " WHERE INDEX_ID = index_id;\n" " DELETE FROM SYS_FIELDS\n" " WHERE INDEX_ID = index_id;\n" " DELETE FROM SYS_INDEXES\n" diff --git a/storage/xtradb/row/row0purge.c b/storage/xtradb/row/row0purge.c index 500ebe571ab..835af990672 100644 --- a/storage/xtradb/row/row0purge.c +++ b/storage/xtradb/row/row0purge.c @@ -44,6 +44,16 @@ Created 3/14/1997 Heikki Tuuri #include "row0mysql.h" #include "log0log.h" +/************************************************************************* +IMPORTANT NOTE: Any operation that generates redo MUST check that there +is enough space in the redo log before for that operation. This is +done by calling log_free_check(). The reason for checking the +availability of the redo log space before the start of the operation is +that we MUST not hold any synchonization objects when performing the +check. +If you make a change in this module make sure that no codepath is +introduced where a call to log_free_check() is bypassed. */ + /********************************************************************//** Creates a purge node to a query graph. @return own: purge node */ @@ -126,6 +136,7 @@ row_purge_remove_clust_if_poss_low( pcur = &(node->pcur); btr_cur = btr_pcur_get_btr_cur(pcur); + log_free_check(); mtr_start(&mtr); success = row_purge_reposition_pcur(mode, node, &mtr); diff --git a/storage/xtradb/row/row0sel.c b/storage/xtradb/row/row0sel.c index 43e67ff6ded..0db4fb6f3db 100644 --- a/storage/xtradb/row/row0sel.c +++ b/storage/xtradb/row/row0sel.c @@ -863,8 +863,14 @@ row_sel_get_clust_rec( clust_rec, index, offsets, node->row_lock_mode, lock_type, thr); - if (err != DB_SUCCESS) { - + switch (err) { + case DB_SUCCESS: + case DB_SUCCESS_LOCKED_REC: + /* Declare the variable uninitialized in Valgrind. + It should be set to DB_SUCCESS at func_exit. */ + UNIV_MEM_INVALID(&err, sizeof err); + break; + default: goto err_exit; } } else { @@ -934,9 +940,9 @@ err_exit: /*********************************************************************//** Sets a lock on a record. -@return DB_SUCCESS or error code */ +@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, or error code */ UNIV_INLINE -ulint +enum db_err sel_set_rec_lock( /*=============*/ const buf_block_t* block, /*!< in: buffer block of rec */ @@ -948,8 +954,8 @@ sel_set_rec_lock( LOC_REC_NOT_GAP */ que_thr_t* thr) /*!< in: query thread */ { - trx_t* trx; - ulint err; + trx_t* trx; + enum db_err err; trx = thr_get_trx(thr); @@ -1482,11 +1488,15 @@ rec_loop: node->row_lock_mode, lock_type, thr); - if (err != DB_SUCCESS) { + switch (err) { + case DB_SUCCESS_LOCKED_REC: + err = DB_SUCCESS; + case DB_SUCCESS: + break; + default: /* Note that in this case we will store in pcur the PREDECESSOR of the record we are waiting the lock for */ - goto lock_wait_or_error; } } @@ -1538,8 +1548,12 @@ skip_lock: rec, index, offsets, node->row_lock_mode, lock_type, thr); - if (err != DB_SUCCESS) { - + switch (err) { + case DB_SUCCESS_LOCKED_REC: + err = DB_SUCCESS; + case DB_SUCCESS: + break; + default: goto lock_wait_or_error; } } @@ -2498,6 +2512,7 @@ row_sel_field_store_in_mysql_format( byte* pad_ptr; ut_ad(len != UNIV_SQL_NULL); + UNIV_MEM_ASSERT_RW(data, len); switch (templ->type) { case DATA_INT: @@ -2752,6 +2767,9 @@ row_sel_store_mysql_rec( /* MySQL assumes that the field for an SQL NULL value is set to the default value. */ + UNIV_MEM_ASSERT_RW(prebuilt->default_rec + + templ->mysql_col_offset, + templ->mysql_col_len); mysql_rec[templ->mysql_null_byte_offset] |= (byte) templ->mysql_null_bit_mask; memcpy(mysql_rec + templ->mysql_col_offset, @@ -2803,9 +2821,9 @@ row_sel_build_prev_vers_for_mysql( Retrieves the clustered index record corresponding to a record in a non-clustered index. Does the necessary locking. Used in the MySQL interface. -@return DB_SUCCESS or error code */ +@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, or error code */ static -ulint +enum db_err row_sel_get_clust_rec_for_mysql( /*============================*/ row_prebuilt_t* prebuilt,/*!< in: prebuilt struct in the handle */ @@ -2832,7 +2850,7 @@ row_sel_get_clust_rec_for_mysql( dict_index_t* clust_index; const rec_t* clust_rec; rec_t* old_vers; - ulint err; + enum db_err err; trx_t* trx; *out_rec = NULL; @@ -2891,6 +2909,7 @@ row_sel_get_clust_rec_for_mysql( clust_rec = NULL; + err = DB_SUCCESS; goto func_exit; } @@ -2906,8 +2925,11 @@ row_sel_get_clust_rec_for_mysql( 0, btr_pcur_get_block(prebuilt->clust_pcur), clust_rec, clust_index, *offsets, prebuilt->select_lock_type, LOCK_REC_NOT_GAP, thr); - if (err != DB_SUCCESS) { - + switch (err) { + case DB_SUCCESS: + case DB_SUCCESS_LOCKED_REC: + break; + default: goto err_exit; } } else { @@ -2967,6 +2989,8 @@ row_sel_get_clust_rec_for_mysql( rec, sec_index, clust_rec, clust_index)); #endif } + + err = DB_SUCCESS; } func_exit: @@ -2979,7 +3003,6 @@ func_exit: btr_pcur_store_position(prebuilt->clust_pcur, mtr); } - err = DB_SUCCESS; err_exit: return(err); } @@ -3076,6 +3099,11 @@ row_sel_pop_cached_row_for_mysql( for (i = 0; i < prebuilt->n_template; i++) { templ = prebuilt->mysql_template + i; +#if 0 /* Some of the cached_rec may legitimately be uninitialized. */ + UNIV_MEM_ASSERT_RW(cached_rec + + templ->mysql_col_offset, + templ->mysql_col_len); +#endif ut_memcpy(buf + templ->mysql_col_offset, cached_rec + templ->mysql_col_offset, templ->mysql_col_len); @@ -3090,6 +3118,11 @@ row_sel_pop_cached_row_for_mysql( } } else { +#if 0 /* Some of the cached_rec may legitimately be uninitialized. */ + UNIV_MEM_ASSERT_RW(prebuilt->fetch_cache + [prebuilt->fetch_cache_first], + prebuilt->mysql_prefix_len); +#endif ut_memcpy(buf, prebuilt->fetch_cache[prebuilt->fetch_cache_first], prebuilt->mysql_prefix_len); @@ -3140,6 +3173,8 @@ row_sel_push_cache_row_for_mysql( } ut_ad(prebuilt->fetch_cache_first == 0); + UNIV_MEM_INVALID(prebuilt->fetch_cache[prebuilt->n_fetch_cached], + prebuilt->mysql_row_len); if (UNIV_UNLIKELY(!row_sel_store_mysql_rec( prebuilt->fetch_cache[ @@ -3286,6 +3321,7 @@ row_search_for_mysql( mem_heap_t* heap = NULL; ulint offsets_[REC_OFFS_NORMAL_SIZE]; ulint* offsets = offsets_; + ibool problematic_use = FALSE; rec_offs_init(offsets_); @@ -3601,6 +3637,13 @@ shortcut_fails_too_big_rec: trx->has_search_latch = FALSE; } + ut_ad(prebuilt->sql_stat_start || trx->conc_state == TRX_ACTIVE); + ut_ad(trx->conc_state == TRX_NOT_STARTED + || trx->conc_state == TRX_ACTIVE); + ut_ad(prebuilt->sql_stat_start + || prebuilt->select_lock_type != LOCK_NONE + || trx->read_view); + trx_start_if_not_started(trx); if (trx->isolation_level <= TRX_ISO_READ_COMMITTED @@ -3685,8 +3728,12 @@ shortcut_fails_too_big_rec: prebuilt->select_lock_type, LOCK_GAP, thr); - if (err != DB_SUCCESS) { - + switch (err) { + case DB_SUCCESS_LOCKED_REC: + err = DB_SUCCESS; + case DB_SUCCESS: + break; + default: goto lock_wait_or_error; } } @@ -3702,6 +3749,15 @@ shortcut_fails_too_big_rec: } } + if (!prebuilt->mysql_has_locked) { + fprintf(stderr, "InnoDB: Error: row_search_for_mysql() is called without ha_innobase::external_lock()\n"); + if (trx->mysql_thd != NULL) { + innobase_mysql_print_thd(stderr, trx->mysql_thd, 600); + } + problematic_use = TRUE; + } +retry_check: + if (!prebuilt->sql_stat_start) { /* No need to set an intention lock or assign a read view */ @@ -3712,6 +3768,14 @@ shortcut_fails_too_big_rec: " perform a consistent read\n" "InnoDB: but the read view is not assigned!\n", stderr); + if (problematic_use) { + fprintf(stderr, "InnoDB: It may be caused by calling " + "without ha_innobase::external_lock()\n" + "InnoDB: For the first-aid, avoiding the crash. " + "But it should be fixed ASAP.\n"); + prebuilt->sql_stat_start = TRUE; + goto retry_check; + } trx_print(stderr, trx, 600); fputc('\n', stderr); ut_a(0); @@ -3791,8 +3855,12 @@ rec_loop: prebuilt->select_lock_type, LOCK_ORDINARY, thr); - if (err != DB_SUCCESS) { - + switch (err) { + case DB_SUCCESS_LOCKED_REC: + err = DB_SUCCESS; + case DB_SUCCESS: + break; + default: goto lock_wait_or_error; } } @@ -3922,8 +3990,11 @@ wrong_offs: prebuilt->select_lock_type, LOCK_GAP, thr); - if (err != DB_SUCCESS) { - + switch (err) { + case DB_SUCCESS_LOCKED_REC: + case DB_SUCCESS: + break; + default: goto lock_wait_or_error; } } @@ -3958,8 +4029,11 @@ wrong_offs: prebuilt->select_lock_type, LOCK_GAP, thr); - if (err != DB_SUCCESS) { - + switch (err) { + case DB_SUCCESS_LOCKED_REC: + case DB_SUCCESS: + break; + default: goto lock_wait_or_error; } } @@ -4029,15 +4103,21 @@ no_gap_lock: switch (err) { const rec_t* old_vers; - case DB_SUCCESS: + case DB_SUCCESS_LOCKED_REC: if (srv_locks_unsafe_for_binlog - || trx->isolation_level <= TRX_ISO_READ_COMMITTED) { + || trx->isolation_level + <= TRX_ISO_READ_COMMITTED) { /* Note that a record of prebuilt->index was locked. */ prebuilt->new_rec_locks = 1; } + err = DB_SUCCESS; + case DB_SUCCESS: break; case DB_LOCK_WAIT: + /* Never unlock rows that were part of a conflict. */ + prebuilt->new_rec_locks = 0; + if (UNIV_LIKELY(prebuilt->row_read_type != ROW_READ_TRY_SEMI_CONSISTENT) || unique_search @@ -4067,7 +4147,6 @@ no_gap_lock: if (UNIV_LIKELY(trx->wait_lock != NULL)) { lock_cancel_waiting_and_release( trx->wait_lock); - prebuilt->new_rec_locks = 0; } else { mutex_exit(&kernel_mutex); @@ -4079,9 +4158,6 @@ no_gap_lock: ULINT_UNDEFINED, &heap); err = DB_SUCCESS; - /* Note that a record of - prebuilt->index was locked. */ - prebuilt->new_rec_locks = 1; break; } mutex_exit(&kernel_mutex); @@ -4218,27 +4294,30 @@ requires_clust_rec: err = row_sel_get_clust_rec_for_mysql(prebuilt, index, rec, thr, &clust_rec, &offsets, &heap, &mtr); - if (err != DB_SUCCESS) { + switch (err) { + case DB_SUCCESS: + if (clust_rec == NULL) { + /* The record did not exist in the read view */ + ut_ad(prebuilt->select_lock_type == LOCK_NONE); + goto next_rec; + } + break; + case DB_SUCCESS_LOCKED_REC: + ut_a(clust_rec != NULL); + if (srv_locks_unsafe_for_binlog + || trx->isolation_level + <= TRX_ISO_READ_COMMITTED) { + /* Note that the clustered index record + was locked. */ + prebuilt->new_rec_locks = 2; + } + err = DB_SUCCESS; + break; + default: goto lock_wait_or_error; } - if (clust_rec == NULL) { - /* The record did not exist in the read view */ - ut_ad(prebuilt->select_lock_type == LOCK_NONE); - - goto next_rec; - } - - if ((srv_locks_unsafe_for_binlog - || trx->isolation_level <= TRX_ISO_READ_COMMITTED) - && prebuilt->select_lock_type != LOCK_NONE) { - /* Note that both the secondary index record - and the clustered index record were locked. */ - ut_ad(prebuilt->new_rec_locks == 1); - prebuilt->new_rec_locks = 2; - } - if (UNIV_UNLIKELY(rec_get_deleted_flag(clust_rec, comp))) { /* The record is delete marked: we can skip it */ diff --git a/storage/xtradb/row/row0uins.c b/storage/xtradb/row/row0uins.c index 9f9c814f1a5..930a5cf13b6 100644 --- a/storage/xtradb/row/row0uins.c +++ b/storage/xtradb/row/row0uins.c @@ -46,6 +46,16 @@ Created 2/25/1997 Heikki Tuuri #include "ibuf0ibuf.h" #include "log0log.h" +/************************************************************************* +IMPORTANT NOTE: Any operation that generates redo MUST check that there +is enough space in the redo log before for that operation. This is +done by calling log_free_check(). The reason for checking the +availability of the redo log space before the start of the operation is +that we MUST not hold any synchonization objects when performing the +check. +If you make a change in this module make sure that no codepath is +introduced where a call to log_free_check() is bypassed. */ + /***************************************************************//** Removes a clustered index record. The pcur in node was positioned on the record, now it is detached. @@ -152,7 +162,6 @@ row_undo_ins_remove_sec_low( ulint err; mtr_t mtr; - log_free_check(); mtr_start(&mtr); found = row_search_index_entry(index, entry, mode, &pcur, &mtr); @@ -335,6 +344,7 @@ row_undo_ins( transactions. */ ut_a(trx_is_recv(node->trx)); } else { + log_free_check(); err = row_undo_ins_remove_sec(node->index, entry); if (err != DB_SUCCESS) { @@ -346,5 +356,6 @@ row_undo_ins( node->index = dict_table_get_next_index(node->index); } + log_free_check(); return(row_undo_ins_remove_clust_rec(node)); } diff --git a/storage/xtradb/row/row0umod.c b/storage/xtradb/row/row0umod.c index e7245dbee41..8464b0f95cc 100644 --- a/storage/xtradb/row/row0umod.c +++ b/storage/xtradb/row/row0umod.c @@ -58,12 +58,22 @@ delete marked clustered index record was delete unmarked and possibly also some of its fields were changed. Now, it is possible that the delete marked version has become obsolete at the time the undo is started. */ +/************************************************************************* +IMPORTANT NOTE: Any operation that generates redo MUST check that there +is enough space in the redo log before for that operation. This is +done by calling log_free_check(). The reason for checking the +availability of the redo log space before the start of the operation is +that we MUST not hold any synchonization objects when performing the +check. +If you make a change in this module make sure that no codepath is +introduced where a call to log_free_check() is bypassed. */ + /***********************************************************//** Checks if also the previous version of the clustered index record was modified or inserted by the same transaction, and its undo number is such that it should be undone in the same rollback. @return TRUE if also previous modify or insert of this row should be undone */ -UNIV_INLINE +static ibool row_undo_mod_undo_also_prev_vers( /*=============================*/ @@ -231,6 +241,8 @@ row_undo_mod_clust( ut_ad(node && thr); + log_free_check(); + /* Check if also the previous version of the clustered index record should be undone in this same rollback operation */ @@ -657,24 +669,55 @@ row_undo_mod_upd_exist_sec( /* Build the newest version of the index entry */ entry = row_build_index_entry(node->row, node->ext, index, heap); - ut_a(entry); - /* NOTE that if we updated the fields of a - delete-marked secondary index record so that - alphabetically they stayed the same, e.g., - 'abc' -> 'aBc', we cannot return to the original - values because we do not know them. But this should - not cause problems because in row0sel.c, in queries - we always retrieve the clustered index record or an - earlier version of it, if the secondary index record - through which we do the search is delete-marked. */ - - err = row_undo_mod_del_mark_or_remove_sec(node, thr, - index, - entry); - if (err != DB_SUCCESS) { - mem_heap_free(heap); - - return(err); + if (UNIV_UNLIKELY(!entry)) { + /* The server must have crashed in + row_upd_clust_rec_by_insert(), in + row_ins_index_entry_low() before + btr_store_big_rec_extern_fields() + has written the externally stored columns + (BLOBs) of the new clustered index entry. */ + + /* The table must be in DYNAMIC or COMPRESSED + format. REDUNDANT and COMPACT formats + store a local 768-byte prefix of each + externally stored column. */ + ut_a(dict_table_get_format(index->table) + >= DICT_TF_FORMAT_ZIP); + + /* This is only legitimate when + rolling back an incomplete transaction + after crash recovery. */ + ut_a(thr_get_trx(thr)->is_recovered); + + /* The server must have crashed before + completing the insert of the new + clustered index entry and before + inserting to the secondary indexes. + Because node->row was not yet written + to this index, we can ignore it. But + we must restore node->undo_row. */ + } else { + /* NOTE that if we updated the fields of a + delete-marked secondary index record so that + alphabetically they stayed the same, e.g., + 'abc' -> 'aBc', we cannot return to the + original values because we do not know them. + But this should not cause problems because + in row0sel.c, in queries we always retrieve + the clustered index record or an earlier + version of it, if the secondary index record + through which we do the search is + delete-marked. */ + + err = row_undo_mod_del_mark_or_remove_sec( + node, thr, index, entry); + if (err != DB_SUCCESS) { + mem_heap_free(heap); + + return(err); + } + + mem_heap_empty(heap); } /* We may have to update the delete mark in the @@ -683,7 +726,6 @@ row_undo_mod_upd_exist_sec( the secondary index record if we updated its fields but alphabetically they stayed the same, e.g., 'abc' -> 'aBc'. */ - mem_heap_empty(heap); entry = row_build_index_entry(node->undo_row, node->undo_ext, index, heap); diff --git a/storage/xtradb/row/row0undo.c b/storage/xtradb/row/row0undo.c index 3d739c9689a..9ef842b5114 100644 --- a/storage/xtradb/row/row0undo.c +++ b/storage/xtradb/row/row0undo.c @@ -297,7 +297,7 @@ row_undo( if (locked_data_dict) { - row_mysql_lock_data_dictionary(trx); + row_mysql_freeze_data_dictionary(trx); } if (node->state == UNDO_NODE_INSERT) { @@ -312,7 +312,7 @@ row_undo( if (locked_data_dict) { - row_mysql_unlock_data_dictionary(trx); + row_mysql_unfreeze_data_dictionary(trx); } /* Do some cleanup */ diff --git a/storage/xtradb/row/row0upd.c b/storage/xtradb/row/row0upd.c index 95d1d00aeef..d0aaecd3dae 100644 --- a/storage/xtradb/row/row0upd.c +++ b/storage/xtradb/row/row0upd.c @@ -92,6 +92,16 @@ the x-latch freed? The most efficient way for performing a searched delete is obviously to keep the x-latch for several steps of query graph execution. */ +/************************************************************************* +IMPORTANT NOTE: Any operation that generates redo MUST check that there +is enough space in the redo log before for that operation. This is +done by calling log_free_check(). The reason for checking the +availability of the redo log space before the start of the operation is +that we MUST not hold any synchonization objects when performing the +check. +If you make a change in this module make sure that no codepath is +introduced where a call to log_free_check() is bypassed. */ + /***********************************************************//** Checks if an update vector changes some of the first ordering fields of an index record. This is only used in foreign key checks and we can assume @@ -1453,7 +1463,6 @@ row_upd_sec_index_entry( entry = row_build_index_entry(node->row, node->ext, index, heap); ut_a(entry); - log_free_check(); mtr_start(&mtr); found = row_search_index_entry(index, entry, BTR_MODIFY_LEAF, &pcur, @@ -1529,7 +1538,7 @@ Updates the secondary index record if it is changed in the row update or deletes it if this is a delete. @return DB_SUCCESS if operation successfully completed, else error code or DB_LOCK_WAIT */ -UNIV_INLINE +static ulint row_upd_sec_step( /*=============*/ @@ -2015,6 +2024,7 @@ row_upd( if (node->state == UPD_NODE_UPDATE_CLUSTERED || node->state == UPD_NODE_INSERT_CLUSTERED) { + log_free_check(); err = row_upd_clust_step(node, thr); if (err != DB_SUCCESS) { @@ -2029,6 +2039,8 @@ row_upd( } while (node->index != NULL) { + + log_free_check(); err = row_upd_sec_step(node, thr); if (err != DB_SUCCESS) { diff --git a/storage/xtradb/srv/srv0srv.c b/storage/xtradb/srv/srv0srv.c index bc2dd562697..b9905116603 100644 --- a/storage/xtradb/srv/srv0srv.c +++ b/storage/xtradb/srv/srv0srv.c @@ -211,6 +211,9 @@ UNIV_INTERN ulint srv_buf_pool_curr_size = 0; UNIV_INTERN ulint srv_mem_pool_size = ULINT_MAX; UNIV_INTERN ulint srv_lock_table_size = ULINT_MAX; +/* key value for shm */ +UNIV_INTERN uint srv_buffer_pool_shm_key = 0; + /* This parameter is deprecated. Use srv_n_io_[read|write]_threads instead. */ UNIV_INTERN ulint srv_n_file_io_threads = ULINT_MAX; @@ -380,6 +383,7 @@ UNIV_INTERN unsigned long long srv_stats_sample_pages = 8; UNIV_INTERN ulong srv_stats_method = 0; UNIV_INTERN ulong srv_stats_auto_update = 1; UNIV_INTERN ulint srv_stats_update_need_lock = 1; +UNIV_INTERN ibool srv_use_sys_stats_table = FALSE; UNIV_INTERN ibool srv_use_doublewrite_buf = TRUE; UNIV_INTERN ibool srv_use_checksums = TRUE; @@ -1758,12 +1762,16 @@ srv_suspend_mysql_thread( innodb_lock_wait_timeout, because trx->mysql_thd == NULL. */ lock_wait_timeout = thd_lock_wait_timeout(trx->mysql_thd); - if (trx_is_interrupted(trx) - || (lock_wait_timeout < 100000000 - && wait_time > (double) lock_wait_timeout)) { + if (lock_wait_timeout < 100000000 + && wait_time > (double) lock_wait_timeout) { trx->error_state = DB_LOCK_WAIT_TIMEOUT; } + + if (trx_is_interrupted(trx)) { + + trx->error_state = DB_INTERRUPTED; + } } /********************************************************************//** diff --git a/storage/xtradb/srv/srv0start.c b/storage/xtradb/srv/srv0start.c index 7b5581a24f0..62ffa366f18 100644 --- a/storage/xtradb/srv/srv0start.c +++ b/storage/xtradb/srv/srv0start.c @@ -1302,6 +1302,9 @@ innobase_start_or_create_for_mysql(void) } else if (0 == ut_strcmp(srv_file_flush_method_str, "O_DIRECT")) { srv_unix_file_flush_method = SRV_UNIX_O_DIRECT; + } else if (0 == ut_strcmp(srv_file_flush_method_str, "ALL_O_DIRECT")) { + srv_unix_file_flush_method = SRV_UNIX_ALL_O_DIRECT; + } else if (0 == ut_strcmp(srv_file_flush_method_str, "littlesync")) { srv_unix_file_flush_method = SRV_UNIX_LITTLESYNC; @@ -1716,6 +1719,8 @@ innobase_start_or_create_for_mysql(void) Note that this is not as heavy weight as it seems. At this point there will be only ONE page in the buf_LRU and there must be no page in the buf_flush list. */ + /* TODO: treat more correctly */ + if (!srv_buffer_pool_shm_key) buf_pool_invalidate(); /* We always try to do a recovery, even if the database had diff --git a/storage/xtradb/sync/sync0arr.c b/storage/xtradb/sync/sync0arr.c index cfa52cdcc88..223e1715944 100644 --- a/storage/xtradb/sync/sync0arr.c +++ b/storage/xtradb/sync/sync0arr.c @@ -498,7 +498,9 @@ sync_array_cell_print( || type == RW_LOCK_WAIT_EX || type == RW_LOCK_SHARED) { - fputs(type == RW_LOCK_EX ? "X-lock on" : "S-lock on", file); + fputs(type == RW_LOCK_EX ? "X-lock on" + : type == RW_LOCK_WAIT_EX ? "X-lock (wait_ex) on" + : "S-lock on", file); rwlock = cell->old_wait_rw_lock; diff --git a/storage/xtradb/sync/sync0rw.c b/storage/xtradb/sync/sync0rw.c index 07eac403dfe..9e10f6e943b 100644 --- a/storage/xtradb/sync/sync0rw.c +++ b/storage/xtradb/sync/sync0rw.c @@ -268,7 +268,7 @@ rw_lock_create_func( lock->level = level; #endif /* UNIV_SYNC_DEBUG */ - lock->magic_n = RW_LOCK_MAGIC_N; + ut_d(lock->magic_n = RW_LOCK_MAGIC_N); lock->lock_name = cmutex_name; @@ -282,10 +282,8 @@ rw_lock_create_func( mutex_enter(&rw_lock_list_mutex); - if (UT_LIST_GET_LEN(rw_lock_list) > 0) { - ut_a(UT_LIST_GET_FIRST(rw_lock_list)->magic_n - == RW_LOCK_MAGIC_N); - } + ut_ad(UT_LIST_GET_FIRST(rw_lock_list) == NULL + || UT_LIST_GET_FIRST(rw_lock_list)->magic_n == RW_LOCK_MAGIC_N); UT_LIST_ADD_FIRST(list, rw_lock_list, lock); @@ -314,18 +312,16 @@ rw_lock_free( os_event_free(lock->wait_ex_event); - if (UT_LIST_GET_PREV(list, lock)) { - ut_a(UT_LIST_GET_PREV(list, lock)->magic_n == RW_LOCK_MAGIC_N); - } - if (UT_LIST_GET_NEXT(list, lock)) { - ut_a(UT_LIST_GET_NEXT(list, lock)->magic_n == RW_LOCK_MAGIC_N); - } + ut_ad(UT_LIST_GET_PREV(list, lock) == NULL + || UT_LIST_GET_PREV(list, lock)->magic_n == RW_LOCK_MAGIC_N); + ut_ad(UT_LIST_GET_NEXT(list, lock) == NULL + || UT_LIST_GET_NEXT(list, lock)->magic_n == RW_LOCK_MAGIC_N); UT_LIST_REMOVE(list, rw_lock_list, lock); mutex_exit(&rw_lock_list_mutex); - lock->magic_n = 0; + ut_d(lock->magic_n = 0); } #ifdef UNIV_DEBUG @@ -344,7 +340,7 @@ rw_lock_validate( ulint waiters = rw_lock_get_waiters(lock); lint lock_word = lock->lock_word; - ut_a(lock->magic_n == RW_LOCK_MAGIC_N); + ut_ad(lock->magic_n == RW_LOCK_MAGIC_N); ut_a(waiters == 0 || waiters == 1); ut_a(lock_word > -X_LOCK_DECR ||(-lock_word) % X_LOCK_DECR == 0); diff --git a/storage/xtradb/sync/sync0sync.c b/storage/xtradb/sync/sync0sync.c index fe14cbf0886..225f28df78e 100644 --- a/storage/xtradb/sync/sync0sync.c +++ b/storage/xtradb/sync/sync0sync.c @@ -434,20 +434,19 @@ mutex_set_waiters( mutex_t* mutex, /*!< in: mutex */ ulint n) /*!< in: value to set */ { -#ifndef INNODB_RW_LOCKS_USE_ATOMICS - volatile ulint* ptr; /* declared volatile to ensure that - the value is stored to memory */ -#endif - +#ifdef INNODB_RW_LOCKS_USE_ATOMICS ut_ad(mutex); -#ifdef INNODB_RW_LOCKS_USE_ATOMICS if (n) { os_compare_and_swap_ulint(&mutex->waiters, 0, 1); } else { os_compare_and_swap_ulint(&mutex->waiters, 1, 0); } #else + volatile ulint* ptr; /* declared volatile to ensure that + the value is stored to memory */ + ut_ad(mutex); + ptr = &(mutex->waiters); *ptr = n; /* Here we assume that the write of a single diff --git a/storage/xtradb/trx/trx0i_s.c b/storage/xtradb/trx/trx0i_s.c index c160eb2942a..5bc8302d0c0 100644 --- a/storage/xtradb/trx/trx0i_s.c +++ b/storage/xtradb/trx/trx0i_s.c @@ -429,6 +429,9 @@ fill_trx_row( which to copy volatile strings */ { + const char* stmt; + size_t stmt_len; + row->trx_id = trx_get_id(trx); row->trx_started = (ib_time_t) trx->start_time; row->trx_state = trx_get_que_state_str(trx); @@ -449,37 +452,32 @@ fill_trx_row( row->trx_weight = (ullint) ut_conv_dulint_to_longlong(TRX_WEIGHT(trx)); - if (trx->mysql_thd != NULL) { - row->trx_mysql_thread_id - = thd_get_thread_id(trx->mysql_thd); - } else { + if (trx->mysql_thd == NULL) { /* For internal transactions e.g., purge and transactions being recovered at startup there is no associated MySQL thread data structure. */ row->trx_mysql_thread_id = 0; + row->trx_query = NULL; + return(TRUE); } - if (trx->mysql_query_str != NULL && *trx->mysql_query_str != NULL) { + row->trx_mysql_thread_id = thd_get_thread_id(trx->mysql_thd); + stmt = innobase_get_stmt(trx->mysql_thd, &stmt_len); - if (strlen(*trx->mysql_query_str) - > TRX_I_S_TRX_QUERY_MAX_LEN) { + if (stmt != NULL) { - char query[TRX_I_S_TRX_QUERY_MAX_LEN + 1]; + char query[TRX_I_S_TRX_QUERY_MAX_LEN + 1]; - memcpy(query, *trx->mysql_query_str, - TRX_I_S_TRX_QUERY_MAX_LEN); - query[TRX_I_S_TRX_QUERY_MAX_LEN] = '\0'; + if (stmt_len > TRX_I_S_TRX_QUERY_MAX_LEN) { + stmt_len = TRX_I_S_TRX_QUERY_MAX_LEN; + } - row->trx_query = ha_storage_put_memlim( - cache->storage, query, - TRX_I_S_TRX_QUERY_MAX_LEN + 1, - MAX_ALLOWED_FOR_STORAGE(cache)); - } else { + memcpy(query, stmt, stmt_len); + query[stmt_len] = '\0'; - row->trx_query = ha_storage_put_str_memlim( - cache->storage, *trx->mysql_query_str, - MAX_ALLOWED_FOR_STORAGE(cache)); - } + row->trx_query = ha_storage_put_memlim( + cache->storage, stmt, stmt_len + 1, + MAX_ALLOWED_FOR_STORAGE(cache)); if (row->trx_query == NULL) { diff --git a/storage/xtradb/trx/trx0purge.c b/storage/xtradb/trx/trx0purge.c index 41e16b35e85..1c317665878 100644 --- a/storage/xtradb/trx/trx0purge.c +++ b/storage/xtradb/trx/trx0purge.c @@ -1148,8 +1148,7 @@ trx_purge(void) /* If we cannot advance the 'purge view' because of an old 'consistent read view', then the DML statements cannot be delayed. Also, srv_max_purge_lag <= 0 means 'infinity'. */ - if (srv_max_purge_lag > 0 - && !UT_LIST_GET_LAST(trx_sys->view_list)) { + if (srv_max_purge_lag > 0) { float ratio = (float) trx_sys->rseg_history_len / srv_max_purge_lag; if (ratio > ULINT_MAX / 10000) { diff --git a/storage/xtradb/trx/trx0sys.c b/storage/xtradb/trx/trx0sys.c index 47a21c3a318..ad4471ada0b 100644 --- a/storage/xtradb/trx/trx0sys.c +++ b/storage/xtradb/trx/trx0sys.c @@ -840,13 +840,13 @@ UNIV_INTERN void trx_sys_update_mysql_binlog_offset( /*===============================*/ + trx_sysf_t* sys_header, const char* file_name_in,/*!< in: MySQL log file name */ ib_int64_t offset, /*!< in: position in that log file */ ulint field, /*!< in: offset of the MySQL log info field in the trx sys header */ mtr_t* mtr) /*!< in: mtr */ { - trx_sysf_t* sys_header; const char* file_name; if (ut_strlen(file_name_in) >= TRX_SYS_MYSQL_MASTER_LOG_NAME_LEN) { @@ -860,8 +860,6 @@ trx_sys_update_mysql_binlog_offset( file_name = file_name_in; } - sys_header = trx_sysf_get(mtr); - if (mach_read_from_4(sys_header + field + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD) != TRX_SYS_MYSQL_LOG_MAGIC_N) { @@ -1143,14 +1141,8 @@ trx_sysf_dummy_create( ulint space, mtr_t* mtr) { -#ifdef UNDEFINED - trx_sysf_t* sys_header; - ulint slot_no; - ulint page_no; - ulint i; -#endif - page_t* page; buf_block_t* block; + page_t* page; ut_ad(mtr); diff --git a/storage/xtradb/trx/trx0trx.c b/storage/xtradb/trx/trx0trx.c index f150d64f8dc..9584f0c4c46 100644 --- a/storage/xtradb/trx/trx0trx.c +++ b/storage/xtradb/trx/trx0trx.c @@ -109,6 +109,8 @@ trx_create( trx->support_xa = TRUE; + trx->flush_log_at_trx_commit_session = 3; /* means to use innodb_flush_log_at_trx_commit value */ + trx->check_foreigns = TRUE; trx->check_unique_secondary = TRUE; @@ -119,7 +121,6 @@ trx_create( trx->table_id = ut_dulint_zero; trx->mysql_thd = NULL; - trx->mysql_query_str = NULL; trx->active_trans = 0; trx->duplicates = 0; @@ -736,6 +737,9 @@ trx_start( generated by the same transaction, doesn't. */ trx->support_xa = thd_supports_xa(trx->mysql_thd); + trx->flush_log_at_trx_commit_session = + thd_flush_log_at_trx_commit_session(trx->mysql_thd); + mutex_enter(&kernel_mutex); ret = trx_start_low(trx, rseg_id); @@ -758,6 +762,7 @@ trx_commit_off_kernel( trx_rseg_t* rseg; trx_undo_t* undo; mtr_t mtr; + trx_sysf_t* sys_header = NULL; ut_ad(mutex_own(&kernel_mutex)); @@ -815,7 +820,11 @@ trx_commit_off_kernel( if (trx->mysql_log_file_name && trx->mysql_log_file_name[0] != '\0') { + if (!sys_header) { + sys_header = trx_sysf_get(&mtr); + } trx_sys_update_mysql_binlog_offset( + sys_header, trx->mysql_log_file_name, trx->mysql_log_offset, TRX_SYS_MYSQL_LOG_INFO, &mtr); @@ -824,11 +833,16 @@ trx_commit_off_kernel( if (trx->mysql_master_log_file_name[0] != '\0') { /* This database server is a MySQL replication slave */ + if (!sys_header) { + sys_header = trx_sysf_get(&mtr); + } trx_sys_update_mysql_binlog_offset( + sys_header, trx->mysql_relay_log_file_name, trx->mysql_relay_log_pos, TRX_SYS_MYSQL_RELAY_LOG_INFO, &mtr); trx_sys_update_mysql_binlog_offset( + sys_header, trx->mysql_master_log_file_name, trx->mysql_master_log_pos, TRX_SYS_MYSQL_MASTER_LOG_INFO, &mtr); @@ -907,6 +921,7 @@ trx_commit_off_kernel( trx->read_view = NULL; if (lsn) { + ulint flush_log_at_trx_commit; mutex_exit(&kernel_mutex); @@ -915,6 +930,12 @@ trx_commit_off_kernel( trx_undo_insert_cleanup(trx); } + if (trx->flush_log_at_trx_commit_session == 3) { + flush_log_at_trx_commit = srv_flush_log_at_trx_commit; + } else { + flush_log_at_trx_commit = trx->flush_log_at_trx_commit_session; + } + /* NOTE that we could possibly make a group commit more efficient here: call os_thread_yield here to allow also other trxs to come to commit! */ @@ -946,9 +967,9 @@ trx_commit_off_kernel( if (trx->flush_log_later) { /* Do nothing yet */ trx->must_flush_log_later = TRUE; - } else if (srv_flush_log_at_trx_commit == 0) { + } else if (flush_log_at_trx_commit == 0) { /* Do nothing */ - } else if (srv_flush_log_at_trx_commit == 1) { + } else if (flush_log_at_trx_commit == 1) { if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) { /* Write the log but do not flush it to disk */ @@ -960,7 +981,7 @@ trx_commit_off_kernel( log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE); } - } else if (srv_flush_log_at_trx_commit == 2) { + } else if (flush_log_at_trx_commit == 2) { /* Write the log but do not flush it to disk */ @@ -983,7 +1004,6 @@ trx_commit_off_kernel( trx->rseg = NULL; trx->undo_no = ut_dulint_zero; trx->last_sql_stat_start.least_undo_no = ut_dulint_zero; - trx->mysql_query_str = NULL; ut_ad(UT_LIST_GET_LEN(trx->wait_thrs) == 0); ut_ad(UT_LIST_GET_LEN(trx->trx_locks) == 0); @@ -1641,16 +1661,23 @@ trx_commit_complete_for_mysql( trx_t* trx) /*!< in: trx handle */ { ib_uint64_t lsn = trx->commit_lsn; + ulint flush_log_at_trx_commit; ut_a(trx); trx->op_info = "flushing log"; + if (trx->flush_log_at_trx_commit_session == 3) { + flush_log_at_trx_commit = srv_flush_log_at_trx_commit; + } else { + flush_log_at_trx_commit = trx->flush_log_at_trx_commit_session; + } + if (!trx->must_flush_log_later) { /* Do nothing */ - } else if (srv_flush_log_at_trx_commit == 0) { + } else if (flush_log_at_trx_commit == 0) { /* Do nothing */ - } else if (srv_flush_log_at_trx_commit == 1) { + } else if (flush_log_at_trx_commit == 1) { if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) { /* Write the log but do not flush it to disk */ @@ -1661,7 +1688,7 @@ trx_commit_complete_for_mysql( log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE); } - } else if (srv_flush_log_at_trx_commit == 2) { + } else if (flush_log_at_trx_commit == 2) { /* Write the log but do not flush it to disk */ @@ -1922,6 +1949,8 @@ trx_prepare_off_kernel( /*--------------------------------------*/ if (lsn) { + ulint flush_log_at_trx_commit; + /* Depending on the my.cnf options, we may now write the log buffer to the log files, making the prepared state of the transaction durable if the OS does not crash. We may also @@ -1941,9 +1970,15 @@ trx_prepare_off_kernel( mutex_exit(&kernel_mutex); - if (srv_flush_log_at_trx_commit == 0) { + if (trx->flush_log_at_trx_commit_session == 3) { + flush_log_at_trx_commit = srv_flush_log_at_trx_commit; + } else { + flush_log_at_trx_commit = trx->flush_log_at_trx_commit_session; + } + + if (flush_log_at_trx_commit == 0) { /* Do nothing */ - } else if (srv_flush_log_at_trx_commit == 1) { + } else if (flush_log_at_trx_commit == 1) { if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) { /* Write the log but do not flush it to disk */ @@ -1955,7 +1990,7 @@ trx_prepare_off_kernel( log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE); } - } else if (srv_flush_log_at_trx_commit == 2) { + } else if (flush_log_at_trx_commit == 2) { /* Write the log but do not flush it to disk */ diff --git a/storage/xtradb/ut/ut0auxconf_atomic_pthread_t_gcc.c b/storage/xtradb/ut/ut0auxconf_atomic_pthread_t_gcc.c deleted file mode 100644 index 30de5aa6f17..00000000000 --- a/storage/xtradb/ut/ut0auxconf_atomic_pthread_t_gcc.c +++ /dev/null @@ -1,43 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2009, Innobase Oy. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA - -*****************************************************************************/ - -/***************************************************************************** -If this program compiles, then pthread_t objects can be used as arguments -to GCC atomic builtin functions. - -Created March 5, 2009 Vasil Dimov -*****************************************************************************/ - -#include <pthread.h> -#include <string.h> - -int -main(int argc, char** argv) -{ - pthread_t x1; - pthread_t x2; - pthread_t x3; - - memset(&x1, 0x0, sizeof(x1)); - memset(&x2, 0x0, sizeof(x2)); - memset(&x3, 0x0, sizeof(x3)); - - __sync_bool_compare_and_swap(&x1, x2, x3); - - return(0); -} diff --git a/storage/xtradb/ut/ut0auxconf_atomic_pthread_t_solaris.c b/storage/xtradb/ut/ut0auxconf_atomic_pthread_t_solaris.c deleted file mode 100644 index 310603c7503..00000000000 --- a/storage/xtradb/ut/ut0auxconf_atomic_pthread_t_solaris.c +++ /dev/null @@ -1,54 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2009, Innobase Oy. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA - -*****************************************************************************/ - -/***************************************************************************** -If this program compiles and returns 0, then pthread_t objects can be used as -arguments to Solaris libc atomic functions. - -Created April 18, 2009 Vasil Dimov -*****************************************************************************/ - -#include <pthread.h> -#include <string.h> - -int -main(int argc, char** argv) -{ - pthread_t x1; - pthread_t x2; - pthread_t x3; - - memset(&x1, 0x0, sizeof(x1)); - memset(&x2, 0x0, sizeof(x2)); - memset(&x3, 0x0, sizeof(x3)); - - if (sizeof(pthread_t) == 4) { - - atomic_cas_32(&x1, x2, x3); - - } else if (sizeof(pthread_t) == 8) { - - atomic_cas_64(&x1, x2, x3); - - } else { - - return(1); - } - - return(0); -} diff --git a/storage/xtradb/ut/ut0auxconf_have_gcc_atomics.c b/storage/xtradb/ut/ut0auxconf_have_gcc_atomics.c deleted file mode 100644 index da5c13d7d79..00000000000 --- a/storage/xtradb/ut/ut0auxconf_have_gcc_atomics.c +++ /dev/null @@ -1,61 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2009, Innobase Oy. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA - -*****************************************************************************/ - -/***************************************************************************** -If this program compiles and returns 0, then GCC atomic funcions are available. - -Created September 12, 2009 Vasil Dimov -*****************************************************************************/ - -int -main(int argc, char** argv) -{ - long x; - long y; - long res; - char c; - - x = 10; - y = 123; - res = __sync_bool_compare_and_swap(&x, x, y); - if (!res || x != y) { - return(1); - } - - x = 10; - y = 123; - res = __sync_bool_compare_and_swap(&x, x + 1, y); - if (res || x != 10) { - return(1); - } - - x = 10; - y = 123; - res = __sync_add_and_fetch(&x, y); - if (res != 123 + 10 || x != 123 + 10) { - return(1); - } - - c = 10; - res = __sync_lock_test_and_set(&c, 123); - if (res != 10 || c != 123) { - return(1); - } - - return(0); -} diff --git a/storage/xtradb/ut/ut0auxconf_have_solaris_atomics.c b/storage/xtradb/ut/ut0auxconf_have_solaris_atomics.c deleted file mode 100644 index 7eb704edd4b..00000000000 --- a/storage/xtradb/ut/ut0auxconf_have_solaris_atomics.c +++ /dev/null @@ -1,39 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2009, Innobase Oy. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA - -*****************************************************************************/ - -/***************************************************************************** -If this program compiles, then Solaris libc atomic funcions are available. - -Created April 18, 2009 Vasil Dimov -*****************************************************************************/ -#include <atomic.h> - -int -main(int argc, char** argv) -{ - ulong_t ulong = 0; - uint32_t uint32 = 0; - uint64_t uint64 = 0; - - atomic_cas_ulong(&ulong, 0, 1); - atomic_cas_32(&uint32, 0, 1); - atomic_cas_64(&uint64, 0, 1); - atomic_add_long(&ulong, 0); - - return(0); -} diff --git a/storage/xtradb/ut/ut0auxconf_pause.c b/storage/xtradb/ut/ut0auxconf_pause.c deleted file mode 100644 index 54d63bdd9bc..00000000000 --- a/storage/xtradb/ut/ut0auxconf_pause.c +++ /dev/null @@ -1,32 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2009, Innobase Oy. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA - -*****************************************************************************/ - -/***************************************************************************** -If this program compiles and can be run and returns 0, then the pause -instruction is available. - -Created Jul 21, 2009 Vasil Dimov -*****************************************************************************/ - -int -main(int argc, char** argv) -{ - __asm__ __volatile__ ("pause"); - - return(0); -} diff --git a/storage/xtradb/ut/ut0auxconf_sizeof_pthread_t.c b/storage/xtradb/ut/ut0auxconf_sizeof_pthread_t.c deleted file mode 100644 index 96add4526ef..00000000000 --- a/storage/xtradb/ut/ut0auxconf_sizeof_pthread_t.c +++ /dev/null @@ -1,35 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2009, Innobase Oy. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA - -*****************************************************************************/ - -/***************************************************************************** -This program should compile and when run, print a single line like: -#define SIZEOF_PTHREAD_T %d - -Created April 18, 2009 Vasil Dimov -*****************************************************************************/ - -#include <stdio.h> -#include <pthread.h> - -int -main(int argc, char** argv) -{ - printf("#define SIZEOF_PTHREAD_T %d\n", (int) sizeof(pthread_t)); - - return(0); -} |